From 52194ddaf84c5389143e6623524f847a2ca489fc Mon Sep 17 00:00:00 2001 From: Marty Kulma <18468315+martykulma@users.noreply.github.com> Date: Wed, 20 May 2026 17:50:09 -0400 Subject: [PATCH 1/4] docs: add notes about MySQL zero-value time values --- doc/user/content/sql/create-source/mysql.md | 5 ++++- doc/user/data/mysql_source_details.yml | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/user/content/sql/create-source/mysql.md b/doc/user/content/sql/create-source/mysql.md index 248206b91c2ca..93ce730ad7b0c 100644 --- a/doc/user/content/sql/create-source/mysql.md +++ b/doc/user/content/sql/create-source/mysql.md @@ -294,7 +294,10 @@ CREATE SOURCE mz_source If you're replicating tables that use [data types unsupported](#supported-types) by Materialize, use the `TEXT COLUMNS` option to decode data as `text` for the -affected columns. This option expects the upstream fully-qualified names of the +affected columns. `TEXT COLUMNS` should also be used for columns that contain +MySQL zero-value `DATE`, `DATETIME`, or `TIMESTAMP` data. + +This option expects the upstream fully-qualified names of the replicated table and column (i.e. as defined in your MySQL database). ```mzsql diff --git a/doc/user/data/mysql_source_details.yml b/doc/user/data/mysql_source_details.yml index 98db0fb56975d..cda72698e48d5 100644 --- a/doc/user/data/mysql_source_details.yml +++ b/doc/user/data/mysql_source_details.yml @@ -67,6 +67,23 @@ - Use the [`EXCLUDE COLUMNS`](/sql/create-source/mysql/#excluding-columns) option to exclude any columns that contain unsupported data types. + #### Zero values for `date`, `datetime`, and `timestamp` + + MySQL allows the special "zero" values `0000-00-00` and `0000-00-00 + 00:00:00` in `date`, `datetime`, and `timestamp` columns when the server + `sql_mode` does not include `NO_ZERO_DATE` or `NO_ZERO_IN_DATE`.
These + values are not representable in Materialize's corresponding native types, + so they will cause ingestion to fail for the affected column. + + To ingest columns that contain zero values, use [`TEXT + COLUMNS`](/sql/create-source/mysql/#handling-unsupported-types) to + decode the affected columns as `text`. The zero values for `date`, + `datetime`, and `timestamp` are preserved verbatim as strings (e.g. + `"0000-00-00 00:00:00"`). + + NOTE: The zero value for `year` surfaces as the string `"0"` rather than + `"0000"`. + - name: mysql-truncation-restriction content: | Avoid truncating upstream tables that are being replicated into Materialize. From 1068d08353a035b9a6bb829421582e3287f82954 Mon Sep 17 00:00:00 2001 From: Marty Kulma <18468315+martykulma@users.noreply.github.com> Date: Wed, 20 May 2026 18:12:36 -0400 Subject: [PATCH 2/4] Add additional test coverage for MySQL YEAR variants --- test/mysql-cdc/30-text-columns.td | 5 +- test/mysql-cdc/text-columns-year.td | 111 ++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 test/mysql-cdc/text-columns-year.td diff --git a/test/mysql-cdc/30-text-columns.td b/test/mysql-cdc/30-text-columns.td index 0b8e438fed6d7..28acf4bea53f7 100644 --- a/test/mysql-cdc/30-text-columns.td +++ b/test/mysql-cdc/30-text-columns.td @@ -24,7 +24,10 @@ $ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-pass # Insert data into MySQL that can't be decoded using native types and must be decoded # as a TEXT COLUMN. DATE-type coverage lives in text-columns-date.td; -# TIMESTAMP/DATETIME coverage lives in text-columns-timestamp.td. +# TIMESTAMP/DATETIME coverage lives in text-columns-timestamp.td; +# YEAR coverage (including the zero-year sentinel) lives in text-columns-year.td. +# The YEAR usage retained below is intentional, as part of the multi-column +# TEXT COLUMNS integration check (combined-clause SHOW CREATE TABLE rewrite). 
$ mysql-execute name=mysql DROP DATABASE IF EXISTS public; diff --git a/test/mysql-cdc/text-columns-year.td b/test/mysql-cdc/text-columns-year.td new file mode 100644 index 0000000000000..a03decb6b8034 --- /dev/null +++ b/test/mysql-cdc/text-columns-year.td @@ -0,0 +1,111 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +# +# Regression test for MySQL YEAR columns ingested via TEXT COLUMNS, +# with a mix of valid values and the zero-year sentinel. +# +# YEAR is one of the MySQL types that cannot be ingested natively in +# Materialize (see schemas.rs: YEAR is only mapped in parse_as_text_column, +# not in the native parser). CREATE TABLE FROM SOURCE on a YEAR column +# without TEXT COLUMNS errors with "unsupported type"; declaring the column +# in TEXT COLUMNS is the documented workaround. +# +# Per https://dev.mysql.com/doc/refman/8.0/en/year.html: +# * YEAR range: 1901 to 2155 +# * Zero value: 0000 (allowed when sql_mode lacks NO_ZERO_DATE) +# +# Note: the zero value surfaces in Materialize as the string "0", not "0000", +# because decoding.rs renders YEAR via u16::to_string(). DATE/TIMESTAMP zero +# values are preserved as their literal MySQL form ("0000-00-00", +# "0000-00-00 00:00:00") — YEAR diverges from that convention today. + +> CREATE SECRET mysqlpass AS '${arg.mysql-root-password}' + +> CREATE CONNECTION mysqc TO MYSQL ( + HOST mysql, + USER root, + PASSWORD SECRET mysqlpass + ) + +$ mysql-connect name=mysql url=mysql://root@mysql password=${arg.mysql-root-password} + +# sql_mode = '' is required so MySQL accepts the zero-year that motivates +# the use of TEXT COLUMNS in the first place. 
+$ mysql-execute name=mysql +DROP DATABASE IF EXISTS public; +CREATE DATABASE public; +USE public; +SET SESSION sql_mode = ''; +CREATE TABLE events (id INT PRIMARY KEY, event_year YEAR NULL); +START TRANSACTION; +INSERT INTO events VALUES (1, '2024'), (2, '0000'), (3, NULL); +# Boundary rows: min and max valid YEAR values. +INSERT INTO events VALUES (4, '1901'), (5, '2155'); +COMMIT; + +> BEGIN +> CREATE SOURCE da + FROM MYSQL CONNECTION mysqc; +> CREATE TABLE events FROM SOURCE da (REFERENCE public.events) + WITH (TEXT COLUMNS = (event_year)); +> COMMIT + +# Post-snapshot rows exercise the replication / binlog decode path. +$ mysql-execute name=mysql +USE public; +SET SESSION sql_mode = ''; +START TRANSACTION; +INSERT INTO events VALUES (6, '2025'), (7, '0000'), (8, NULL); +INSERT INTO events VALUES (9, '1901'), (10, '2155'); +COMMIT; + +> SELECT id, event_year FROM events ORDER BY id; +1 "2024" +2 "0" +3 +4 "1901" +5 "2155" +6 "2025" +7 "0" +8 +9 "1901" +10 "2155" + +# Verify the column type was rewritten to text by TEXT COLUMNS. +> SELECT pg_typeof(event_year) FROM events LIMIT 1; +text + +# None of the data above should have caused the source to go into a stalled state. +> SELECT name, status, error IS NULL FROM mz_internal.mz_source_statuses WHERE name IN ('da', 'events') ORDER BY name; +da running true +events running true + +> DROP SOURCE da CASCADE; + +# +# Negative path: a YEAR column that is NOT declared as a TEXT COLUMN cannot be +# ingested. YEAR has no native mapping in Materialize, so CREATE TABLE FROM +# SOURCE must error. This guards the documented workaround: declare YEAR +# columns in TEXT COLUMNS. +# + +$ mysql-execute name=mysql +DROP DATABASE IF EXISTS public; +CREATE DATABASE public; +USE public; +CREATE TABLE reports (id INT PRIMARY KEY, reported_year YEAR NULL); +INSERT INTO reports VALUES (1, '2024'); + +> CREATE SOURCE da + FROM MYSQL CONNECTION mysqc; +! 
CREATE TABLE reports FROM SOURCE da (REFERENCE public.reports); +contains: unsupported type + +> DROP SOURCE da CASCADE; From 3a860245569a335db330684351c8d05657dc5b0e Mon Sep 17 00:00:00 2001 From: Marty Kulma <18468315+martykulma@users.noreply.github.com> Date: Thu, 21 May 2026 17:00:53 -0400 Subject: [PATCH 3/4] Fix handling of MySQL YEAR, improve tests to avoid races --- src/mysql-util/src/decoding.rs | 12 ++++++++++-- test/mysql-cdc/text-columns-date.td | 15 +++++++++++++++ test/mysql-cdc/text-columns-timestamp.td | 10 ++++++++++ test/mysql-cdc/text-columns-year.td | 23 ++++++++++++++++------- 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/mysql-util/src/decoding.rs b/src/mysql-util/src/decoding.rs index 53b78be10b65b..926c2568b2199 100644 --- a/src/mysql-util/src/decoding.rs +++ b/src/mysql-util/src/decoding.rs @@ -361,8 +361,16 @@ fn pack_val_as_datum( } } Some(MySqlColumnMeta::Year) => { - let val = from_value_opt::<u16>(value)?; - packer.push(Datum::String(&val.to_string())); + let mut val = from_value_opt::<u16>(value)?; + // mysql_common incorrectly handles MySQL YEAR type, which has a valid range + // of 1901-2155 (https://dev.mysql.com/doc/refman/8.0/en/year.html) + // + // We treat the value 1900 as the zero-value year - "0000" + // https://github.com/blackbeam/rust_mysql_common/blob/v0.35.5/src/binlog/value.rs#L124-L129 + if val == 1900 { + val = 0; + } + packer.push(Datum::String(&format!("{val:04}"))); } Some(MySqlColumnMeta::Date) => { // Some MySQL dates are invalid in chrono/NaiveDate (e.g.
0000-00-00), so diff --git a/test/mysql-cdc/text-columns-date.td b/test/mysql-cdc/text-columns-date.td index 4bf5a8eed392c..df1e8537f5208 100644 --- a/test/mysql-cdc/text-columns-date.td +++ b/test/mysql-cdc/text-columns-date.td @@ -56,6 +56,18 @@ COMMIT; WITH (TEXT COLUMNS = (event_date)); > COMMIT +# Block until the snapshot is fully ingested before issuing the +# post-snapshot inserts, so those rows go through the binlog decode +# path rather than being absorbed into the snapshot. +> SELECT id, event_date FROM events ORDER BY id; +1 "2024-04-03" +2 "0000-00-00" +3 +4 "1000-01-01" +5 "9999-12-31" +11 "2024-00-01" +12 "2024-01-00" + # Post-snapshot rows exercise the replication / binlog decode path. $ mysql-execute name=mysql USE public; @@ -136,6 +148,9 @@ INSERT INTO reports VALUES (1, '2024-04-03'); > CREATE TABLE reports FROM SOURCE da (REFERENCE public.reports); > COMMIT +# Block until the snapshot lands before issuing the binlog-path insert, +# so the zero-date below is decoded as a replication event, not a +# snapshot row. > SELECT * FROM reports; 1 "2024-04-03" diff --git a/test/mysql-cdc/text-columns-timestamp.td b/test/mysql-cdc/text-columns-timestamp.td index c97d9ab9d3a79..ba566179bc55f 100644 --- a/test/mysql-cdc/text-columns-timestamp.td +++ b/test/mysql-cdc/text-columns-timestamp.td @@ -56,6 +56,16 @@ COMMIT; WITH (TEXT COLUMNS = (created_at, updated_at, archived_at, born_at, mid_at)); > COMMIT +# Block until the snapshot is fully ingested before issuing the +# post-snapshot inserts, so those rows go through the binlog decode +# path rather than being absorbed into the snapshot. 
+> SELECT id, created_at, updated_at, archived_at, born_at, mid_at FROM products ORDER BY id; +1 "2024-04-03 10:15:13" "2024-04-03 10:15:13.123456" "2024-04-03 10:15:13" "2024-04-03 10:15:13.123456" "2024-04-03 10:15:13.1234" +2 "0000-00-00 00:00:00" "0000-00-00 00:00:00.000000" "0000-00-00 00:00:00" "0000-00-00 00:00:00.000000" "0000-00-00 00:00:00.0000" +3 +7 "1001-01-01 00:00:00" "1001-01-01 00:00:00.000001" "1001-01-01 00:00:00.0001" +8 "9999-12-31 23:59:59" "9999-12-31 23:59:59.999999" "9999-12-31 23:59:59.9999" + # Post-snapshot rows exercise the replication / binlog decode path, # which uses a different mysql_common::Value variant than the snapshot. $ mysql-execute name=mysql diff --git a/test/mysql-cdc/text-columns-year.td b/test/mysql-cdc/text-columns-year.td index a03decb6b8034..3ef6435784b8c 100644 --- a/test/mysql-cdc/text-columns-year.td +++ b/test/mysql-cdc/text-columns-year.td @@ -21,10 +21,12 @@ # * YEAR range: 1901 to 2155 # * Zero value: 0000 (allowed when sql_mode lacks NO_ZERO_DATE) # -# Note: the zero value surfaces in Materialize as the string "0", not "0000", -# because decoding.rs renders YEAR via u16::to_string(). DATE/TIMESTAMP zero -# values are preserved as their literal MySQL form ("0000-00-00", -# "0000-00-00 00:00:00") — YEAR diverges from that convention today. +# YEAR values are rendered zero-padded to four digits, matching the +# literal MySQL form and the DATE/TIMESTAMP zero-value convention +# ("0000-00-00", "0000-00-00 00:00:00"). The binlog decode path +# additionally remaps mysql_common's 1900-on-the-wire representation +# of the zero-year back to 0; the snapshot row (id 2) and the binlog +# row (id 7) below pin both paths. > CREATE SECRET mysqlpass AS '${arg.mysql-root-password}' @@ -57,23 +59,30 @@ COMMIT; WITH (TEXT COLUMNS = (event_year)); > COMMIT +> SELECT id, event_year FROM events ORDER BY id; +1 "2024" +2 "0000" +3 +4 "1901" +5 "2155" + # Post-snapshot rows exercise the replication / binlog decode path. 
$ mysql-execute name=mysql USE public; SET SESSION sql_mode = ''; START TRANSACTION; -INSERT INTO events VALUES (6, '2025'), (7, '0000'), (8, NULL); +INSERT INTO events VALUES (6, '2025'), (7, 0), (8, NULL); INSERT INTO events VALUES (9, '1901'), (10, '2155'); COMMIT; > SELECT id, event_year FROM events ORDER BY id; 1 "2024" -2 "0" +2 "0000" 3 4 "1901" 5 "2155" 6 "2025" -7 "0" +7 "0000" 8 9 "1901" 10 "2155" From cbbdf7abeb2250b6bca657b697066dbfb11578cd Mon Sep 17 00:00:00 2001 From: Marty Kulma <18468315+martykulma@users.noreply.github.com> Date: Thu, 21 May 2026 18:24:04 -0400 Subject: [PATCH 4/4] Additional updates to documentation after YEAR fix --- doc/user/data/mysql_source_details.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/user/data/mysql_source_details.yml b/doc/user/data/mysql_source_details.yml index cda72698e48d5..295b63bfb836c 100644 --- a/doc/user/data/mysql_source_details.yml +++ b/doc/user/data/mysql_source_details.yml @@ -78,11 +78,8 @@ To ingest columns that contain zero values, use [`TEXT COLUMNS`](/sql/create-source/mysql/#handling-unsupported-types) to decode the affected columns as `text`. The zero values for `date`, - `datetime`, and `timestamp` are preserved verbatim as strings (e.g. - `"0000-00-00 00:00:00"`). - - NOTE: The zero value for `year` surfaces as the string `"0"` rather than - `"0000"`. + `datetime`, `timestamp`, and `year` are preserved verbatim as strings + (e.g. `"0000-00-00 00:00:00"`, `"0000"`). - name: mysql-truncation-restriction content: |