diff --git a/docs/content/concepts/spec/fileformat.md b/docs/content/concepts/spec/fileformat.md index 27d4f63d8182..ec4a133f6835 100644 --- a/docs/content/concepts/spec/fileformat.md +++ b/docs/content/concepts/spec/fileformat.md @@ -791,3 +791,11 @@ Limitations: 3. Statistics collection is not supported for BLOB columns. For usage details, configuration options, and examples, see [Blob Type]({{< ref "append-table/blob" >}}). + +## MOSAIC + +Mosaic is a columnar-bucket hybrid format optimized for wide tables (10,000+ columns). Columns are hashed into buckets +by name, stored column-oriented within each bucket, and independently compressed. This enables efficient projection pushdown +at bucket granularity — reading 10 columns out of 10,000 only decompresses the buckets that contain those 10 columns. + +For the detailed file format specification, see [Mosaic File Format]({{< ref "concepts/spec/mosaic" >}}). diff --git a/docs/content/concepts/spec/mosaic.md b/docs/content/concepts/spec/mosaic.md new file mode 100644 index 000000000000..6c5169a3b0c3 --- /dev/null +++ b/docs/content/concepts/spec/mosaic.md @@ -0,0 +1,313 @@ +--- +title: "Mosaic" +weight: 9 +type: docs +aliases: +- /concepts/spec/mosaic.html +--- + + +# Mosaic File Format + +Mosaic is a columnar-bucket hybrid format optimized for wide tables (10,000+ columns). Columns are hashed into buckets +by name, stored column-oriented within each bucket, and independently compressed. This enables efficient projection +pushdown at bucket granularity — reading 10 columns out of 10,000 only decompresses the buckets that contain those +10 columns. + +## File Layout + +``` ++--------------------------------------------+ +| Row Group 0: Bucket Data | +| [Bucket 0 compressed block] | +| [Bucket 3 compressed block] | +| ... (only non-empty buckets) | ++--------------------------------------------+ +| Row Group 1: Bucket Data | +| ... 
| ++--------------------------------------------+ +| Schema Block | +| [4 bytes: uncompressed size (BE int)] | +| [schema data (possibly compressed)] | ++--------------------------------------------+ +| Row Group Index (varint encoded) | ++--------------------------------------------+ +| Footer (32 bytes, fixed) | ++--------------------------------------------+ +``` + +## Footer (32 bytes, big-endian) + +| Offset | Size | Field | Description | +|--------|------|-------------------|------------------------------------| +| 0 | 8 | indexOffset | Absolute offset of Row Group Index | +| 8 | 8 | schemaBlockOffset | Absolute offset of Schema Block | +| 16 | 4 | numBuckets | Total number of buckets | +| 20 | 4 | numRowGroups | Total number of row groups | +| 24 | 1 | compression | 0 = none, 1 = zstd | +| 25 | 1 | version | Format version (currently 1) | +| 26 | 2 | (reserved) | Padding, set to 0 | +| 28 | 4 | magic | `MOSA` (0x4D4F5341) | + +## Row Group Index + +Varint-encoded, only non-empty buckets are stored. For each row group: + +``` +varint numRows +varint nonEmptyCount +repeated nonEmptyCount times: + varint bucketId + 8 bytes bucketOffset (big-endian, absolute file offset) + varint compressedSize + varint uncompressedSize +``` + +## Schema Block + +Prefixed with a 4-byte big-endian int (uncompressed size), followed by the schema data (compressed with the file's +compression method). + +Column names are stored using **front coding** (incremental encoding): each name shares a prefix with the previous name, +and only the suffix is stored. This is the same technique used by Lucene, LevelDB, and RocksDB for their block index +entries. 
+ +``` +varint numColumns +varint numBuckets +repeated numColumns times: + varint fieldId + varint bucketId + varint indexInBucket + varint sharedPrefixLen (bytes shared with previous column name) + varint suffixLen (bytes of new suffix) + bytes suffix (UTF-8) (suffixLen bytes) + TypeDescriptor +``` + +The first column has `sharedPrefixLen = 0`. To reconstruct a column name, take the first `sharedPrefixLen` bytes from +the previous name and append the suffix. + +### TypeDescriptor + +``` +1 byte typeId +1 byte nullable (0 = not null, 1 = nullable) +[type-specific params] +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| typeId | Type          | Params                             |
|--------|---------------|------------------------------------|
| 0      | BOOLEAN       | (none)                             |
| 1      | TINYINT       | (none)                             |
| 2      | SMALLINT      | (none)                             |
| 3      | INTEGER       | (none)                             |
| 4      | BIGINT        | (none)                             |
| 5      | FLOAT         | (none)                             |
| 6      | DOUBLE        | (none)                             |
| 7      | DATE          | (none)                             |
| 8      | CHAR          | varint length                      |
| 9      | VARCHAR       | varint length                      |
| 10     | STRING        | (none) — VARCHAR with MAX_LENGTH   |
| 11     | BINARY        | varint length                      |
| 12     | VARBINARY     | varint length                      |
| 13     | BYTES         | (none) — VARBINARY with MAX_LENGTH |
| 14     | DECIMAL       | varint precision, varint scale     |
| 15     | TIME          | varint precision                   |
| 16     | TIMESTAMP     | varint precision                   |
| 17     | TIMESTAMP_LTZ | varint precision                   |
+ +Complex types (ARRAY, MAP, ROW, etc.), VARIANT, and BLOB are not supported. + +## Bucket Data + +Each bucket is stored as a **column-oriented** block. Within a bucket, each column is independently encoded using one +of four encodings (PLAIN, CONST, DICT, or ALL_NULL), chosen automatically based on the column's value distribution. + +### Bucket Block Layout (before compression) + +``` ++--------------------------------------------+ +| Encoding Flags | +| 2 bits per column, packed into bytes | ++--------------------------------------------+ +| Has-Nulls Flags | +| 1 bit per column, packed into bytes | ++--------------------------------------------+ +| Const Metadata (CONST columns only) | +| serialized value for each CONST column | ++--------------------------------------------+ +| Dict Metadata (DICT columns only) | +| for each DICT column: | +| varint numEntries | +| repeated: serialized value per entry | ++--------------------------------------------+ +| Null Bitmaps | +| ceil(numRows/8) bytes per column | +| (only for columns with nulls, | +| excluding ALL_NULL columns) | ++--------------------------------------------+ +| Column Data | +| PLAIN: raw serialized values | +| DICT: 1-byte index per non-null cell | +| CONST/ALL_NULL: (nothing) | ++--------------------------------------------+ +``` + +**Encoding Flags**: 2 bits per column, packed left-to-right. Encoding values: + +| Value | Encoding | Description | +|-------|----------|-------------| +| 0 | PLAIN | Raw serialized values for each non-null cell | +| 1 | CONST | All non-null values are identical; the single value is stored in metadata | +| 2 | DICT | 2-255 distinct values; each non-null cell stores a 1-byte dictionary index | +| 3 | ALL_NULL | Every cell in this column is null; no data or null bitmap stored | + +**Has-Nulls Flags**: 1 bit per column. If set, a null bitmap exists for that column. ALL_NULL columns always have +this flag cleared (no bitmap is stored for them). 
+ +**Null Bitmap**: `ceil(numRows / 8)` bytes per column. Bit `i` = 1 means row `i` is null. Only present for columns +where has-nulls flag is set. + +### Column Encoding Selection + +The encoding for each column is chosen automatically during writing based on value distribution and cost: + +- **ALL_NULL**: 0 non-null values +- **CONST**: exactly 1 distinct non-null value (any number of nulls allowed) +- **DICT**: 2-255 distinct non-null values, **and** the dictionary-encoded size is smaller than plain — the writer + compares `varint(numEntries) + sum(entryBytes) + nonNullCount` against the raw value buffer size +- **PLAIN**: 256+ distinct values, dict tracking was abandoned, or dict encoding would be larger than plain + +CONST detection is independent of dictionary tracking — it uses a lightweight byte comparison against the first non-null +value, so it works for all types and value sizes (including long strings). + +Dictionary encoding works for all data types including variable-width types (VARCHAR, VARBINARY, DECIMAL). The writer +uses primitive long keys for fixed-width types (≤8 bytes) and byte-array keys for variable-width types. Variable-width +dictionary tracking is bounded by a cumulative byte budget and abandoned when cardinality exceeds 255 or total dictionary +entry bytes exceed the budget. + +Dictionary indices are limited to 1 byte (max 255 entries). This is a deliberate simplicity trade-off for the first +version — columns with 256+ distinct values fall back to PLAIN encoding. + +## Value Serialization + +Values are serialized in the same format for PLAIN data, CONST metadata, and DICT entries: + + + + + + + + + + + + + + + + + + + + + + + +
| Type                               | Encoding                                           |
|------------------------------------|----------------------------------------------------|
| BOOLEAN                            | 1 byte (0 or 1)                                    |
| TINYINT                            | 1 byte                                             |
| SMALLINT                           | 2 bytes big-endian                                 |
| INTEGER / DATE / TIME              | 4 bytes big-endian                                 |
| BIGINT                             | 8 bytes big-endian                                 |
| FLOAT                              | 4 bytes IEEE 754 (big-endian)                      |
| DOUBLE                             | 8 bytes IEEE 754 (big-endian)                      |
| DECIMAL (compact, precision ≤ 18)  | 8 bytes big-endian (unscaled long)                 |
| DECIMAL (large, precision > 18)    | varint length + unscaled BigInteger bytes          |
| TIMESTAMP (precision ≤ 3)          | 8 bytes (epoch millis, big-endian)                 |
| TIMESTAMP (precision > 3)          | 8 bytes (epoch millis) + 4 bytes (nanos of millis) |
| CHAR / VARCHAR / STRING            | varint length + UTF-8 bytes                        |
| BINARY / VARBINARY / BYTES         | varint length + raw bytes                          |
+ +## ALL_NULL Column Pruning + +For single-row-group files (the common case with small files), columns where every value is null are pruned from both +the schema and bucket data. This reduces schema size for wide sparse tables where many columns are entirely null. + +- The writer detects ALL_NULL columns after buffering all rows +- ALL_NULL columns are removed from the encoding/null flags in bucket data +- ALL_NULL columns are removed from the schema block +- The reader treats any projected column not found in the schema as all-null (returns null for every row) + +This optimization only applies to single-row-group files. Multi-row-group files retain all columns because a column may +be ALL_NULL in one row group but have values in another. + +## Column-to-Bucket Assignment + +Columns are assigned to buckets by hashing the column name: + +``` +bucketId = Math.floorMod(fieldName.hashCode(), numBuckets) +``` + +Default number of buckets: `min(100, numColumns)`. + +## Compression + +Compression is applied independently to each bucket data block and to the schema block. Supported methods: + +- `0` — No compression +- `1` — Zstd (configurable level) + +## Benchmark + +Test setup: 10,000 columns (90% STRING, 10% INT), column names ~80 bytes each, Zstd compression (level 9). 
+ +**File Size (10 rows):** + +| Format | Size | vs Mosaic | +|---------|------------|-----------| +| Parquet | 9,696 KB | 14.8x | +| ORC | 6,377 KB | 9.7x | +| Mosaic | 654 KB | 1x | + +**Projection Read (500 rows):** + +| Projected Columns | Parquet | ORC | Mosaic | +|-------------------|------------|------------|-----------| +| 10 / 10,000 | 53,170 us | 72,729 us | 25,081 us | +| 1 / 10,000 | 50,919 us | 70,712 us | 2,374 us | + +File size — Parquet: 57.4 MB, ORC: 95.4 MB, Mosaic: 11.5 MB + +**Projection Read (4,500 rows, ~458 MB Parquet):** + +| Projected Columns | Parquet | ORC | Mosaic | +|-------------------|-------------|------------|------------| +| 10 / 10,000 | 369,627 us | 89,344 us | 67,314 us | +| 1 / 10,000 | 360,458 us | 81,934 us | 26,924 us | + +File size — Parquet: 458.4 MB, ORC: 827.9 MB, Mosaic: 100.2 MB + +When projecting a small subset of columns, Mosaic only decompresses the buckets containing the requested columns, +avoiding I/O on the remaining data. + +## Limitations + +1. Complex types (ARRAY, MAP, MULTISET, ROW) are not supported. +2. Mosaic format is designed for wide tables and may not be efficient for narrow tables with few columns. diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicBucketReader.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicBucketReader.java new file mode 100644 index 000000000000..0a614f5552b4 --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicBucketReader.java @@ -0,0 +1,359 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.Decimal; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; + +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_ALL_NULL; +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_CONST; +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_DICT; +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_PLAIN; + +/** + * Columnar bucket reader for the Mosaic v2 format. Reads column-oriented data with + * CONST/DICT/PLAIN/ALL_NULL encoding. 
+ */ +public class MosaicBucketReader { + + private final DataType[] allColumnTypes; + private final int[] localToOutputMapping; + private final int numColumnsInBucket; + + // Per-column state set during init() + private byte[] encodings; + private boolean[] hasNulls; + private byte[][] nullBitmaps; + private Object[] constValues; + private Object[][] dictValues; + private int[] dataCursors; + private byte[] data; + private int numRows; + private int currentRow; + + public MosaicBucketReader(DataType[] allColumnTypes, int[] localToOutputMapping) { + this.allColumnTypes = allColumnTypes; + this.localToOutputMapping = localToOutputMapping; + this.numColumnsInBucket = allColumnTypes.length; + } + + public void init(byte[] data, int numRows) { + this.data = data; + this.numRows = numRows; + this.currentRow = 0; + + this.encodings = new byte[numColumnsInBucket]; + this.hasNulls = new boolean[numColumnsInBucket]; + this.nullBitmaps = new byte[numColumnsInBucket][]; + this.constValues = new Object[numColumnsInBucket]; + this.dictValues = new Object[numColumnsInBucket][]; + this.dataCursors = new int[numColumnsInBucket]; + + int pos = 0; + + // 1. Read encoding flags (2 bits per column) + int encodingFlagsBytes = (numColumnsInBucket * 2 + 7) / 8; + for (int i = 0; i < numColumnsInBucket; i++) { + int byteIdx = (i * 2) / 8; + int bitIdx = (i * 2) % 8; + encodings[i] = (byte) ((data[pos + byteIdx] >>> bitIdx) & 0x03); + } + pos += encodingFlagsBytes; + + // 2. Read has-nulls flags (1 bit per column) + int hasNullsFlagsBytes = (numColumnsInBucket + 7) / 8; + for (int i = 0; i < numColumnsInBucket; i++) { + hasNulls[i] = (data[pos + i / 8] & (1 << (i % 8))) != 0; + } + pos += hasNullsFlagsBytes; + + // 3. 
Read const metadata + for (int i = 0; i < numColumnsInBucket; i++) { + if (encodings[i] == ENCODING_CONST) { + int w = MosaicBucketWriter.getFixedWidth(allColumnTypes[i]); + if (w > 0) { + constValues[i] = readTypedValue(allColumnTypes[i], data, pos, w); + pos += w; + } else { + constValues[i] = readVariableValue(allColumnTypes[i], data, pos); + int len = readVarint(data, pos); + pos += varintSize(len) + len; + } + } + } + + // 4. Read dict metadata + for (int i = 0; i < numColumnsInBucket; i++) { + if (encodings[i] == ENCODING_DICT) { + int numEntries = readVarint(data, pos); + pos += varintSize(numEntries); + int w = MosaicBucketWriter.getFixedWidth(allColumnTypes[i]); + Object[] entries = new Object[numEntries]; + for (int j = 0; j < numEntries; j++) { + if (w > 0) { + entries[j] = readTypedValue(allColumnTypes[i], data, pos, w); + pos += w; + } else { + entries[j] = readVariableValue(allColumnTypes[i], data, pos); + int len = readVarint(data, pos); + pos += varintSize(len) + len; + } + } + dictValues[i] = entries; + } + } + + // 5. Read null bitmaps + int nullBitmapSize = (numRows + 7) / 8; + for (int i = 0; i < numColumnsInBucket; i++) { + if (hasNulls[i] && encodings[i] != ENCODING_ALL_NULL) { + nullBitmaps[i] = new byte[nullBitmapSize]; + System.arraycopy(data, pos, nullBitmaps[i], 0, nullBitmapSize); + pos += nullBitmapSize; + } + } + + // 6. 
Record column data start offsets + for (int i = 0; i < numColumnsInBucket; i++) { + dataCursors[i] = pos; + if (encodings[i] == ENCODING_PLAIN) { + // Skip past all plain data for this column to find next column's offset + int w = MosaicBucketWriter.getFixedWidth(allColumnTypes[i]); + if (w > 0) { + int nonNullCount = countNonNull(i); + pos += nonNullCount * w; + } else { + // Variable-width: scan through + int nonNullCount = countNonNull(i); + for (int j = 0; j < nonNullCount; j++) { + int len = readVarint(data, pos); + pos += varintSize(len) + len; + } + } + } else if (encodings[i] == ENCODING_DICT) { + int nonNullCount = countNonNull(i); + pos += nonNullCount; // 1 byte per non-null cell + } + // CONST and ALL_NULL: no data to skip + } + } + + public void readRow(Object[] outputFields) { + for (int i = 0; i < numColumnsInBucket; i++) { + int outputPos = localToOutputMapping[i]; + + if (encodings[i] == ENCODING_ALL_NULL) { + if (outputPos >= 0) { + outputFields[outputPos] = null; + } + continue; + } + + boolean isNull = + hasNulls[i] && (nullBitmaps[i][currentRow / 8] & (1 << (currentRow % 8))) != 0; + + if (isNull) { + if (outputPos >= 0) { + outputFields[outputPos] = null; + } + continue; + } + + // Non-null value + switch (encodings[i]) { + case ENCODING_CONST: + if (outputPos >= 0) { + outputFields[outputPos] = constValues[i]; + } + break; + case ENCODING_DICT: + { + int idx = data[dataCursors[i]++] & 0xFF; + if (outputPos >= 0) { + outputFields[outputPos] = dictValues[i][idx]; + } + break; + } + case ENCODING_PLAIN: + { + int w = MosaicBucketWriter.getFixedWidth(allColumnTypes[i]); + if (outputPos >= 0) { + if (w > 0) { + outputFields[outputPos] = + readTypedValue(allColumnTypes[i], data, dataCursors[i], w); + } else { + outputFields[outputPos] = + readVariableValue(allColumnTypes[i], data, dataCursors[i]); + } + } + // Advance cursor + if (w > 0) { + dataCursors[i] += w; + } else { + int len = readVarint(data, dataCursors[i]); + dataCursors[i] += 
varintSize(len) + len; + } + break; + } + default: + break; + } + } + currentRow++; + } + + // ======================== Value reading ======================== + + private static Object readTypedValue(DataType type, byte[] buf, int pos, int width) { + switch (type.getTypeRoot()) { + case BOOLEAN: + return buf[pos] != 0; + case TINYINT: + return buf[pos]; + case SMALLINT: + return (short) ((buf[pos] << 8) | (buf[pos + 1] & 0xFF)); + case INTEGER: + case DATE: + case TIME_WITHOUT_TIME_ZONE: + return readInt(buf, pos); + case BIGINT: + return readLong(buf, pos); + case FLOAT: + return Float.intBitsToFloat(readInt(buf, pos)); + case DOUBLE: + return Double.longBitsToDouble(readLong(buf, pos)); + case DECIMAL: + { + DecimalType dt = (DecimalType) type; + return Decimal.fromUnscaledLong( + readLong(buf, pos), dt.getPrecision(), dt.getScale()); + } + case TIMESTAMP_WITHOUT_TIME_ZONE: + { + long millis = readLong(buf, pos); + if (width == 12) { + int nanos = readInt(buf, pos + 8); + return Timestamp.fromEpochMillis(millis, nanos); + } + return Timestamp.fromEpochMillis(millis); + } + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + { + long millis = readLong(buf, pos); + if (width == 12) { + int nanos = readInt(buf, pos + 8); + return Timestamp.fromEpochMillis(millis, nanos); + } + return Timestamp.fromEpochMillis(millis); + } + default: + throw new UnsupportedOperationException("Unsupported fixed type: " + type); + } + } + + private static Object readVariableValue(DataType type, byte[] buf, int pos) { + int len = readVarint(buf, pos); + int dataStart = pos + varintSize(len); + switch (type.getTypeRoot()) { + case CHAR: + case VARCHAR: + return BinaryString.fromBytes(buf, dataStart, len); + case BINARY: + case VARBINARY: + { + byte[] bytes = new byte[len]; + System.arraycopy(buf, dataStart, bytes, 0, len); + return bytes; + } + case DECIMAL: + { + DecimalType dt = (DecimalType) type; + byte[] bytes = new byte[len]; + System.arraycopy(buf, dataStart, bytes, 0, len); + return 
Decimal.fromUnscaledBytes(bytes, dt.getPrecision(), dt.getScale()); + } + default: + throw new UnsupportedOperationException("Unsupported variable type: " + type); + } + } + + // ======================== Helpers ======================== + + private int countNonNull(int colIdx) { + if (!hasNulls[colIdx]) { + return numRows; + } + if (encodings[colIdx] == ENCODING_ALL_NULL) { + return 0; + } + int count = 0; + int fullBytes = numRows / 8; + for (int b = 0; b < fullBytes; b++) { + count += Integer.bitCount(nullBitmaps[colIdx][b] & 0xFF); + } + int remaining = numRows % 8; + if (remaining > 0) { + int mask = (1 << remaining) - 1; + count += Integer.bitCount(nullBitmaps[colIdx][fullBytes] & mask); + } + return numRows - count; + } + + private static int readInt(byte[] buf, int pos) { + return ((buf[pos] & 0xFF) << 24) + | ((buf[pos + 1] & 0xFF) << 16) + | ((buf[pos + 2] & 0xFF) << 8) + | (buf[pos + 3] & 0xFF); + } + + private static long readLong(byte[] buf, int pos) { + return ((long) (buf[pos] & 0xFF) << 56) + | ((long) (buf[pos + 1] & 0xFF) << 48) + | ((long) (buf[pos + 2] & 0xFF) << 40) + | ((long) (buf[pos + 3] & 0xFF) << 32) + | ((long) (buf[pos + 4] & 0xFF) << 24) + | ((long) (buf[pos + 5] & 0xFF) << 16) + | ((long) (buf[pos + 6] & 0xFF) << 8) + | (buf[pos + 7] & 0xFF); + } + + private static int readVarint(byte[] buf, int pos) { + int value = 0; + int shift = 0; + int b; + do { + b = buf[pos++] & 0xFF; + value |= (b & 0x7F) << shift; + shift += 7; + } while ((b & 0x80) != 0); + return value; + } + + private static int varintSize(int value) { + int size = 1; + while ((value & ~0x7F) != 0) { + size++; + value >>>= 7; + } + return size; + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicBucketWriter.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicBucketWriter.java new file mode 100644 index 000000000000..fce7ebdc6edf --- /dev/null +++ 
b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicBucketWriter.java @@ -0,0 +1,713 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.Decimal; +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.LocalZonedTimestampType; +import org.apache.paimon.types.RowType; +import org.apache.paimon.types.TimestampType; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_ALL_NULL; +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_CONST; +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_DICT; +import static org.apache.paimon.format.mosaic.MosaicSpec.ENCODING_PLAIN; +import static org.apache.paimon.format.mosaic.MosaicUtils.writeVarint; + +/** + * Columnar bucket writer for the Mosaic format. Buffers values per-column and produces a + * column-oriented byte array with CONST/DICT/PLAIN/ALL_NULL encoding per column. 
+ * + *

CONST detection uses a lightweight byte-comparison tracker that works for all types and value + * sizes, independent of dictionary tracking. Dictionary tracking uses primitive long keys for + * fixed-width types (≤8 bytes) and byte-array keys for variable-width types. Variable-width dict + * tracking is bounded by a cumulative byte budget ({@link #MAX_DICT_TOTAL_BYTES}). DICT encoding is + * chosen only when it produces fewer bytes than PLAIN (cost-based selection). + */ +public class MosaicBucketWriter { + + private static final int MAX_DICT_TOTAL_BYTES = 16384; + + private final InternalRow.FieldGetter[] fieldGetters; + private final int numColumns; + private final int[] fixedWidths; + private final boolean[] isVariableWidth; + + // Per-column buffers + private byte[][] nullBitmaps; + private byte[][] valueBuffers; + private int[] valueBufPos; + private int[] nonNullCounts; + + // CONST tracking: byte comparison against first non-null value (works for any size) + private boolean[] constTracking; + private int[] firstValueLen; + + // Fixed-width ≤8 bytes: primitive long-based dict tracking + private Map[] longDictMaps; + // Variable-width and width>8: byte-array-based dict tracking with cumulative budget + private Map[] byteDictMaps; + private int[] dictTotalBytes; + + private int numRows; + + public MosaicBucketWriter(RowType fullRowType, int[] globalColumnIndices) { + this.numColumns = globalColumnIndices.length; + this.fieldGetters = new InternalRow.FieldGetter[numColumns]; + this.fixedWidths = new int[numColumns]; + this.isVariableWidth = new boolean[numColumns]; + + for (int i = 0; i < numColumns; i++) { + int globalIdx = globalColumnIndices[i]; + DataType type = fullRowType.getTypeAt(globalIdx); + fieldGetters[i] = InternalRow.createFieldGetter(type, globalIdx); + fixedWidths[i] = getFixedWidth(type); + isVariableWidth[i] = fixedWidths[i] < 0; + } + + initBuffers(); + } + + @SuppressWarnings("unchecked") + private void initBuffers() { + this.nullBitmaps = 
new byte[numColumns][]; + this.valueBuffers = new byte[numColumns][]; + this.valueBufPos = new int[numColumns]; + this.nonNullCounts = new int[numColumns]; + this.constTracking = new boolean[numColumns]; + this.firstValueLen = new int[numColumns]; + this.longDictMaps = new Map[numColumns]; + this.byteDictMaps = new Map[numColumns]; + this.dictTotalBytes = new int[numColumns]; + + for (int i = 0; i < numColumns; i++) { + nullBitmaps[i] = new byte[128]; + valueBuffers[i] = new byte[1024]; + constTracking[i] = true; + if (usesLongDict(i)) { + longDictMaps[i] = new HashMap<>(); + } else { + byteDictMaps[i] = new HashMap<>(); + } + } + this.numRows = 0; + } + + private boolean usesLongDict(int colIdx) { + return fixedWidths[colIdx] > 0 && fixedWidths[colIdx] <= 8; + } + + public boolean isEmpty() { + return numRows == 0; + } + + public int writeRow(InternalRow row) { + int bitmapIdx = numRows / 8; + + int totalSize = 0; + for (int i = 0; i < numColumns; i++) { + // Ensure null bitmap capacity + if (bitmapIdx >= nullBitmaps[i].length) { + byte[] newBm = new byte[nullBitmaps[i].length * 2]; + System.arraycopy(nullBitmaps[i], 0, newBm, 0, nullBitmaps[i].length); + nullBitmaps[i] = newBm; + } + + Object value = fieldGetters[i].getFieldOrNull(row); + if (value == null) { + nullBitmaps[i][bitmapIdx] |= (byte) (1 << (numRows % 8)); + } else { + nonNullCounts[i]++; + int before = valueBufPos[i]; + writeValue(i, value); + int written = valueBufPos[i] - before; + totalSize += written; + + // CONST tracking: compare against first non-null value + if (constTracking[i]) { + if (nonNullCounts[i] == 1) { + firstValueLen[i] = written; + } else if (written != firstValueLen[i] + || !regionEquals(valueBuffers[i], 0, before, written)) { + constTracking[i] = false; + } + } + + // Dict tracking (separate from CONST) + if (longDictMaps[i] != null) { + long key = extractFixedKey(valueBuffers[i], before, fixedWidths[i]); + longDictMaps[i].putIfAbsent(key, longDictMaps[i].size()); + if 
(longDictMaps[i].size() > 255) { + longDictMaps[i] = null; + } + } else if (byteDictMaps[i] != null) { + ByteKey key = new ByteKey(valueBuffers[i], before, written); + int sizeBefore = byteDictMaps[i].size(); + byteDictMaps[i].putIfAbsent(key, sizeBefore); + if (byteDictMaps[i].size() > sizeBefore) { + dictTotalBytes[i] += written; + } + if (byteDictMaps[i].size() > 255 || dictTotalBytes[i] > MAX_DICT_TOTAL_BYTES) { + byteDictMaps[i] = null; + } + } + } + } + numRows++; + // Include null bitmap overhead (~1 bit per column per row) + totalSize += (numColumns + 7) / 8; + return totalSize; + } + + public byte[] finish() { + return finish(false); + } + + public byte[] finish(boolean pruneAllNull) { + if (numRows == 0) { + return new byte[0]; + } + + // 1. Determine encoding per column + byte[] encodings = new byte[numColumns]; + boolean[] hasNulls = new boolean[numColumns]; + + for (int i = 0; i < numColumns; i++) { + if (nonNullCounts[i] == 0) { + encodings[i] = ENCODING_ALL_NULL; + hasNulls[i] = false; + } else if (constTracking[i]) { + encodings[i] = ENCODING_CONST; + hasNulls[i] = nonNullCounts[i] < numRows; + } else { + int dictSize = getDictSize(i); + if (dictSize >= 2 && dictSize <= 255 && dictEncodedSize(i) < valueBufPos[i]) { + encodings[i] = ENCODING_DICT; + } else { + encodings[i] = ENCODING_PLAIN; + } + hasNulls[i] = nonNullCounts[i] < numRows; + } + } + + // Count output columns (skip ALL_NULL when pruning) + int numOutputCols = numColumns; + if (pruneAllNull) { + numOutputCols = 0; + for (int i = 0; i < numColumns; i++) { + if (encodings[i] != ENCODING_ALL_NULL) { + numOutputCols++; + } + } + } + + // 2. Compute exact output size + byte[] out = computeOutBuffer(numOutputCols, encodings, hasNulls); + int pos = 0; + + // 2a. 
Encoding flags: 2 bits per output column + int encodingFlagsBytes = (numOutputCols * 2 + 7) / 8; + int outputIdx = 0; + for (int i = 0; i < numColumns; i++) { + if (pruneAllNull && encodings[i] == ENCODING_ALL_NULL) { + continue; + } + int byteIdx = (outputIdx * 2) / 8; + int bitIdx = (outputIdx * 2) % 8; + out[pos + byteIdx] |= (byte) (encodings[i] << bitIdx); + outputIdx++; + } + pos += encodingFlagsBytes; + + // 2b. Has-nulls flags: 1 bit per output column + int hasNullsFlagsBytes = (numOutputCols + 7) / 8; + outputIdx = 0; + for (int i = 0; i < numColumns; i++) { + if (pruneAllNull && encodings[i] == ENCODING_ALL_NULL) { + continue; + } + if (hasNulls[i]) { + out[pos + outputIdx / 8] |= (byte) (1 << (outputIdx % 8)); + } + outputIdx++; + } + pos += hasNullsFlagsBytes; + + // 2c. Const metadata — first non-null value from value buffer + for (int i = 0; i < numColumns; i++) { + if (encodings[i] == ENCODING_CONST) { + System.arraycopy(valueBuffers[i], 0, out, pos, firstValueLen[i]); + pos += firstValueLen[i]; + } + } + + // 2d. Dict metadata + for (int i = 0; i < numColumns; i++) { + if (encodings[i] == ENCODING_DICT) { + if (longDictMaps[i] != null) { + int numEntries = longDictMaps[i].size(); + pos = writeVarint(out, pos, numEntries); + int w = fixedWidths[i]; + long[] keys = new long[numEntries]; + for (Map.Entry e : longDictMaps[i].entrySet()) { + keys[e.getValue()] = e.getKey(); + } + for (int j = 0; j < numEntries; j++) { + pos = writeFixedKey(out, pos, keys[j], w); + } + } else { + int numEntries = byteDictMaps[i].size(); + pos = writeVarint(out, pos, numEntries); + ByteKey[] keys = new ByteKey[numEntries]; + for (Map.Entry e : byteDictMaps[i].entrySet()) { + keys[e.getValue()] = e.getKey(); + } + for (int j = 0; j < numEntries; j++) { + System.arraycopy(keys[j].data, 0, out, pos, keys[j].data.length); + pos += keys[j].data.length; + } + } + } + } + + // 2e. 
Null bitmaps (only for cols with nulls and not ALL_NULL) + int nullBitmapBytes = (numRows + 7) / 8; + for (int i = 0; i < numColumns; i++) { + if (hasNulls[i] && encodings[i] != ENCODING_ALL_NULL) { + System.arraycopy(nullBitmaps[i], 0, out, pos, nullBitmapBytes); + pos += nullBitmapBytes; + } + } + + // 2f. Column data + for (int i = 0; i < numColumns; i++) { + if (encodings[i] == ENCODING_PLAIN) { + System.arraycopy(valueBuffers[i], 0, out, pos, valueBufPos[i]); + pos += valueBufPos[i]; + } else if (encodings[i] == ENCODING_DICT) { + int w = fixedWidths[i]; + int valPos = 0; + for (int r = 0; r < numRows; r++) { + boolean isNull = (nullBitmaps[i][r / 8] & (1 << (r % 8))) != 0; + if (!isNull) { + if (longDictMaps[i] != null) { + long key = extractFixedKey(valueBuffers[i], valPos, w); + valPos += w; + out[pos++] = (byte) (int) longDictMaps[i].get(key); + } else { + int valueLen; + if (w > 0) { + valueLen = w; + } else { + int varLen = readVarint(valueBuffers[i], valPos); + valueLen = varintSize(varLen) + varLen; + } + ByteKey key = new ByteKey(valueBuffers[i], valPos, valueLen); + valPos += valueLen; + out[pos++] = (byte) (int) byteDictMaps[i].get(key); + } + } + } + } + // CONST and ALL_NULL: no column data + } + + return out; + } + + private byte[] computeOutBuffer(int numOutputCols, byte[] encodings, boolean[] hasNulls) { + int nullBitmapBytesPerCol = (numRows + 7) / 8; + int exactSize = (numOutputCols * 2 + 7) / 8 + (numOutputCols + 7) / 8; + for (int i = 0; i < numColumns; i++) { + if (encodings[i] == ENCODING_ALL_NULL) { + continue; + } + if (hasNulls[i]) { + exactSize += nullBitmapBytesPerCol; + } + if (encodings[i] == ENCODING_CONST) { + exactSize += firstValueLen[i]; + } else if (encodings[i] == ENCODING_DICT) { + if (longDictMaps[i] != null) { + int numEntries = longDictMaps[i].size(); + exactSize += + varintSize(numEntries) + numEntries * fixedWidths[i] + nonNullCounts[i]; + } else { + int numEntries = byteDictMaps[i].size(); + exactSize += 
varintSize(numEntries);
                    // Variable-width dict entries are stored back-to-back, so the
                    // metadata cost is the sum of the raw key bytes.
                    for (ByteKey key : byteDictMaps[i].keySet()) {
                        exactSize += key.data.length;
                    }
                    // One 1-byte dict code per non-null row.
                    exactSize += nonNullCounts[i];
                }
            } else if (encodings[i] == ENCODING_PLAIN) {
                exactSize += valueBufPos[i];
            }
        }
        return new byte[exactSize];
    }

    /**
     * Returns the number of distinct non-null values tracked for the column, or -1 if no
     * dictionary (neither fixed-width nor variable-width) is being tracked for it.
     */
    private int getDictSize(int colIdx) {
        if (longDictMaps[colIdx] != null) {
            return longDictMaps[colIdx].size();
        }
        if (byteDictMaps[colIdx] != null) {
            return byteDictMaps[colIdx].size();
        }
        return -1;
    }

    /** Compare dict encoded size vs plain size (pre-compression). */
    private int dictEncodedSize(int colIdx) {
        int numEntries;
        int entryBytes;
        if (longDictMaps[colIdx] != null) {
            numEntries = longDictMaps[colIdx].size();
            entryBytes = numEntries * fixedWidths[colIdx];
        } else if (byteDictMaps[colIdx] != null) {
            numEntries = byteDictMaps[colIdx].size();
            entryBytes = 0;
            for (ByteKey key : byteDictMaps[colIdx].keySet()) {
                entryBytes += key.data.length;
            }
        } else {
            // No dictionary tracked: make dict encoding always lose the comparison.
            return Integer.MAX_VALUE;
        }
        // varint(count) + raw dictionary entries + one 1-byte code per non-null row.
        return varintSize(numEntries) + entryBytes + nonNullCounts[colIdx];
    }

    /** Returns, per column, whether every value seen so far was null. */
    public boolean[] getAllNullFlags() {
        boolean[] flags = new boolean[numColumns];
        for (int i = 0; i < numColumns; i++) {
            flags[i] = nonNullCounts[i] == 0;
        }
        return flags;
    }

    /**
     * Clears all per-row-group state (null bitmaps, value buffers, const/dict tracking) so the
     * writer can accumulate the next row group. Dictionary maps are cleared, not reallocated,
     * to avoid churn.
     */
    public void reset() {
        for (int i = 0; i < numColumns; i++) {
            Arrays.fill(nullBitmaps[i], (byte) 0);
            valueBufPos[i] = 0;
            nonNullCounts[i] = 0;
            constTracking[i] = true;
            firstValueLen[i] = 0;
            dictTotalBytes[i] = 0;
            if (usesLongDict(i)) {
                if (longDictMaps[i] != null) {
                    longDictMaps[i].clear();
                } else {
                    longDictMaps[i] = new HashMap<>();
                }
            } else {
                if (byteDictMaps[i] != null) {
                    byteDictMaps[i].clear();
                } else {
                    byteDictMaps[i] = new HashMap<>();
                }
            }
        }
        numRows = 0;
    }

    // ======================== Value writing ========================

    /**
     * Appends one non-null value to the column's value buffer: fixed-width columns use the
     * big-endian fixed encoding, others a varint-length-prefixed byte encoding.
     */
    private void writeValue(int colIdx, Object value) {
        int w = fixedWidths[colIdx];
        if (w > 0) {
            ensureValueCapacity(colIdx, w);
writeFixedValue(valueBuffers[colIdx], valueBufPos[colIdx], value, w);
            valueBufPos[colIdx] += w;
        } else {
            writeVariableValue(colIdx, value);
        }
    }

    /**
     * Writes one value in big-endian fixed-width form. Width selects the encoding:
     * 1 = boolean/tinyint, 2 = smallint, 4 = int/date/time/float (raw bits),
     * 8 = bigint/double (raw bits)/compact decimal/compact timestamp (millis),
     * 12 = non-compact timestamp (8-byte millis + 4-byte nano-of-milli).
     * Unknown widths are silently ignored (callers only pass widths from getFixedWidth).
     */
    private static void writeFixedValue(byte[] buf, int pos, Object value, int width) {
        switch (width) {
            case 1:
                if (value instanceof Boolean) {
                    buf[pos] = (byte) ((Boolean) value ? 1 : 0);
                } else {
                    buf[pos] = (Byte) value;
                }
                break;
            case 2:
                {
                    short v = (Short) value;
                    buf[pos] = (byte) (v >>> 8);
                    buf[pos + 1] = (byte) v;
                    break;
                }
            case 4:
                {
                    int v;
                    if (value instanceof Float) {
                        // Raw IEEE-754 bits so the value round-trips exactly.
                        v = Float.floatToRawIntBits((Float) value);
                    } else {
                        v = (Integer) value;
                    }
                    buf[pos] = (byte) (v >>> 24);
                    buf[pos + 1] = (byte) (v >>> 16);
                    buf[pos + 2] = (byte) (v >>> 8);
                    buf[pos + 3] = (byte) v;
                    break;
                }
            case 8:
                {
                    long v;
                    if (value instanceof Long) {
                        v = (Long) value;
                    } else if (value instanceof Double) {
                        v = Double.doubleToRawLongBits((Double) value);
                    } else if (value instanceof Decimal) {
                        v = ((Decimal) value).toUnscaledLong();
                    } else if (value instanceof Timestamp) {
                        v = ((Timestamp) value).getMillisecond();
                    } else {
                        throw new IllegalArgumentException("Unsupported type: " + value.getClass());
                    }
                    writeLong(buf, pos, v);
                    break;
                }
            case 12:
                {
                    Timestamp ts = (Timestamp) value;
                    long millis = ts.getMillisecond();
                    int nanos = ts.getNanoOfMillisecond();
                    writeLong(buf, pos, millis);
                    buf[pos + 8] = (byte) (nanos >>> 24);
                    buf[pos + 9] = (byte) (nanos >>> 16);
                    buf[pos + 10] = (byte) (nanos >>> 8);
                    buf[pos + 11] = (byte) nanos;
                    break;
                }
            default:
                break;
        }
    }

    /** Writes an 8-byte big-endian long at {@code pos}. */
    private static void writeLong(byte[] buf, int pos, long v) {
        buf[pos] = (byte) (v >>> 56);
        buf[pos + 1] = (byte) (v >>> 48);
        buf[pos + 2] = (byte) (v >>> 40);
        buf[pos + 3] = (byte) (v >>> 32);
        buf[pos + 4] = (byte) (v >>> 24);
        buf[pos + 5] = (byte) (v >>> 16);
        buf[pos + 6] = (byte) (v >>> 8);
        buf[pos + 7] = (byte) v;
    }

    /**
     * Appends a variable-width value as varint(length) + raw bytes. Supports strings,
     * raw byte arrays, and non-compact decimals (unscaled big-endian bytes).
     */
    private void
writeVariableValue(int colIdx, Object value) {
        byte[] bytes;
        if (value instanceof BinaryString) {
            bytes = ((BinaryString) value).toBytes();
        } else if (value instanceof byte[]) {
            bytes = (byte[]) value;
        } else if (value instanceof Decimal) {
            bytes = ((Decimal) value).toUnscaledBytes();
        } else {
            throw new UnsupportedOperationException("Unsupported variable-width type: " + value);
        }
        // 5 = maximum encoded size of a 32-bit varint length prefix.
        ensureValueCapacity(colIdx, 5 + bytes.length);
        valueBufPos[colIdx] = writeVarint(valueBuffers[colIdx], valueBufPos[colIdx], bytes.length);
        System.arraycopy(bytes, 0, valueBuffers[colIdx], valueBufPos[colIdx], bytes.length);
        valueBufPos[colIdx] += bytes.length;
    }

    // ======================== Fixed-width key helpers ========================

    /**
     * Reads a 1/2/4/8-byte big-endian value as an unsigned long dictionary key.
     * Returns 0 for unsupported widths (callers only pass supported widths).
     */
    private static long extractFixedKey(byte[] buf, int pos, int width) {
        switch (width) {
            case 1:
                return buf[pos] & 0xFFL;
            case 2:
                return ((buf[pos] & 0xFFL) << 8) | (buf[pos + 1] & 0xFFL);
            case 4:
                return ((buf[pos] & 0xFFL) << 24)
                        | ((buf[pos + 1] & 0xFFL) << 16)
                        | ((buf[pos + 2] & 0xFFL) << 8)
                        | (buf[pos + 3] & 0xFFL);
            case 8:
                return ((buf[pos] & 0xFFL) << 56)
                        | ((buf[pos + 1] & 0xFFL) << 48)
                        | ((buf[pos + 2] & 0xFFL) << 40)
                        | ((buf[pos + 3] & 0xFFL) << 32)
                        | ((buf[pos + 4] & 0xFFL) << 24)
                        | ((buf[pos + 5] & 0xFFL) << 16)
                        | ((buf[pos + 6] & 0xFFL) << 8)
                        | (buf[pos + 7] & 0xFFL);
            default:
                return 0;
        }
    }

    /** Inverse of {@link #extractFixedKey}: writes the key big-endian, returns the new pos. */
    private static int writeFixedKey(byte[] buf, int pos, long key, int width) {
        switch (width) {
            case 1:
                buf[pos++] = (byte) key;
                break;
            case 2:
                buf[pos++] = (byte) (key >>> 8);
                buf[pos++] = (byte) key;
                break;
            case 4:
                buf[pos++] = (byte) (key >>> 24);
                buf[pos++] = (byte) (key >>> 16);
                buf[pos++] = (byte) (key >>> 8);
                buf[pos++] = (byte) key;
                break;
            case 8:
                buf[pos++] = (byte) (key >>> 56);
                buf[pos++] = (byte) (key >>> 48);
                buf[pos++] = (byte) (key >>> 40);
                buf[pos++] = (byte) (key >>> 32);
                buf[pos++] = (byte) (key >>> 24);
                buf[pos++] =
(byte) (key >>> 16);
                buf[pos++] = (byte) (key >>> 8);
                buf[pos++] = (byte) key;
                break;
            default:
                break;
        }
        return pos;
    }

    // ======================== Buffer helpers ========================

    /**
     * Grows the column's value buffer (doubling, at least to the required size) so that
     * {@code additional} more bytes fit.
     */
    private void ensureValueCapacity(int colIdx, int additional) {
        int required = valueBufPos[colIdx] + additional;
        if (required > valueBuffers[colIdx].length) {
            int newLen = Math.max(valueBuffers[colIdx].length * 2, required);
            byte[] newBuf = new byte[newLen];
            System.arraycopy(valueBuffers[colIdx], 0, newBuf, 0, valueBufPos[colIdx]);
            valueBuffers[colIdx] = newBuf;
        }
    }

    /** Byte-wise equality of two regions of the same buffer. */
    private static boolean regionEquals(byte[] buf, int off1, int off2, int len) {
        for (int i = 0; i < len; i++) {
            if (buf[off1 + i] != buf[off2 + i]) {
                return false;
            }
        }
        return true;
    }

    // ======================== Type width ========================

    /**
     * Returns the fixed byte width used to encode the type, or -1 when the type is stored
     * variable-width (varint length prefix + raw bytes), e.g. strings, binary, non-compact
     * decimals.
     */
    static int getFixedWidth(DataType type) {
        switch (type.getTypeRoot()) {
            case BOOLEAN:
            case TINYINT:
                return 1;
            case SMALLINT:
                return 2;
            case INTEGER:
            case DATE:
            case TIME_WITHOUT_TIME_ZONE:
            case FLOAT:
                return 4;
            case BIGINT:
            case DOUBLE:
                return 8;
            case DECIMAL:
                // Compact decimals fit an unscaled long; others are variable-width.
                if (Decimal.isCompact(((DecimalType) type).getPrecision())) {
                    return 8;
                }
                return -1;
            case TIMESTAMP_WITHOUT_TIME_ZONE:
                // Compact timestamps are millis-only; others carry nano-of-milli too.
                if (Timestamp.isCompact(((TimestampType) type).getPrecision())) {
                    return 8;
                }
                return 12;
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                if (Timestamp.isCompact(((LocalZonedTimestampType) type).getPrecision())) {
                    return 8;
                }
                return 12;
            default:
                return -1;
        }
    }

    // ======================== Varint helpers ========================

    /** Decodes an LEB128-style varint (7 data bits per byte, high bit = continuation). */
    private static int readVarint(byte[] buf, int pos) {
        int value = 0;
        int shift = 0;
        int b;
        do {
            b = buf[pos++] & 0xFF;
            value |= (b & 0x7F) << shift;
            shift += 7;
        } while ((b & 0x80) != 0);
        return value;
    }

    /** Encoded size in bytes of {@code value} as a varint (1..5 for a non-negative int). */
    private static int varintSize(int value) {
        int size = 1;
        while ((value & ~0x7F) != 0) {
            size++;
            value >>>= 7;
        }
        return
size;
    }

    // ======================== ByteKey ========================

    /** Immutable byte array wrapper with value-based hash and equals for dict tracking. */
    static final class ByteKey {
        // Defensive copy of the key bytes; never mutated after construction.
        final byte[] data;
        // Hash is precomputed once (same 31-based scheme as Arrays.hashCode would use,
        // modulo the seed) since keys are hashed on every dictionary lookup.
        private final int hash;

        ByteKey(byte[] source, int offset, int length) {
            this.data = new byte[length];
            System.arraycopy(source, offset, this.data, 0, length);
            int h = 1;
            for (int i = 0; i < length; i++) {
                h = 31 * h + this.data[i];
            }
            this.hash = h;
        }

        @Override
        public int hashCode() {
            return hash;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof ByteKey)) {
                return false;
            }
            return Arrays.equals(data, ((ByteKey) obj).data);
        }
    }
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileAnalyzer.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileAnalyzer.java
new file mode 100644
index 000000000000..f7f1bac2d329
--- /dev/null
+++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileAnalyzer.java
@@ -0,0 +1,135 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.fs.FileIO; +import org.apache.paimon.fs.Path; +import org.apache.paimon.fs.SeekableInputStream; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static org.apache.paimon.format.mosaic.MosaicUtils.readLong; +import static org.apache.paimon.format.mosaic.MosaicUtils.readVarint; +import static org.apache.paimon.utils.IOUtils.readFully; + +/** Utility to analyze the storage breakdown of a Mosaic file. */ +public class MosaicFileAnalyzer { + + public static String analyze(FileIO fileIO, Path path) throws IOException { + long fileSize = fileIO.getFileSize(path); + try (SeekableInputStream in = fileIO.newInputStream(path)) { + return analyze(in, fileSize); + } + } + + public static String analyze(SeekableInputStream in, long fileSize) throws IOException { + in.seek(fileSize - MosaicSpec.FOOTER_SIZE); + byte[] footerBytes = new byte[MosaicSpec.FOOTER_SIZE]; + readFully(in, footerBytes); + ByteBuffer footer = ByteBuffer.wrap(footerBytes).order(ByteOrder.BIG_ENDIAN); + long indexOffset = footer.getLong(); + long schemaBlockOffset = footer.getLong(); + int numBuckets = footer.getInt(); + int numRowGroups = footer.getInt(); + byte compression = footer.get(); + byte version = footer.get(); + + long schemaBlockSize = indexOffset - schemaBlockOffset; + long indexSize = fileSize - MosaicSpec.FOOTER_SIZE - indexOffset; + + // Schema uncompressed size + in.seek(schemaBlockOffset); + byte[] lenBuf = new byte[4]; + readFully(in, lenBuf); + int schemaUncompressed = ByteBuffer.wrap(lenBuf).order(ByteOrder.BIG_ENDIAN).getInt(); + long schemaCompressed = schemaBlockSize - 4; + + // Per-bucket stats from row group index (varint encoded, non-empty only) + in.seek(indexOffset); + byte[] indexBytes = new byte[(int) indexSize]; + readFully(in, indexBytes); + int[] idxPos = {0}; + + long totalCompressed = 0; + long totalUncompressed = 0; + int nonEmptyBuckets = 0; + int 
totalRows = 0; + + for (int rg = 0; rg < numRowGroups; rg++) { + totalRows += readVarint(indexBytes, idxPos); + int nonEmpty = readVarint(indexBytes, idxPos); + nonEmptyBuckets += nonEmpty; + for (int i = 0; i < nonEmpty; i++) { + readVarint(indexBytes, idxPos); // bucketId + readLong(indexBytes, idxPos); // offset + int cs = readVarint(indexBytes, idxPos); + int us = readVarint(indexBytes, idxPos); + totalCompressed += cs; + totalUncompressed += us; + } + } + + return String.format( + "=== Mosaic File Analysis ===%n" + + "File size: %,d bytes (%.1f KB)%n" + + "Version: %d%n" + + "Compression: %d%n" + + "Buckets: %d (%d non-empty)%n" + + "Row groups: %d%n" + + "Total rows: %,d%n%n", + fileSize, + fileSize / 1024.0, + version, + compression, + numBuckets, + nonEmptyBuckets, + numRowGroups, + totalRows) + + String.format( + "--- Section Sizes ---%n" + + "Bucket data: %,9d bytes (%5.1f KB, %5.1f%%)%n" + + "Schema block: %,9d bytes (%5.1f KB, %5.1f%%)%n" + + "Row group index: %,9d bytes (%5.1f KB, %5.1f%%)%n" + + "Footer: %,9d bytes (%5.1f KB, %5.1f%%)%n%n", + schemaBlockOffset, + schemaBlockOffset / 1024.0, + 100.0 * schemaBlockOffset / fileSize, + schemaBlockSize, + schemaBlockSize / 1024.0, + 100.0 * schemaBlockSize / fileSize, + indexSize, + indexSize / 1024.0, + 100.0 * indexSize / fileSize, + (long) MosaicSpec.FOOTER_SIZE, + MosaicSpec.FOOTER_SIZE / 1024.0, + 100.0 * MosaicSpec.FOOTER_SIZE / fileSize) + + String.format( + "--- Compression ---%n" + + "Schema: %,9d -> %,9d bytes (%.1fx)%n" + + "Bucket data: %,9d -> %,9d bytes (%.1fx)%n", + schemaUncompressed, + schemaCompressed, + schemaCompressed > 0 ? (double) schemaUncompressed / schemaCompressed : 0, + totalUncompressed, + totalCompressed, + totalCompressed > 0 ? 
(double) totalUncompressed / totalCompressed : 0); + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileFormat.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileFormat.java new file mode 100644 index 000000000000..ebb969344677 --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileFormat.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.format.FileFormat; +import org.apache.paimon.format.FileFormatFactory.FormatContext; +import org.apache.paimon.format.FormatReaderFactory; +import org.apache.paimon.format.FormatWriterFactory; +import org.apache.paimon.predicate.Predicate; +import org.apache.paimon.types.DataTypeRoot; +import org.apache.paimon.types.RowType; + +import javax.annotation.Nullable; + +import java.util.List; + +/** + * Mosaic file format: a column-bucket hybrid format optimized for wide tables (1,000-100,000+ + * columns). Columns are hashed into buckets, row-stored within each bucket, and independently + * compressed. Projection pushdown works at bucket granularity. 
+ */ +public class MosaicFileFormat extends FileFormat { + + private final int numBuckets; + private final int zstdLevel; + private final long rowGroupMaxSize; + + public MosaicFileFormat(FormatContext formatContext) { + super(MosaicFileFormatFactory.IDENTIFIER); + this.numBuckets = + formatContext + .options() + .getOptional(MosaicOptions.NUM_COLUMN_BUCKETS) + .orElse(MosaicSpec.DEFAULT_NUM_BUCKETS); + this.zstdLevel = formatContext.zstdLevel(); + this.rowGroupMaxSize = formatContext.writeBatchMemory().getBytes(); + } + + @Override + public FormatReaderFactory createReaderFactory( + RowType dataSchemaRowType, + RowType projectedRowType, + @Nullable List filters) { + return new MosaicReaderFactory(projectedRowType); + } + + @Override + public FormatWriterFactory createWriterFactory(RowType type) { + return new MosaicWriterFactory(type, numBuckets, zstdLevel, rowGroupMaxSize); + } + + @Override + public void validateDataFields(RowType rowType) { + rowType.getFields().forEach(f -> validateFieldType(f.type().getTypeRoot(), f.name())); + } + + private static void validateFieldType(DataTypeRoot root, String fieldName) { + switch (root) { + case ARRAY: + case VECTOR: + case MAP: + case MULTISET: + case ROW: + case VARIANT: + case BLOB: + throw new UnsupportedOperationException( + "Unsupported type: " + root + " for field: " + fieldName); + default: + } + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileFormatFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileFormatFactory.java new file mode 100644 index 000000000000..d94aff596ed7 --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicFileFormatFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.format.mosaic;

import org.apache.paimon.format.FileFormat;
import org.apache.paimon.format.FileFormatFactory;

/** Factory for creating Mosaic file format instances via SPI. */
public class MosaicFileFormatFactory implements FileFormatFactory {

    // Format identifier used in table options (e.g. file.format = "mosaic").
    public static final String IDENTIFIER = "mosaic";

    @Override
    public String identifier() {
        return IDENTIFIER;
    }

    @Override
    public FileFormat create(FormatContext formatContext) {
        return new MosaicFileFormat(formatContext);
    }
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicOptions.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicOptions.java
new file mode 100644
index 000000000000..51f0aa9557f5
--- /dev/null
+++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicOptions.java
@@ -0,0 +1,35 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.options.ConfigOption; +import org.apache.paimon.options.ConfigOptions; + +/** Configuration options for the Mosaic file format. */ +public class MosaicOptions { + + public static final ConfigOption NUM_COLUMN_BUCKETS = + ConfigOptions.key("mosaic.num-column-buckets") + .intType() + .defaultValue(MosaicSpec.DEFAULT_NUM_BUCKETS) + .withDescription( + "Number of column buckets in the Mosaic format. " + + "Columns are hashed into this many buckets. " + + "Default is 100."); +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicReader.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicReader.java new file mode 100644 index 000000000000..b53befc243ff --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicReader.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.fs.FileIO; +import org.apache.paimon.fs.Path; +import org.apache.paimon.fs.SeekableInputStream; +import org.apache.paimon.reader.FileRecordIterator; +import org.apache.paimon.reader.FileRecordReader; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.IteratorResultIterator; +import org.apache.paimon.utils.IteratorWithException; + +import com.github.luben.zstd.Zstd; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.Set; + +import static org.apache.paimon.format.mosaic.MosaicSpec.COMPRESSION_NONE; +import static org.apache.paimon.format.mosaic.MosaicSpec.COMPRESSION_ZSTD; +import static org.apache.paimon.format.mosaic.MosaicUtils.readLong; +import static org.apache.paimon.format.mosaic.MosaicUtils.readVarint; + +/** Reader for the Mosaic file format with row group support. 
*/ +public class MosaicReader implements FileRecordReader { + + private final Path filePath; + private final SeekableInputStream inputStream; + private final RowType projectedRowType; + + private byte compression; + private int[] sortedRequiredBuckets; + private MosaicSpec.RowGroupMeta[] rowGroupMetas; + private MosaicBucketReader[] bucketReaders; + private int currentRowGroup; + private byte[] compressedBuf; + + public MosaicReader(FileIO fileIO, Path filePath, long fileSize, RowType projectedRowType) + throws IOException { + this.filePath = filePath; + this.inputStream = fileIO.newInputStream(filePath); + this.projectedRowType = projectedRowType; + this.currentRowGroup = 0; + + readFooterAndInit(fileSize); + } + + private void readFooterAndInit(long fileSize) throws IOException { + // Read footer (last 32 bytes) + inputStream.seek(fileSize - MosaicSpec.FOOTER_SIZE); + byte[] footerBytes = new byte[MosaicSpec.FOOTER_SIZE]; + readFully(footerBytes); + + ByteBuffer footer = ByteBuffer.wrap(footerBytes).order(ByteOrder.BIG_ENDIAN); + long indexOffset = footer.getLong(); + long schemaBlockOffset = footer.getLong(); + int numBuckets = footer.getInt(); + int numRowGroups = footer.getInt(); + this.compression = footer.get(); + byte version = footer.get(); + footer.getShort(); // padding + byte[] magic = new byte[4]; + footer.get(magic); + + if (magic[0] != 'M' || magic[1] != 'O' || magic[2] != 'S' || magic[3] != 'A') { + throw new IOException("Invalid Mosaic file: bad magic bytes"); + } + + if (version != MosaicSpec.VERSION) { + throw new IOException( + "Unsupported Mosaic file version: " + + version + + ", expected: " + + MosaicSpec.VERSION); + } + + // Read schema block + inputStream.seek(schemaBlockOffset); + int schemaUncompressedSize = readInt(); + int schemaCompressedSize = (int) (indexOffset - schemaBlockOffset - 4); + byte[] schemaCompressed = new byte[schemaCompressedSize]; + readFully(schemaCompressed); + + byte[] schemaRaw; + switch (compression) { + case 
COMPRESSION_NONE: + schemaRaw = schemaCompressed; + break; + case COMPRESSION_ZSTD: + schemaRaw = new byte[schemaUncompressedSize]; + Zstd.decompress(schemaRaw, schemaCompressed); + break; + default: + throw new UnsupportedEncodingException("Unsupported compression: " + compression); + } + MosaicSchema schema = MosaicSchema.deserialize(schemaRaw); + + // Determine which buckets we need + Set requiredBuckets = schema.getRequiredBuckets(projectedRowType); + + // Read row group index (varint encoded, only non-empty buckets) + inputStream.seek(indexOffset); + int indexSize = (int) (fileSize - MosaicSpec.FOOTER_SIZE - indexOffset); + byte[] indexBytes = new byte[indexSize]; + readFully(indexBytes); + int[] idxPos = {0}; + + this.rowGroupMetas = new MosaicSpec.RowGroupMeta[numRowGroups]; + for (int rg = 0; rg < numRowGroups; rg++) { + int numRows = readVarint(indexBytes, idxPos); + int nonEmpty = readVarint(indexBytes, idxPos); + + long[] bucketOffsets = new long[numBuckets]; + int[] compressedSizes = new int[numBuckets]; + int[] uncompressedSizes = new int[numBuckets]; + + for (int i = 0; i < nonEmpty; i++) { + int bucketId = readVarint(indexBytes, idxPos); + bucketOffsets[bucketId] = readLong(indexBytes, idxPos); + compressedSizes[bucketId] = readVarint(indexBytes, idxPos); + uncompressedSizes[bucketId] = readVarint(indexBytes, idxPos); + } + + rowGroupMetas[rg] = + new MosaicSpec.RowGroupMeta( + numRows, bucketOffsets, compressedSizes, uncompressedSizes); + } + + this.bucketReaders = new MosaicBucketReader[numBuckets]; + int count = 0; + for (int b : requiredBuckets) { + DataType[] bucketTypes = schema.getBucketColumnTypes(b); + int[] projMapping = schema.getProjectionMapping(b, projectedRowType); + if (projMapping != null) { + bucketReaders[b] = new MosaicBucketReader(bucketTypes, projMapping); + count++; + } + } + this.sortedRequiredBuckets = new int[count]; + int idx = 0; + for (int b : requiredBuckets) { + if (bucketReaders[b] != null) { + 
sortedRequiredBuckets[idx++] = b; + } + } + this.compressedBuf = new byte[0]; + } + + @Nullable + @Override + public FileRecordIterator readBatch() throws IOException { + if (currentRowGroup >= rowGroupMetas.length) { + return null; + } + + MosaicSpec.RowGroupMeta meta = rowGroupMetas[currentRowGroup++]; + if (meta.numRows == 0) { + return readBatch(); + } + + final MosaicBucketReader[] readers = this.bucketReaders; + + // Sort required buckets by file offset for sequential I/O + int[] ordered = Arrays.copyOf(sortedRequiredBuckets, sortedRequiredBuckets.length); + final long[] offsets = meta.bucketOffsets; + // insertion sort — array is small (number of projected buckets) + for (int i = 1; i < ordered.length; i++) { + int key = ordered[i]; + long keyOff = offsets[key]; + int j = i - 1; + while (j >= 0 && offsets[ordered[j]] > keyOff) { + ordered[j + 1] = ordered[j]; + j--; + } + ordered[j + 1] = key; + } + + int activeCount = 0; + int[] activeBuckets = new int[ordered.length]; + + for (int b : ordered) { + if (meta.compressedSizes[b] == 0) { + continue; + } + + int compSize = meta.compressedSizes[b]; + inputStream.seek(meta.bucketOffsets[b]); + + byte[] bucketData; + switch (compression) { + case COMPRESSION_NONE: + bucketData = new byte[compSize]; + readFully(bucketData); + break; + case COMPRESSION_ZSTD: + if (compressedBuf.length < compSize) { + compressedBuf = new byte[compSize]; + } + readFully(compressedBuf, compSize); + int uncompSize = meta.uncompressedSizes[b]; + bucketData = new byte[uncompSize]; + Zstd.decompressByteArray(bucketData, 0, uncompSize, compressedBuf, 0, compSize); + break; + default: + throw new UnsupportedEncodingException( + "Unsupported compression: " + compression); + } + + readers[b].init(bucketData, meta.numRows); + activeBuckets[activeCount++] = b; + } + + final int[] active = Arrays.copyOf(activeBuckets, activeCount); + return new IteratorResultIterator( + toIterator(meta.numRows, active, readers), null, filePath, 0); + } + + private 
IteratorWithException toIterator( + int totalRows, int[] active, MosaicBucketReader[] readers) { + final int projectedFieldCount = projectedRowType.getFieldCount(); + return new IteratorWithException() { + int currentRow = 0; + final Object[] fields = new Object[projectedFieldCount]; + + @Override + public boolean hasNext() { + return currentRow < totalRows; + } + + @Override + public InternalRow next() { + Arrays.fill(fields, null); + for (int j : active) { + readers[j].readRow(fields); + } + currentRow++; + return GenericRow.of(fields); + } + }; + } + + @Override + public void close() throws IOException { + inputStream.close(); + } + + private void readFully(byte[] buf) throws IOException { + readFully(buf, buf.length); + } + + private void readFully(byte[] buf, int len) throws IOException { + int offset = 0; + while (offset < len) { + int read = inputStream.read(buf, offset, len - offset); + if (read < 0) { + throw new IOException("Unexpected EOF"); + } + offset += read; + } + } + + private int readInt() throws IOException { + byte[] buf = new byte[4]; + readFully(buf); + return ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN).getInt(); + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicReaderFactory.java new file mode 100644 index 000000000000..3a5704eb1b0a --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicReaderFactory.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.format.FormatReaderFactory; +import org.apache.paimon.reader.FileRecordReader; +import org.apache.paimon.types.RowType; + +import java.io.IOException; + +/** Factory for creating {@link MosaicReader} instances. */ +public class MosaicReaderFactory implements FormatReaderFactory { + + private final RowType projectedRowType; + + public MosaicReaderFactory(RowType projectedRowType) { + this.projectedRowType = projectedRowType; + } + + @Override + public FileRecordReader createReader(Context context) throws IOException { + return new MosaicReader( + context.fileIO(), context.filePath(), context.fileSize(), projectedRowType); + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicSchema.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicSchema.java new file mode 100644 index 000000000000..03d9f16edb57 --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicSchema.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.types.DataField; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.RowType; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.apache.paimon.format.mosaic.MosaicUtils.readVarint; +import static org.apache.paimon.format.mosaic.MosaicUtils.writeVarint; + +/** Schema block for the Mosaic file format. Stores column metadata and bucket assignments. 
 */
public class MosaicSchema {

    // Total bucket count the columns were hashed into; fixed at write time.
    private final int numBuckets;

    // All columns in schema order; each knows its bucket and local position in it.
    private final List<ColumnMeta> columns;

    // For each bucket: the global column indices it contains, in bucket-local order.
    private final int[][] bucketToGlobalIndices;

    private MosaicSchema(int numBuckets, List<ColumnMeta> columns, int[][] bucketToGlobalIndices) {
        this.numBuckets = numBuckets;
        this.columns = columns;
        this.bucketToGlobalIndices = bucketToGlobalIndices;
    }

    /**
     * Builds a schema by hashing every column of {@code rowType} into {@code numBuckets} buckets
     * (see {@link MosaicSpec#groupColumnsByBucket}).
     */
    public static MosaicSchema create(RowType rowType, int numBuckets) {
        int[][] bucketMapping = MosaicSpec.groupColumnsByBucket(rowType, numBuckets);
        List<DataField> fields = rowType.getFields();
        List<ColumnMeta> columns = new ArrayList<>(fields.size());

        // Invert the bucket mapping: for each global column index, record its bucket
        // and its local position inside that bucket.
        int[] columnToBucket = new int[fields.size()];
        int[] columnToIndexInBucket = new int[fields.size()];
        for (int b = 0; b < numBuckets; b++) {
            for (int localIdx = 0; localIdx < bucketMapping[b].length; localIdx++) {
                int globalIdx = bucketMapping[b][localIdx];
                columnToBucket[globalIdx] = b;
                columnToIndexInBucket[globalIdx] = localIdx;
            }
        }

        for (int i = 0; i < fields.size(); i++) {
            DataField field = fields.get(i);
            columns.add(
                    new ColumnMeta(
                            field.id(),
                            field.name(),
                            field.type(),
                            columnToBucket[i],
                            columnToIndexInBucket[i]));
        }

        return new MosaicSchema(numBuckets, columns, bucketMapping);
    }

    public int numBuckets() {
        return numBuckets;
    }

    public int[][] bucketToGlobalIndices() {
        return bucketToGlobalIndices;
    }

    /** Returns the data types of the columns in {@code bucketId}, in bucket-local order. */
    public DataType[] getBucketColumnTypes(int bucketId) {
        int[] globalIndices = bucketToGlobalIndices[bucketId];
        DataType[] types = new DataType[globalIndices.length];
        for (int i = 0; i < globalIndices.length; i++) {
            types[i] = columns.get(globalIndices[i]).type;
        }
        return types;
    }

    /** Returns the set of bucket IDs that contain at least one projected column. */
    public Set<Integer> getRequiredBuckets(RowType projectedRowType) {
        Set<String> projectedNames = new HashSet<>(projectedRowType.getFieldNames());
        Set<Integer> requiredBuckets = new HashSet<>();
        for (ColumnMeta col : columns) {
            if (projectedNames.contains(col.name)) {
                requiredBuckets.add(col.bucketId);
            }
        }
        return requiredBuckets;
    }

    /**
     * For a given bucket, returns the mapping from local column indices within the bucket to output
     * positions in the projected row. The array index is the local column index, and the value is
     * the output position (-1 means skip). Returns null if no columns in this bucket are projected.
     */
    public int[] getProjectionMapping(int bucketId, RowType projectedRowType) {
        // Projection is matched by column NAME, mirroring getRequiredBuckets.
        Map<String, Integer> projectedNameToPos = new HashMap<>();
        List<String> projectedNames = projectedRowType.getFieldNames();
        for (int i = 0; i < projectedNames.size(); i++) {
            projectedNameToPos.put(projectedNames.get(i), i);
        }

        int[] globalIndices = bucketToGlobalIndices[bucketId];
        int[] localToOutput = new int[globalIndices.length];
        Arrays.fill(localToOutput, -1);
        boolean hasProjection = false;
        for (int localIdx = 0; localIdx < globalIndices.length; localIdx++) {
            ColumnMeta col = columns.get(globalIndices[localIdx]);
            Integer outputPos = projectedNameToPos.get(col.name);
            if (outputPos != null) {
                localToOutput[localIdx] = outputPos;
                hasProjection = true;
            }
        }
        return hasProjection ? localToOutput : null;
    }

    /**
     * Serializes the schema: varint column count and bucket count, then per column the field id,
     * bucket id, index-in-bucket, the front-coded name (shared-prefix length + suffix) and the
     * binary-encoded type. Field order is significant — {@link #deserialize} reads it back in
     * exactly this order.
     */
    public byte[] serialize() throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(baos);

        writeVarint(out, columns.size());
        writeVarint(out, numBuckets);

        // Front coding: each column name stored as (sharedPrefixLen, suffix)
        byte[] prevNameBytes = new byte[0];
        for (ColumnMeta col : columns) {
            writeVarint(out, col.fieldId);
            writeVarint(out, col.bucketId);
            writeVarint(out, col.indexInBucket);

            byte[] nameBytes = col.name.getBytes(StandardCharsets.UTF_8);
            int shared = commonPrefixLength(prevNameBytes, nameBytes);
            writeVarint(out, shared);
            writeVarint(out, nameBytes.length - shared);
            out.write(nameBytes, shared, nameBytes.length - shared);
            prevNameBytes = nameBytes;

            MosaicTypes.writeType(out, col.type);
        }

        out.flush();
        return baos.toByteArray();
    }

    /** Inverse of {@link #serialize}; also rebuilds the bucket -&gt; global-index mapping. */
    public static MosaicSchema deserialize(byte[] data) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));

        int numColumns = readVarint(in);
        int numBuckets = readVarint(in);

        List<ColumnMeta> columns = new ArrayList<>(numColumns);
        List<List<Integer>> bucketLists = new ArrayList<>(numBuckets);
        for (int i = 0; i < numBuckets; i++) {
            bucketLists.add(new ArrayList<>());
        }

        byte[] prevNameBytes = new byte[0];
        for (int i = 0; i < numColumns; i++) {
            int fieldId = readVarint(in);
            int bucketId = readVarint(in);
            int indexInBucket = readVarint(in);

            // Undo front coding: shared prefix comes from the previous name.
            int shared = readVarint(in);
            int suffixLen = readVarint(in);
            byte[] nameBytes = new byte[shared + suffixLen];
            System.arraycopy(prevNameBytes, 0, nameBytes, 0, shared);
            in.readFully(nameBytes, shared, suffixLen);
            prevNameBytes = nameBytes;

            String name = new String(nameBytes, StandardCharsets.UTF_8);
            DataType type = MosaicTypes.readType(in);
            columns.add(new ColumnMeta(fieldId, name, type, bucketId, indexInBucket));
            bucketLists.get(bucketId).add(i);
        }

        int[][] bucketToGlobal = new int[numBuckets][];
        for (int b = 0; b < numBuckets; b++) {
            List<Integer> list = bucketLists.get(b);
            bucketToGlobal[b] = new int[list.size()];
            for (int j = 0; j < list.size(); j++) {
                bucketToGlobal[b][j] = list.get(j);
            }
        }

        return new MosaicSchema(numBuckets, columns, bucketToGlobal);
    }

    /**
     * Returns a schema with the columns flagged all-null removed.
     *
     * <p>{@code allNullByBucket[b][local]} marks bucket {@code b}'s local column {@code local} as
     * all-null; a null inner array means "nothing to prune in this bucket". Surviving columns keep
     * their bucket but are re-numbered within it. Returns {@code this} unchanged when nothing is
     * pruned.
     */
    public MosaicSchema pruneAllNullColumns(boolean[][] allNullByBucket) {
        Set<Integer> prunedGlobalIndices = new HashSet<>();
        for (int b = 0; b < numBuckets; b++) {
            if (allNullByBucket[b] == null) {
                continue;
            }
            int[] globalIndices = bucketToGlobalIndices[b];
            for (int local = 0; local < globalIndices.length; local++) {
                if (allNullByBucket[b][local]) {
                    prunedGlobalIndices.add(globalIndices[local]);
                }
            }
        }

        if (prunedGlobalIndices.isEmpty()) {
            return this;
        }

        // Keep surviving columns in original order; map old global index -> new index.
        List<ColumnMeta> newColumns = new ArrayList<>();
        Map<Integer, Integer> oldToNew = new HashMap<>();
        for (int i = 0; i < columns.size(); i++) {
            if (!prunedGlobalIndices.contains(i)) {
                oldToNew.put(i, newColumns.size());
                newColumns.add(columns.get(i));
            }
        }

        int[][] newBucketToGlobal = new int[numBuckets][];
        for (int b = 0; b < numBuckets; b++) {
            List<Integer> kept = new ArrayList<>();
            for (int globalIdx : bucketToGlobalIndices[b]) {
                Integer newIdx = oldToNew.get(globalIdx);
                if (newIdx != null) {
                    kept.add(newIdx);
                }
            }
            newBucketToGlobal[b] = new int[kept.size()];
            for (int j = 0; j < kept.size(); j++) {
                newBucketToGlobal[b][j] = kept.get(j);
                // Re-number indexInBucket to the column's new local slot j.
                ColumnMeta old = newColumns.get(kept.get(j));
                newColumns.set(
                        kept.get(j),
                        new ColumnMeta(old.fieldId, old.name, old.type, old.bucketId, j));
            }
        }

        return new MosaicSchema(numBuckets, newColumns, newBucketToGlobal);
    }

    /** Length of the longest common byte prefix of {@code a} and {@code b}. */
    private static int commonPrefixLength(byte[] a, byte[] b) {
        int len = Math.min(a.length, b.length);
        for (int i = 0; i < len; i++) {
            if (a[i] != b[i]) {
                return i;
            }
        }
        return len;
    }

    /** Metadata for a single column. */
    public static class ColumnMeta {
        public final int fieldId;
        public final String name;
        public final DataType type;
        public final int bucketId;
        public final int indexInBucket;

        public ColumnMeta(
                int fieldId, String name, DataType type, int bucketId, int indexInBucket) {
            this.fieldId = fieldId;
            this.name = name;
            this.type = type;
            this.bucketId = bucketId;
            this.indexInBucket = indexInBucket;
        }
    }
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicSpec.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicSpec.java
new file mode 100644
index 000000000000..36bb50d76e23
--- /dev/null
+++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicSpec.java
@@ -0,0 +1,100 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.format.mosaic;

import org.apache.paimon.types.DataField;
import org.apache.paimon.types.RowType;

import java.util.ArrayList;
import java.util.List;

/** Constants and utilities for the Mosaic file format.
 */
public class MosaicSpec {

    // Footer magic "MOSA" (0x4D4F5341), last 4 bytes of the file.
    public static final byte[] MAGIC = new byte[] {'M', 'O', 'S', 'A'};
    public static final byte VERSION = 1;

    // Fixed footer size in bytes; see the format spec table.
    public static final int FOOTER_SIZE = 32;

    public static final byte COMPRESSION_NONE = 0;
    public static final byte COMPRESSION_ZSTD = 1;

    public static final int DEFAULT_NUM_BUCKETS = 100;

    // Column encoding types (2 bits each in encoding flags)
    public static final byte ENCODING_PLAIN = 0;
    public static final byte ENCODING_CONST = 1;
    public static final byte ENCODING_DICT = 2;
    public static final byte ENCODING_ALL_NULL = 3;

    /**
     * Maps a column name to a bucket. Uses {@link String#hashCode()} — stable across JVMs, so
     * bucket assignment is reproducible between writer and reader. floorMod keeps the result
     * non-negative for negative hash codes.
     */
    public static int assignBucket(String fieldName, int numBuckets) {
        return Math.floorMod(fieldName.hashCode(), numBuckets);
    }

    /**
     * Groups columns by bucket. Returns an array where each element is the list of global column
     * indices assigned to that bucket.
     */
    public static int[][] groupColumnsByBucket(RowType rowType, int numBuckets) {
        List<DataField> fields = rowType.getFields();
        List<List<Integer>> buckets = new ArrayList<>(numBuckets);
        for (int i = 0; i < numBuckets; i++) {
            buckets.add(new ArrayList<>());
        }
        for (int i = 0; i < fields.size(); i++) {
            int bucketId = assignBucket(fields.get(i).name(), numBuckets);
            buckets.get(bucketId).add(i);
        }
        int[][] result = new int[numBuckets][];
        for (int i = 0; i < numBuckets; i++) {
            List<Integer> list = buckets.get(i);
            result[i] = new int[list.size()];
            for (int j = 0; j < list.size(); j++) {
                result[i][j] = list.get(j);
            }
        }
        return result;
    }

    /**
     * Maps a compression name to its footer byte. Null/empty/"none" mean uncompressed.
     *
     * @throws IllegalArgumentException for any other compression name
     */
    public static byte compressionToByte(String compression) {
        if (compression == null || compression.isEmpty() || "none".equalsIgnoreCase(compression)) {
            return COMPRESSION_NONE;
        }
        if ("zstd".equalsIgnoreCase(compression)) {
            return COMPRESSION_ZSTD;
        }
        throw new IllegalArgumentException("Unsupported Mosaic compression: " + compression);
    }

    /** Metadata for a single row group. */
    public static class RowGroupMeta {
        public final int numRows;
        // All three arrays are indexed by bucket id; empty buckets have size 0.
        public final long[] bucketOffsets;
        public final int[] compressedSizes;
        public final int[] uncompressedSizes;

        public RowGroupMeta(
                int numRows, long[] bucketOffsets, int[] compressedSizes, int[] uncompressedSizes) {
            this.numRows = numRows;
            this.bucketOffsets = bucketOffsets;
            this.compressedSizes = compressedSizes;
            this.uncompressedSizes = uncompressedSizes;
        }
    }
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicTypes.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicTypes.java
new file mode 100644
index 000000000000..bc0892c7d807
--- /dev/null
+++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicTypes.java
@@ -0,0 +1,208 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.types.BigIntType; +import org.apache.paimon.types.BinaryType; +import org.apache.paimon.types.BooleanType; +import org.apache.paimon.types.CharType; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DataTypeRoot; +import org.apache.paimon.types.DateType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.DoubleType; +import org.apache.paimon.types.FloatType; +import org.apache.paimon.types.IntType; +import org.apache.paimon.types.LocalZonedTimestampType; +import org.apache.paimon.types.SmallIntType; +import org.apache.paimon.types.TimeType; +import org.apache.paimon.types.TimestampType; +import org.apache.paimon.types.TinyIntType; +import org.apache.paimon.types.VarBinaryType; +import org.apache.paimon.types.VarCharType; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +import static org.apache.paimon.format.mosaic.MosaicUtils.readVarint; +import static org.apache.paimon.format.mosaic.MosaicUtils.writeVarint; + +/** Recursive binary serialization/deserialization for {@link DataType}. 
 */
public class MosaicTypes {

    // On-disk type tags. These values are part of the file format — never renumber.
    private static final byte TYPE_BOOLEAN = 0;
    private static final byte TYPE_TINYINT = 1;
    private static final byte TYPE_SMALLINT = 2;
    private static final byte TYPE_INTEGER = 3;
    private static final byte TYPE_BIGINT = 4;
    private static final byte TYPE_FLOAT = 5;
    private static final byte TYPE_DOUBLE = 6;
    private static final byte TYPE_DATE = 7;
    private static final byte TYPE_CHAR = 8;
    private static final byte TYPE_VARCHAR = 9;
    private static final byte TYPE_STRING = 10;
    private static final byte TYPE_BINARY = 11;
    private static final byte TYPE_VARBINARY = 12;
    private static final byte TYPE_BYTES = 13;
    private static final byte TYPE_DECIMAL = 14;
    private static final byte TYPE_TIME = 15;
    private static final byte TYPE_TIMESTAMP = 16;
    private static final byte TYPE_TIMESTAMP_LTZ = 17;

    /** Serializer for one type root: writes tag byte, nullability, and any type parameters. */
    @FunctionalInterface
    interface TypeWriter {
        void write(DataOutputStream out, DataType type) throws IOException;
    }

    /** Deserializer for one tag: rebuilds the type; nullability is already consumed. */
    @FunctionalInterface
    interface TypeReader {
        DataType read(DataInputStream in, boolean nullable) throws IOException;
    }

    // Dispatch tables: WRITERS indexed by DataTypeRoot ordinal, READERS by on-disk tag.
    private static final TypeWriter[] WRITERS = new TypeWriter[DataTypeRoot.values().length];
    private static final TypeReader[] READERS = new TypeReader[18];

    static {
        // simple types (tag + nullable flag, no parameters)
        reg(DataTypeRoot.BOOLEAN, TYPE_BOOLEAN, (in, n) -> new BooleanType(n));
        reg(DataTypeRoot.TINYINT, TYPE_TINYINT, (in, n) -> new TinyIntType(n));
        reg(DataTypeRoot.SMALLINT, TYPE_SMALLINT, (in, n) -> new SmallIntType(n));
        reg(DataTypeRoot.INTEGER, TYPE_INTEGER, (in, n) -> new IntType(n));
        reg(DataTypeRoot.BIGINT, TYPE_BIGINT, (in, n) -> new BigIntType(n));
        reg(DataTypeRoot.FLOAT, TYPE_FLOAT, (in, n) -> new FloatType(n));
        reg(DataTypeRoot.DOUBLE, TYPE_DOUBLE, (in, n) -> new DoubleType(n));
        reg(DataTypeRoot.DATE, TYPE_DATE, (in, n) -> new DateType(n));

        // CHAR: fixed length stored as a varint parameter
        WRITERS[DataTypeRoot.CHAR.ordinal()] =
                (out, type) -> {
                    out.writeByte(TYPE_CHAR);
                    out.writeBoolean(type.isNullable());
                    writeVarint(out, ((CharType) type).getLength());
                };
        READERS[TYPE_CHAR] = (in, n) -> new CharType(n, readVarint(in));

        // VARCHAR / STRING: unbounded VARCHAR gets its own tag so no length is stored
        WRITERS[DataTypeRoot.VARCHAR.ordinal()] =
                (out, type) -> {
                    int len = ((VarCharType) type).getLength();
                    if (len == VarCharType.MAX_LENGTH) {
                        out.writeByte(TYPE_STRING);
                        out.writeBoolean(type.isNullable());
                    } else {
                        out.writeByte(TYPE_VARCHAR);
                        out.writeBoolean(type.isNullable());
                        writeVarint(out, len);
                    }
                };
        READERS[TYPE_VARCHAR] = (in, n) -> new VarCharType(n, readVarint(in));
        READERS[TYPE_STRING] = (in, n) -> new VarCharType(n, VarCharType.MAX_LENGTH);

        // BINARY
        WRITERS[DataTypeRoot.BINARY.ordinal()] =
                (out, type) -> {
                    out.writeByte(TYPE_BINARY);
                    out.writeBoolean(type.isNullable());
                    writeVarint(out, ((BinaryType) type).getLength());
                };
        READERS[TYPE_BINARY] = (in, n) -> new BinaryType(n, readVarint(in));

        // VARBINARY / BYTES: same length-elision trick as VARCHAR / STRING
        WRITERS[DataTypeRoot.VARBINARY.ordinal()] =
                (out, type) -> {
                    int len = ((VarBinaryType) type).getLength();
                    if (len == VarBinaryType.MAX_LENGTH) {
                        out.writeByte(TYPE_BYTES);
                        out.writeBoolean(type.isNullable());
                    } else {
                        out.writeByte(TYPE_VARBINARY);
                        out.writeBoolean(type.isNullable());
                        writeVarint(out, len);
                    }
                };
        READERS[TYPE_VARBINARY] = (in, n) -> new VarBinaryType(n, readVarint(in));
        READERS[TYPE_BYTES] = (in, n) -> new VarBinaryType(n, VarBinaryType.MAX_LENGTH);

        // DECIMAL: precision then scale, both varint
        WRITERS[DataTypeRoot.DECIMAL.ordinal()] =
                (out, type) -> {
                    out.writeByte(TYPE_DECIMAL);
                    out.writeBoolean(type.isNullable());
                    DecimalType dt = (DecimalType) type;
                    writeVarint(out, dt.getPrecision());
                    writeVarint(out, dt.getScale());
                };
        // Java evaluates arguments left-to-right, so precision is read before scale.
        READERS[TYPE_DECIMAL] = (in, n) -> new DecimalType(n, readVarint(in), readVarint(in));

        // TIME
        WRITERS[DataTypeRoot.TIME_WITHOUT_TIME_ZONE.ordinal()] =
                (out, type) -> {
                    out.writeByte(TYPE_TIME);
                    out.writeBoolean(type.isNullable());
                    writeVarint(out, ((TimeType) type).getPrecision());
                };
        READERS[TYPE_TIME] = (in, n) -> new TimeType(n, readVarint(in));

        // TIMESTAMP
        WRITERS[DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE.ordinal()] =
                (out, type) -> {
                    out.writeByte(TYPE_TIMESTAMP);
                    out.writeBoolean(type.isNullable());
                    writeVarint(out, ((TimestampType) type).getPrecision());
                };
        READERS[TYPE_TIMESTAMP] = (in, n) -> new TimestampType(n, readVarint(in));

        // TIMESTAMP WITH LOCAL TIME ZONE
        WRITERS[DataTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE.ordinal()] =
                (out, type) -> {
                    out.writeByte(TYPE_TIMESTAMP_LTZ);
                    out.writeBoolean(type.isNullable());
                    writeVarint(out, ((LocalZonedTimestampType) type).getPrecision());
                };
        READERS[TYPE_TIMESTAMP_LTZ] = (in, n) -> new LocalZonedTimestampType(n, readVarint(in));
    }

    /** Registers a parameterless type: the writer emits only the tag and the nullable flag. */
    private static void reg(DataTypeRoot root, byte typeId, TypeReader reader) {
        WRITERS[root.ordinal()] =
                (out, type) -> {
                    out.writeByte(typeId);
                    out.writeBoolean(type.isNullable());
                };
        READERS[typeId] = reader;
    }

    /**
     * Writes one type to {@code out}.
     *
     * @throws IOException if the type root has no registered writer (unsupported in Mosaic)
     */
    public static void writeType(DataOutputStream out, DataType type) throws IOException {
        TypeWriter writer = WRITERS[type.getTypeRoot().ordinal()];
        if (writer == null) {
            throw new IOException("Unsupported Mosaic type: " + type.getTypeRoot());
        }
        writer.write(out, type);
    }

    /**
     * Reads one type from {@code in}: tag byte, nullable flag, then tag-specific parameters.
     *
     * @throws IOException if the tag is unknown to this reader version
     */
    public static DataType readType(DataInputStream in) throws IOException {
        int typeId = in.readByte() & 0xFF;
        boolean nullable = in.readBoolean();
        TypeReader reader = typeId < READERS.length ? READERS[typeId] : null;
        if (reader == null) {
            throw new IOException("Unsupported Mosaic type ID: " + typeId);
        }
        return reader.read(in, nullable);
    }
}
diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicUtils.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicUtils.java
new file mode 100644
index 000000000000..a7f4cdcbe48b
--- /dev/null
+++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicUtils.java
@@ -0,0 +1,92 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.format.mosaic;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

/** Shared varint and long encoding/decoding utilities for the Mosaic file format.
*/ +public class MosaicUtils { + + // ==================== byte[] based ==================== + + public static int readVarint(byte[] buf, int[] pos) { + int value = 0; + int shift = 0; + int b; + do { + b = buf[pos[0]++] & 0xFF; + value |= (b & 0x7F) << shift; + shift += 7; + } while ((b & 0x80) != 0); + return value; + } + + public static long readLong(byte[] buf, int[] pos) { + long v = 0; + for (int i = 0; i < 8; i++) { + v = (v << 8) | (buf[pos[0]++] & 0xFF); + } + return v; + } + + public static int writeVarint(byte[] buf, int pos, int value) { + while ((value & ~0x7F) != 0) { + buf[pos++] = (byte) ((value & 0x7F) | 0x80); + value >>>= 7; + } + buf[pos++] = (byte) value; + return pos; + } + + public static int writeLong(byte[] buf, int pos, long value) { + buf[pos++] = (byte) (value >>> 56); + buf[pos++] = (byte) (value >>> 48); + buf[pos++] = (byte) (value >>> 40); + buf[pos++] = (byte) (value >>> 32); + buf[pos++] = (byte) (value >>> 24); + buf[pos++] = (byte) (value >>> 16); + buf[pos++] = (byte) (value >>> 8); + buf[pos++] = (byte) value; + return pos; + } + + // ==================== stream based ==================== + + public static void writeVarint(DataOutputStream out, int value) throws IOException { + while ((value & ~0x7F) != 0) { + out.writeByte((value & 0x7F) | 0x80); + value >>>= 7; + } + out.writeByte(value); + } + + public static int readVarint(DataInputStream in) throws IOException { + int value = 0; + int shift = 0; + int b; + do { + b = in.readByte() & 0xFF; + value |= (b & 0x7F) << shift; + shift += 7; + } while ((b & 0x80) != 0); + return value; + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicWriter.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicWriter.java new file mode 100644 index 000000000000..78083c283d8d --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicWriter.java @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.format.FormatWriter; +import org.apache.paimon.format.mosaic.MosaicSpec.RowGroupMeta; +import org.apache.paimon.fs.PositionOutputStream; +import org.apache.paimon.types.RowType; + +import com.github.luben.zstd.Zstd; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.paimon.format.mosaic.MosaicSpec.COMPRESSION_NONE; +import static org.apache.paimon.format.mosaic.MosaicSpec.COMPRESSION_ZSTD; +import static org.apache.paimon.format.mosaic.MosaicUtils.writeLong; +import static org.apache.paimon.format.mosaic.MosaicUtils.writeVarint; + +/** Writer for the Mosaic file format with row group support. 
*/ +public class MosaicWriter implements FormatWriter { + + private final PositionOutputStream out; + private final MosaicSchema schema; + private MosaicSchema prunedSchema; + private final MosaicBucketWriter[] bucketWriters; + private final int numBuckets; + private final int zstdLevel; + private final byte compressionByte; + private final long rowGroupMaxSize; + + private final List rowGroupMetas; + private byte[] compressBuffer; + private int currentRowGroupRows; + private long currentBufferedSize; + private double compressionRatio; + private boolean closed; + + public MosaicWriter( + PositionOutputStream out, + RowType rowType, + int numBuckets, + int zstdLevel, + String compression, + long rowGroupMaxSize) { + this.out = out; + this.numBuckets = Math.min(numBuckets, rowType.getFieldCount()); + this.zstdLevel = zstdLevel; + this.compressionByte = MosaicSpec.compressionToByte(compression); + this.rowGroupMaxSize = rowGroupMaxSize; + this.schema = MosaicSchema.create(rowType, this.numBuckets); + this.bucketWriters = new MosaicBucketWriter[this.numBuckets]; + + int[][] bucketMapping = schema.bucketToGlobalIndices(); + for (int b = 0; b < this.numBuckets; b++) { + if (bucketMapping[b].length > 0) { + bucketWriters[b] = new MosaicBucketWriter(rowType, bucketMapping[b]); + } + } + + this.rowGroupMetas = new ArrayList<>(); + this.compressBuffer = new byte[0]; + this.currentRowGroupRows = 0; + this.currentBufferedSize = 0; + this.compressionRatio = this.compressionByte == COMPRESSION_NONE ? 
1.0 : 0.3; + this.closed = false; + } + + @Override + public void addElement(InternalRow element) throws IOException { + long size = 0; + for (int i = 0; i < numBuckets; i++) { + if (bucketWriters[i] != null) { + size += bucketWriters[i].writeRow(element); + } + } + currentRowGroupRows++; + currentBufferedSize += size; + + if (currentBufferedSize >= rowGroupMaxSize) { + flushRowGroup(); + } + } + + @Override + public boolean reachTargetSize(boolean suggestedCheck, long targetSize) throws IOException { + long estimatedSize = out.getPos() + (long) (currentBufferedSize * compressionRatio); + return estimatedSize >= targetSize; + } + + private void flushRowGroup() throws IOException { + if (currentRowGroupRows == 0) { + return; + } + + long[] bucketOffsets = new long[numBuckets]; + int[] compressedSizes = new int[numBuckets]; + int[] uncompressedSizes = new int[numBuckets]; + + for (int b = 0; b < numBuckets; b++) { + MosaicBucketWriter bucketWriter = bucketWriters[b]; + if (bucketWriter == null || bucketWriter.isEmpty()) { + continue; + } + byte[] raw = bucketWriter.finish(); + compressedSizes[b] = writeCompressed(raw); + uncompressedSizes[b] = raw.length; + bucketOffsets[b] = out.getPos() - compressedSizes[b]; + bucketWriter.reset(); + } + + rowGroupMetas.add( + new RowGroupMeta( + currentRowGroupRows, bucketOffsets, compressedSizes, uncompressedSizes)); + + long totalCompressed = 0; + long totalUncompressed = 0; + for (int b = 0; b < numBuckets; b++) { + totalCompressed += compressedSizes[b]; + totalUncompressed += uncompressedSizes[b]; + } + if (totalUncompressed > 0) { + compressionRatio = (double) totalCompressed / totalUncompressed; + } + + currentRowGroupRows = 0; + currentBufferedSize = 0; + } + + private void flushRowGroupPruned() throws IOException { + if (currentRowGroupRows == 0) { + return; + } + + boolean[][] allNullByBucket = new boolean[numBuckets][]; + long[] bucketOffsets = new long[numBuckets]; + int[] compressedSizes = new int[numBuckets]; + int[] 
uncompressedSizes = new int[numBuckets]; + + for (int b = 0; b < numBuckets; b++) { + MosaicBucketWriter bucketWriter = bucketWriters[b]; + if (bucketWriter == null || bucketWriter.isEmpty()) { + continue; + } + allNullByBucket[b] = bucketWriter.getAllNullFlags(); + byte[] raw = bucketWriter.finish(true); + compressedSizes[b] = writeCompressed(raw); + uncompressedSizes[b] = raw.length; + bucketOffsets[b] = out.getPos() - compressedSizes[b]; + bucketWriter.reset(); + } + + rowGroupMetas.add( + new RowGroupMeta( + currentRowGroupRows, bucketOffsets, compressedSizes, uncompressedSizes)); + + prunedSchema = schema.pruneAllNullColumns(allNullByBucket); + + currentRowGroupRows = 0; + currentBufferedSize = 0; + } + + private int writeCompressed(byte[] raw) throws IOException { + switch (compressionByte) { + case COMPRESSION_NONE: + out.write(raw); + return raw.length; + case COMPRESSION_ZSTD: + int bound = (int) Zstd.compressBound(raw.length); + if (compressBuffer.length < bound) { + compressBuffer = new byte[bound]; + } + int compLen = (int) Zstd.compress(compressBuffer, raw, zstdLevel); + out.write(compressBuffer, 0, compLen); + return compLen; + default: + throw new UnsupportedEncodingException( + "Unsupported compression: " + compressionByte); + } + } + + @Override + public void close() throws IOException { + if (closed) { + return; + } + closed = true; + + // Flush remaining rows as the last row group + boolean singleRowGroup = rowGroupMetas.isEmpty() && currentRowGroupRows > 0; + if (singleRowGroup) { + flushRowGroupPruned(); + } else { + flushRowGroup(); + } + + // Write schema block (use pruned schema if available) + MosaicSchema schemaToWrite = prunedSchema != null ? 
prunedSchema : schema; + byte[] schemaRaw = schemaToWrite.serialize(); + long schemaBlockOffset = out.getPos(); + switch (compressionByte) { + case COMPRESSION_NONE: + { + ByteBuffer lenBuf = ByteBuffer.allocate(4).order(ByteOrder.BIG_ENDIAN); + lenBuf.putInt(schemaRaw.length); + out.write(lenBuf.array()); + out.write(schemaRaw); + break; + } + case COMPRESSION_ZSTD: + { + int schemaBound = (int) Zstd.compressBound(schemaRaw.length); + if (compressBuffer.length < schemaBound) { + compressBuffer = new byte[schemaBound]; + } + long compLen = Zstd.compress(compressBuffer, schemaRaw, zstdLevel); + ByteBuffer lenBuf = ByteBuffer.allocate(4).order(ByteOrder.BIG_ENDIAN); + lenBuf.putInt(schemaRaw.length); + out.write(lenBuf.array()); + out.write(compressBuffer, 0, (int) compLen); + break; + } + default: + throw new UnsupportedEncodingException( + "Unsupported compression: " + compressionByte); + } + + // Write row group index (varint encoded, only non-empty buckets) + long indexOffset = out.getPos(); + int numRowGroups = rowGroupMetas.size(); + byte[] indexBuf = new byte[numRowGroups * (5 + numBuckets * 25)]; + int idxPos = 0; + for (RowGroupMeta meta : rowGroupMetas) { + idxPos = writeVarint(indexBuf, idxPos, meta.numRows); + int nonEmpty = 0; + for (int b = 0; b < numBuckets; b++) { + if (meta.compressedSizes[b] > 0) { + nonEmpty++; + } + } + idxPos = writeVarint(indexBuf, idxPos, nonEmpty); + for (int b = 0; b < numBuckets; b++) { + if (meta.compressedSizes[b] > 0) { + idxPos = writeVarint(indexBuf, idxPos, b); + idxPos = writeLong(indexBuf, idxPos, meta.bucketOffsets[b]); + idxPos = writeVarint(indexBuf, idxPos, meta.compressedSizes[b]); + idxPos = writeVarint(indexBuf, idxPos, meta.uncompressedSizes[b]); + } + } + } + out.write(indexBuf, 0, idxPos); + + // Write footer + ByteBuffer footer = ByteBuffer.allocate(MosaicSpec.FOOTER_SIZE).order(ByteOrder.BIG_ENDIAN); + footer.putLong(indexOffset); + footer.putLong(schemaBlockOffset); + footer.putInt(numBuckets); + 
footer.putInt(numRowGroups); + footer.put(compressionByte); + footer.put(MosaicSpec.VERSION); + footer.putShort((short) 0); + footer.put(MosaicSpec.MAGIC); + out.write(footer.array()); + + out.flush(); + } +} diff --git a/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicWriterFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicWriterFactory.java new file mode 100644 index 000000000000..5393ebcc0396 --- /dev/null +++ b/paimon-format/src/main/java/org/apache/paimon/format/mosaic/MosaicWriterFactory.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.format.FormatWriter; +import org.apache.paimon.format.FormatWriterFactory; +import org.apache.paimon.fs.PositionOutputStream; +import org.apache.paimon.types.RowType; + +import java.io.IOException; + +/** Factory for creating {@link MosaicWriter} instances. 
*/ +public class MosaicWriterFactory implements FormatWriterFactory { + + private final RowType rowType; + private final int numBuckets; + private final int zstdLevel; + private final long rowGroupMaxSize; + + public MosaicWriterFactory( + RowType rowType, int numBuckets, int zstdLevel, long rowGroupMaxSize) { + this.rowType = rowType; + this.numBuckets = numBuckets; + this.zstdLevel = zstdLevel; + this.rowGroupMaxSize = rowGroupMaxSize; + } + + @Override + public FormatWriter create(PositionOutputStream out, String compression) throws IOException { + return new MosaicWriter(out, rowType, numBuckets, zstdLevel, compression, rowGroupMaxSize); + } +} diff --git a/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory b/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory index 80cfe4b946b8..777fcb65f545 100644 --- a/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory +++ b/paimon-format/src/main/resources/META-INF/services/org.apache.paimon.format.FileFormatFactory @@ -20,3 +20,4 @@ org.apache.paimon.format.csv.CsvFileFormatFactory org.apache.paimon.format.text.TextFileFormatFactory org.apache.paimon.format.json.JsonFileFormatFactory org.apache.paimon.format.blob.BlobFileFormatFactory +org.apache.paimon.format.mosaic.MosaicFileFormatFactory diff --git a/paimon-format/src/test/java/org/apache/paimon/format/WideTableFormatBenchmark.java b/paimon-format/src/test/java/org/apache/paimon/format/WideTableFormatBenchmark.java new file mode 100644 index 000000000000..4d915960f0f6 --- /dev/null +++ b/paimon-format/src/test/java/org/apache/paimon/format/WideTableFormatBenchmark.java @@ -0,0 +1,468 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.format; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.format.FileFormatFactory.FormatContext; +import org.apache.paimon.format.mosaic.MosaicFileFormat; +import org.apache.paimon.format.orc.OrcFileFormat; +import org.apache.paimon.format.parquet.ParquetFileFormat; +import org.apache.paimon.fs.Path; +import org.apache.paimon.fs.PositionOutputStream; +import org.apache.paimon.fs.local.LocalFileIO; +import org.apache.paimon.options.MemorySize; +import org.apache.paimon.options.Options; +import org.apache.paimon.reader.RecordReader; +import org.apache.paimon.types.DataTypeRoot; +import org.apache.paimon.types.DataTypes; +import org.apache.paimon.types.RowType; + +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; + +/** + * Benchmark to compare file sizes and projection read performance between Parquet, ORC and Mosaic + * for wide tables (10,000+ columns). + * + *

Run manually: {@code mvn exec:java -pl paimon-format
+ * -Dexec.mainClass="org.apache.paimon.format.WideTableFormatBenchmark"
+ * -Dexec.classpathScope="test"}
+ */
+public class WideTableFormatBenchmark {
+
+    private static final int COLUMN_COUNT = 10000;
+    private static final int ROW_COUNT = 10;
+    private static final String COMPRESSION = "zstd";
+
+    public static void main(String[] args) throws Exception {
+        run(WideTableFormatBenchmark::fileSizeComparison);
+        run(tempDir -> projectionReadPerformance(tempDir, 500));
+        run(tempDir -> projectionReadPerformance(tempDir, 4500));
+    }
+
+    private static void run(Runner runner) throws IOException {
+        java.nio.file.Path tempDir = Files.createTempDirectory("mosaic-benchmark");
+        try {
+            runner.run(tempDir);
+        } finally {
+            deleteRecursively(tempDir);
+        }
+    }
+
+    private static void fileSizeComparison(java.nio.file.Path tempDir) throws IOException {
+        RowType rowType = buildWideRowType();
+        int fieldCount = rowType.getFieldCount();
+        LocalFileIO fileIO = new LocalFileIO();
+
+        long parquetSize =
+                writeParquet(
+                        rowType,
+                        ROW_COUNT,
+                        new Path(tempDir.toString(), "wide_table.parquet"),
+                        fileIO);
+
+        long orcSize =
+                writeOrc(
+                        rowType, ROW_COUNT, new Path(tempDir.toString(), "wide_table.orc"), fileIO);
+
+        Path mosaicPath = new Path(tempDir.toString(), "wide_table.mosaic");
+        long mosaicSize = writeMosaic(rowType, ROW_COUNT, mosaicPath, fileIO);
+
+        System.out.println("=== Wide Table File Size Comparison ===");
+        System.out.println("Columns: " + COLUMN_COUNT + ", Rows: " + ROW_COUNT);
+        System.out.println("Column name avg length: ~80 bytes");
+        System.out.println("Compression: " + COMPRESSION + " (level 9)");
+        System.out.println("---------------------------------------");
+        System.out.printf("Parquet: %,d bytes (%.1f KB)%n", parquetSize, parquetSize / 1024.0);
+        System.out.printf("ORC: %,d bytes (%.1f KB)%n", orcSize, orcSize / 1024.0);
+        System.out.printf("Mosaic: %,d bytes (%.1f KB)%n", mosaicSize, mosaicSize /
1024.0); + System.out.println("---------------------------------------"); + + // verify Mosaic correctness + List mosaicResult = readMosaic(rowType, rowType, mosaicPath, fileIO); + check(mosaicResult.size() == ROW_COUNT, "Row count mismatch"); + for (int r = 0; r < ROW_COUNT; r++) { + GenericRow expected = generateRow(r, fieldCount); + for (int c = 0; c < COLUMN_COUNT; c++) { + assertCellEqual(mosaicResult.get(r), expected, c); + } + } + System.out.println("Correctness check: PASSED"); + } + + private static void projectionReadPerformance(java.nio.file.Path tempDir, int rows) + throws IOException { + RowType rowType = buildWideRowType(); + LocalFileIO fileIO = new LocalFileIO(); + + Path parquetPath = new Path(tempDir.toString(), "proj_test.parquet"); + long parquetFileSize = writeParquet(rowType, rows, parquetPath, fileIO); + + Path orcPath = new Path(tempDir.toString(), "proj_test.orc"); + long orcFileSize = writeOrc(rowType, rows, orcPath, fileIO); + + Path mosaicPath = new Path(tempDir.toString(), "proj_test.mosaic"); + long mosaicFileSize = writeMosaic(rowType, rows, mosaicPath, fileIO); + + int[] projected10Cols = {0, 100, 500, 1000, 2000, 5000, 7000, 8000, 9000, 9999}; + int[] projected1Col = {1000}; + + System.out.printf("\n=== Projection Read Performance (%d rows) ===%n", rows); + System.out.printf( + "File size - Parquet: %.1f MB, ORC: %.1f MB, Mosaic: %.1f MB%n", + parquetFileSize / 1024.0 / 1024.0, + orcFileSize / 1024.0 / 1024.0, + mosaicFileSize / 1024.0 / 1024.0); + System.out.println("---------------------------------------"); + + benchmarkProjection( + rowType, projected10Cols, rows, parquetPath, orcPath, mosaicPath, fileIO); + benchmarkProjection(rowType, projected1Col, rows, parquetPath, orcPath, mosaicPath, fileIO); + } + + private static void benchmarkProjection( + RowType rowType, + int[] projectedColumns, + int rows, + Path parquetPath, + Path orcPath, + Path mosaicPath, + LocalFileIO fileIO) + throws IOException { + RowType projectedType = 
rowType.project(projectedColumns); + + int warmup = 3; + int iterations = 10; + + for (int i = 0; i < warmup; i++) { + readParquetProjected(rowType, projectedType, parquetPath, fileIO); + } + long parquetStart = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + readParquetProjected(rowType, projectedType, parquetPath, fileIO); + } + long parquetTimeNs = (System.nanoTime() - parquetStart) / iterations; + + for (int i = 0; i < warmup; i++) { + readOrcProjected(rowType, projectedType, orcPath, fileIO); + } + long orcStart = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + readOrcProjected(rowType, projectedType, orcPath, fileIO); + } + long orcTimeNs = (System.nanoTime() - orcStart) / iterations; + + for (int i = 0; i < warmup; i++) { + readMosaic(rowType, projectedType, mosaicPath, fileIO); + } + long mosaicStart = System.nanoTime(); + for (int i = 0; i < iterations; i++) { + readMosaic(rowType, projectedType, mosaicPath, fileIO); + } + long mosaicTimeNs = (System.nanoTime() - mosaicStart) / iterations; + + System.out.printf( + "Project %2d / %d cols: Parquet %,d us, ORC %,d us, Mosaic %,d us%n", + projectedColumns.length, + COLUMN_COUNT, + parquetTimeNs / 1000, + orcTimeNs / 1000, + mosaicTimeNs / 1000); + + // verify projection results + List parquetResult = + readParquetProjected(rowType, projectedType, parquetPath, fileIO); + List mosaicResult = readMosaic(rowType, projectedType, mosaicPath, fileIO); + check( + mosaicResult.size() == parquetResult.size(), + "Projection row count mismatch: parquet=" + + parquetResult.size() + + " mosaic=" + + mosaicResult.size()); + for (int r = 0; r < parquetResult.size(); r++) { + for (int c = 0; c < projectedColumns.length; c++) { + int origCol = projectedColumns[c]; + if (isIntColumn(origCol)) { + check( + mosaicResult.get(r).getInt(c) == parquetResult.get(r).getInt(c), + "INT mismatch at row=" + r + " col=" + c); + } else { + check( + mosaicResult + .get(r) + .getString(c) + .toString() + 
.equals(parquetResult.get(r).getString(c).toString()), + "STRING mismatch at row=" + r + " col=" + c); + } + } + } + } + + // ==================== Parquet helpers ==================== + + private static long writeParquet(RowType rowType, int rowCount, Path path, LocalFileIO fileIO) + throws IOException { + ParquetFileFormat parquet = new ParquetFileFormat(createFormatContext()); + FormatWriterFactory writerFactory = parquet.createWriterFactory(rowType); + PositionOutputStream out = fileIO.newOutputStream(path, false); + FormatWriter writer = writerFactory.create(out, COMPRESSION); + int fieldCount = rowType.getFieldCount(); + for (int r = 0; r < rowCount; r++) { + writer.addElement(generateRow(r, fieldCount)); + } + writer.close(); + out.close(); + return fileIO.getFileSize(path); + } + + private static List readParquetProjected( + RowType fullType, RowType projectedType, Path path, LocalFileIO fileIO) + throws IOException { + ParquetFileFormat parquet = new ParquetFileFormat(createFormatContext()); + RecordReader reader = + parquet.createReaderFactory(fullType, projectedType, null) + .createReader( + new FormatReaderContext(fileIO, path, fileIO.getFileSize(path))); + List result = new ArrayList<>(); + reader.forEachRemaining( + row -> { + Object[] fields = new Object[projectedType.getFieldCount()]; + for (int i = 0; i < fields.length; i++) { + if (row.isNullAt(i)) { + fields[i] = null; + } else if (projectedType.getTypeAt(i).getTypeRoot() + == DataTypeRoot.INTEGER) { + fields[i] = row.getInt(i); + } else { + fields[i] = BinaryString.fromString(row.getString(i).toString()); + } + } + result.add(GenericRow.of(fields)); + }); + reader.close(); + return result; + } + + // ==================== ORC helpers ==================== + + private static long writeOrc(RowType rowType, int rowCount, Path path, LocalFileIO fileIO) + throws IOException { + OrcFileFormat orc = new OrcFileFormat(createFormatContext()); + FormatWriterFactory writerFactory = 
orc.createWriterFactory(rowType); + PositionOutputStream out = fileIO.newOutputStream(path, false); + FormatWriter writer = writerFactory.create(out, COMPRESSION); + int fieldCount = rowType.getFieldCount(); + for (int r = 0; r < rowCount; r++) { + writer.addElement(generateRow(r, fieldCount)); + } + writer.close(); + out.close(); + return fileIO.getFileSize(path); + } + + private static List readOrcProjected( + RowType fullType, RowType projectedType, Path path, LocalFileIO fileIO) + throws IOException { + OrcFileFormat orc = new OrcFileFormat(createFormatContext()); + RecordReader reader = + orc.createReaderFactory(fullType, projectedType, new ArrayList<>()) + .createReader( + new FormatReaderContext(fileIO, path, fileIO.getFileSize(path))); + List result = new ArrayList<>(); + reader.forEachRemaining( + row -> { + Object[] fields = new Object[projectedType.getFieldCount()]; + for (int i = 0; i < fields.length; i++) { + if (row.isNullAt(i)) { + fields[i] = null; + } else if (projectedType.getTypeAt(i).getTypeRoot() + == DataTypeRoot.INTEGER) { + fields[i] = row.getInt(i); + } else { + fields[i] = row.getString(i); + } + } + result.add(GenericRow.of(fields)); + }); + reader.close(); + return result; + } + + // ==================== Mosaic helpers ==================== + + private static long writeMosaic(RowType rowType, int rowCount, Path path, LocalFileIO fileIO) + throws IOException { + MosaicFileFormat mosaic = new MosaicFileFormat(createFormatContext()); + FormatWriterFactory writerFactory = mosaic.createWriterFactory(rowType); + PositionOutputStream out = fileIO.newOutputStream(path, false); + FormatWriter writer = writerFactory.create(out, COMPRESSION); + int fieldCount = rowType.getFieldCount(); + for (int r = 0; r < rowCount; r++) { + writer.addElement(generateRow(r, fieldCount)); + } + writer.close(); + out.close(); + return fileIO.getFileSize(path); + } + + private static List readMosaic( + RowType fullType, RowType projectedType, Path path, LocalFileIO 
fileIO) + throws IOException { + MosaicFileFormat mosaic = new MosaicFileFormat(createFormatContext()); + RecordReader reader = + mosaic.createReaderFactory(fullType, projectedType, null) + .createReader( + new FormatReaderContext(fileIO, path, fileIO.getFileSize(path))); + List result = new ArrayList<>(); + reader.forEachRemaining( + row -> { + Object[] fields = new Object[projectedType.getFieldCount()]; + for (int i = 0; i < fields.length; i++) { + if (row.isNullAt(i)) { + fields[i] = null; + } else if (projectedType.getTypeAt(i).getTypeRoot() + == DataTypeRoot.INTEGER) { + fields[i] = row.getInt(i); + } else { + fields[i] = row.getString(i); + } + } + result.add(GenericRow.of(fields)); + }); + reader.close(); + return result; + } + + // ==================== Helpers ==================== + + private static final int INT_COLUMN_INTERVAL = 10; + private static final String[] STRING_SAMPLES = { + "uuid: 550e8400-e29b-41d4-a716-446655440000", + "{\"user_id\": 12345, \"action\": \"click\", \"page\": \"home\"}", + "https://example.com/api/v1/resource/abc123?query=active&sort=desc", + "customer_service@company-name.example.com", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "2024-01-15T09:23:47.123Z", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "error: connection timeout after 30000ms, retrying...", + "session_token_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0", + "active,verified,premium,notifications_enabled,marketing_opt_in", + "New York, NY 10001, United States", + "REF-ORD-2024-8847293-XJ", + "0x7f8a9b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9", + "Approved by manager at 2024-01-15T10:00:00Z", + "file:///data/storage/partition_2024_01/batch_17.parquet", + "[ERROR] NullPointerException at com.example.Service.processLine(42)", + "User preferences: theme=dark, lang=zh-CN, timezone=Asia/Shanghai", + "192.168.1.105", + "Batch job completed successfully. 
Processed 1,234,567 records in 45.3s.", + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0", + "Shipping via FedEx Ground, tracking #: 784930123456, est. 3 business days", + "comment: This product exceeded my expectations! Would recommend to everyone.", + "department=engineering|team=platform|role=senior|level=L6", + "version=3.2.1-SNAPSHOT, build=20240115.1423, commit=abc123def", + "payment_method=visa_ending_4242|billing_cycle=monthly|amount=99.99USD" + }; + + private static RowType buildWideRowType() { + RowType.Builder builder = RowType.builder(); + for (int i = 0; i < COLUMN_COUNT; i++) { + String name = + String.format( + "this_is_a_very_long_column_name_for_testing_compression_ratio_column_index_%05d", + i); + if (i % INT_COLUMN_INTERVAL == 0) { + builder.field(name, DataTypes.INT()); + } else { + builder.field(name, DataTypes.STRING()); + } + } + return builder.build(); + } + + private static GenericRow generateRow(int rowIndex, int fieldCount) { + Object[] fields = new Object[fieldCount]; + for (int c = 0; c < fieldCount; c++) { + if (c % INT_COLUMN_INTERVAL == 0) { + fields[c] = rowIndex * fieldCount + c; + } else { + int sampleIdx = (rowIndex + c) % STRING_SAMPLES.length; + fields[c] = + BinaryString.fromString( + STRING_SAMPLES[sampleIdx] + + " [row=" + + rowIndex + + ",col=" + + c + + "]"); + } + } + return GenericRow.of(fields); + } + + private static boolean isIntColumn(int index) { + return index % INT_COLUMN_INTERVAL == 0; + } + + private static void assertCellEqual(InternalRow actual, InternalRow expected, int col) { + if (isIntColumn(col)) { + check(actual.getInt(col) == expected.getInt(col), "INT mismatch at col=" + col); + } else { + check( + actual.getString(col).toString().equals(expected.getString(col).toString()), + "STRING mismatch at col=" + col); + } + } + + private static void check(boolean condition, String message) { + if (!condition) { + throw new AssertionError(message); + } + } + + private static FormatContext 
createFormatContext() { + return new FormatContext(new Options(), 1024, 1024, MemorySize.ofMebiBytes(128), 9, null); + } + + private static void deleteRecursively(java.nio.file.Path dir) { + try { + Files.walk(dir) + .sorted(java.util.Comparator.reverseOrder()) + .forEach( + p -> { + try { + Files.deleteIfExists(p); + } catch (IOException e) { + // ignore + } + }); + } catch (IOException e) { + // ignore + } + } + + private interface Runner { + void run(java.nio.file.Path tempDir) throws IOException; + } +} diff --git a/paimon-format/src/test/java/org/apache/paimon/format/mosaic/MosaicFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/mosaic/MosaicFileFormatTest.java new file mode 100644 index 000000000000..17e4d3a5a392 --- /dev/null +++ b/paimon-format/src/test/java/org/apache/paimon/format/mosaic/MosaicFileFormatTest.java @@ -0,0 +1,1244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.format.mosaic; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.Decimal; +import org.apache.paimon.data.GenericRow; +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.format.FileFormatFactory.FormatContext; +import org.apache.paimon.format.FormatReaderContext; +import org.apache.paimon.format.FormatReaderFactory; +import org.apache.paimon.format.FormatWriter; +import org.apache.paimon.format.FormatWriterFactory; +import org.apache.paimon.format.orc.OrcFileFormat; +import org.apache.paimon.fs.Path; +import org.apache.paimon.fs.PositionOutputStream; +import org.apache.paimon.fs.local.LocalFileIO; +import org.apache.paimon.options.MemorySize; +import org.apache.paimon.options.Options; +import org.apache.paimon.reader.FileRecordReader; +import org.apache.paimon.reader.RecordReader; +import org.apache.paimon.types.DataTypes; +import org.apache.paimon.types.RowType; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for the Mosaic file format. 
*/ +public class MosaicFileFormatTest { + + @TempDir java.nio.file.Path tempDir; + + @Test + public void testBasicRoundTrip() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("name", DataTypes.STRING()) + .field("value", DataTypes.DOUBLE()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, BinaryString.fromString("name_" + i), i * 1.5)); + } + + Path path = new Path(tempDir.toString(), "basic.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getString(1).toString()).isEqualTo("name_" + i); + assertThat(result.get(i).getDouble(2)).isEqualTo(i * 1.5); + } + } + + @Test + public void testProjectionPushdown() throws IOException { + RowType rowType = + RowType.builder() + .field("a", DataTypes.INT()) + .field("b", DataTypes.STRING()) + .field("c", DataTypes.BIGINT()) + .field("d", DataTypes.DOUBLE()) + .field("e", DataTypes.FLOAT()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 50; i++) { + data.add( + GenericRow.of( + i, + BinaryString.fromString("val_" + i), + (long) i * 100, + i * 2.5, + (float) i * 0.1f)); + } + + Path path = new Path(tempDir.toString(), "proj.mosaic"); + write(rowType, data, path); + + // Project only columns a and c + RowType projectedType = + RowType.builder() + .field("a", DataTypes.INT()) + .field("c", DataTypes.BIGINT()) + .build(); + + List result = read(rowType, projectedType, path); + + assertThat(result).hasSize(50); + for (int i = 0; i < 50; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getLong(1)).isEqualTo((long) i * 100); + } + } + + @Test + public void testProjectionSkipsVariableLengthColumns() throws IOException { + RowType rowType = + RowType.builder() + .field("f_int", 
DataTypes.INT()) + .field("f_str1", DataTypes.STRING()) + .field("f_bytes", DataTypes.BYTES()) + .field("f_str2", DataTypes.STRING()) + .field("f_decimal_large", DataTypes.DECIMAL(30, 5)) + .field("f_target", DataTypes.BIGINT()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add( + GenericRow.of( + i, + BinaryString.fromString("variable_length_string_" + i), + ("binary_data_" + i).getBytes(), + BinaryString.fromString("another_string_value_" + i), + Decimal.fromBigDecimal( + new BigDecimal("123456789012345678901234.12345"), 30, 5), + (long) i * 1000)); + } + + Path path = new Path(tempDir.toString(), "skip_varlen.mosaic"); + write(rowType, data, path); + + // Project only f_int and f_target, forcing reader to skip variable-length columns in + // between + RowType projectedType = + RowType.builder() + .field("f_int", DataTypes.INT()) + .field("f_target", DataTypes.BIGINT()) + .build(); + + List result = read(rowType, projectedType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getLong(1)).isEqualTo((long) i * 1000); + } + } + + @Test + public void testNullValues() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("name", DataTypes.STRING().nullable()) + .field("value", DataTypes.DOUBLE().nullable()) + .build(); + + List data = new ArrayList<>(); + data.add(GenericRow.of(1, BinaryString.fromString("hello"), 1.0)); + data.add(GenericRow.of(2, null, 2.0)); + data.add(GenericRow.of(3, BinaryString.fromString("world"), null)); + data.add(GenericRow.of(4, null, null)); + + Path path = new Path(tempDir.toString(), "nulls.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(4); + + assertThat(result.get(0).getInt(0)).isEqualTo(1); + assertThat(result.get(0).getString(1).toString()).isEqualTo("hello"); + 
assertThat(result.get(0).getDouble(2)).isEqualTo(1.0); + + assertThat(result.get(1).getInt(0)).isEqualTo(2); + assertThat(result.get(1).isNullAt(1)).isTrue(); + assertThat(result.get(1).getDouble(2)).isEqualTo(2.0); + + assertThat(result.get(2).getInt(0)).isEqualTo(3); + assertThat(result.get(2).getString(1).toString()).isEqualTo("world"); + assertThat(result.get(2).isNullAt(2)).isTrue(); + + assertThat(result.get(3).getInt(0)).isEqualTo(4); + assertThat(result.get(3).isNullAt(1)).isTrue(); + assertThat(result.get(3).isNullAt(2)).isTrue(); + } + + @Test + public void testAllPrimitiveTypes() throws IOException { + RowType rowType = + RowType.builder() + .field("f_boolean", DataTypes.BOOLEAN()) + .field("f_tinyint", DataTypes.TINYINT()) + .field("f_smallint", DataTypes.SMALLINT()) + .field("f_int", DataTypes.INT()) + .field("f_bigint", DataTypes.BIGINT()) + .field("f_float", DataTypes.FLOAT()) + .field("f_double", DataTypes.DOUBLE()) + .field("f_string", DataTypes.STRING()) + .field("f_bytes", DataTypes.BYTES()) + .field("f_decimal_compact", DataTypes.DECIMAL(10, 2)) + .field("f_decimal_large", DataTypes.DECIMAL(30, 5)) + .field("f_date", DataTypes.DATE()) + .field("f_timestamp", DataTypes.TIMESTAMP(3)) + .field("f_timestamp_high", DataTypes.TIMESTAMP(9)) + .build(); + + List data = new ArrayList<>(); + data.add( + GenericRow.of( + true, + (byte) 42, + (short) 1234, + 999999, + 123456789012345L, + 3.14f, + 2.718281828, + BinaryString.fromString("hello world"), + new byte[] {1, 2, 3, 4, 5}, + Decimal.fromBigDecimal(new BigDecimal("12345.67"), 10, 2), + Decimal.fromBigDecimal( + new BigDecimal("123456789012345678901234.12345"), 30, 5), + 19000, // days since epoch + Timestamp.fromEpochMillis(1700000000000L), + Timestamp.fromEpochMillis(1700000000000L, 123456))); + + Path path = new Path(tempDir.toString(), "all_types.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(1); + InternalRow row = 
result.get(0); + assertThat(row.getBoolean(0)).isTrue(); + assertThat(row.getByte(1)).isEqualTo((byte) 42); + assertThat(row.getShort(2)).isEqualTo((short) 1234); + assertThat(row.getInt(3)).isEqualTo(999999); + assertThat(row.getLong(4)).isEqualTo(123456789012345L); + assertThat(row.getFloat(5)).isEqualTo(3.14f); + assertThat(row.getDouble(6)).isEqualTo(2.718281828); + assertThat(row.getString(7).toString()).isEqualTo("hello world"); + assertThat(row.getBinary(8)).isEqualTo(new byte[] {1, 2, 3, 4, 5}); + assertThat(row.getDecimal(9, 10, 2).toBigDecimal()) + .isEqualByComparingTo(new BigDecimal("12345.67")); + assertThat(row.getDecimal(10, 30, 5).toBigDecimal()) + .isEqualByComparingTo(new BigDecimal("123456789012345678901234.12345")); + assertThat(row.getInt(11)).isEqualTo(19000); + assertThat(row.getTimestamp(12, 3).getMillisecond()).isEqualTo(1700000000000L); + assertThat(row.getTimestamp(13, 9).getMillisecond()).isEqualTo(1700000000000L); + assertThat(row.getTimestamp(13, 9).getNanoOfMillisecond()).isEqualTo(123456); + } + + @Test + public void testWideTable() throws IOException { + int columnCount = 10000; + int rowCount = 10; + + RowType rowType = buildWideRowType(columnCount); + List data = new ArrayList<>(); + for (int r = 0; r < rowCount; r++) { + Object[] fields = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + fields[c] = r * columnCount + c; + } + data.add(GenericRow.of(fields)); + } + + Path path = new Path(tempDir.toString(), "wide.mosaic"); + LocalFileIO fileIO = new LocalFileIO(); + write(rowType, data, path); + long mosaicSize = fileIO.getFileSize(path); + + // Compare with ORC + Path orcPath = new Path(tempDir.toString(), "wide.orc"); + OrcFileFormat orc = + new OrcFileFormat( + new FormatContext( + new Options(), 1024, 1024, MemorySize.ofMebiBytes(128), 9, null)); + FormatWriterFactory orcWriterFactory = orc.createWriterFactory(rowType); + PositionOutputStream orcOut = fileIO.newOutputStream(orcPath, false); + FormatWriter 
orcWriter = orcWriterFactory.create(orcOut, "zstd"); + for (InternalRow row : data) { + orcWriter.addElement(row); + } + orcWriter.close(); + orcOut.close(); + long orcSize = fileIO.getFileSize(orcPath); + + System.out.println("=== Wide Table: Mosaic vs ORC ==="); + System.out.printf("Mosaic: %,d bytes (%.1f KB)%n", mosaicSize, mosaicSize / 1024.0); + System.out.printf("ORC: %,d bytes (%.1f KB)%n", orcSize, orcSize / 1024.0); + System.out.printf("Ratio: ORC is %.1fx larger%n", (double) orcSize / mosaicSize); + + assertThat(mosaicSize).isLessThan(orcSize); + + // Verify correctness + List result = read(rowType, rowType, path); + assertThat(result).hasSize(rowCount); + for (int r = 0; r < rowCount; r++) { + for (int c = 0; c < columnCount; c++) { + assertThat(result.get(r).getInt(c)).isEqualTo(r * columnCount + c); + } + } + } + + @Test + public void testWideTableProjection() throws IOException { + int columnCount = 10000; + int rowCount = 100; + + RowType rowType = buildWideRowType(columnCount); + List data = new ArrayList<>(); + for (int r = 0; r < rowCount; r++) { + Object[] fields = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + fields[c] = r * columnCount + c; + } + data.add(GenericRow.of(fields)); + } + + Path path = new Path(tempDir.toString(), "wide_proj.mosaic"); + write(rowType, data, path); + + // Project 10 columns + int[] projectedIndices = {0, 100, 500, 1000, 2000, 5000, 7000, 8000, 9000, 9999}; + RowType projectedType = rowType.project(projectedIndices); + + List result = read(rowType, projectedType, path); + + assertThat(result).hasSize(rowCount); + for (int r = 0; r < rowCount; r++) { + for (int i = 0; i < projectedIndices.length; i++) { + int c = projectedIndices[i]; + assertThat(result.get(r).getInt(i)).isEqualTo(r * columnCount + c); + } + } + } + + @Test + public void testEmptyTable() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("name", DataTypes.STRING()) + .build(); 
+ + Path path = new Path(tempDir.toString(), "empty.mosaic"); + write(rowType, new ArrayList<>(), path); + List result = read(rowType, rowType, path); + assertThat(result).isEmpty(); + } + + @Test + public void testSingleColumn() throws IOException { + RowType rowType = RowType.builder().field("id", DataTypes.INT()).build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + data.add(GenericRow.of(i)); + } + + Path path = new Path(tempDir.toString(), "single.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(10); + for (int i = 0; i < 10; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + } + } + + @Test + public void testMultiRowGroupStringStability() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("name", DataTypes.STRING()) + .build(); + + // Use tiny writeBatchMemory to force multiple row groups + MosaicFileFormat format = + new MosaicFileFormat( + new FormatContext( + new Options(), 1024, 1024, MemorySize.ofBytes(1), 3, null)); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, BinaryString.fromString("string_value_" + i))); + } + + Path path = new Path(tempDir.toString(), "multi_rg_string.mosaic"); + LocalFileIO fileIO = new LocalFileIO(); + FormatWriterFactory writerFactory = format.createWriterFactory(rowType); + PositionOutputStream out = fileIO.newOutputStream(path, false); + FormatWriter writer = writerFactory.create(out, "zstd"); + for (InternalRow row : data) { + writer.addElement(row); + } + writer.close(); + out.close(); + + // Project only the string column + RowType projectedType = RowType.builder().field("name", DataTypes.STRING()).build(); + FormatReaderFactory readerFactory = + format.createReaderFactory(rowType, projectedType, null); + FileRecordReader reader = + (FileRecordReader) + readerFactory.createReader( + new FormatReaderContext(fileIO, path, 
fileIO.getFileSize(path))); + + // Read batches one by one; retain string values from earlier batches + List allStrings = new ArrayList<>(); + RecordReader.RecordIterator batch; + while ((batch = reader.readBatch()) != null) { + InternalRow row; + while ((row = batch.next()) != null) { + allStrings.add(row.getString(0)); + } + batch.releaseBatch(); + } + reader.close(); + + // Verify all retained strings are still correct + assertThat(allStrings).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(allStrings.get(i).toString()).isEqualTo("string_value_" + i); + } + } + + // ==================== Columnar Encoding Tests ==================== + + @Test + public void testConstEncoding() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("const_int", DataTypes.INT()) + .field("const_long", DataTypes.BIGINT()) + .field("const_double", DataTypes.DOUBLE()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 200; i++) { + data.add(GenericRow.of(i, 42, 999L, 3.14)); + } + + Path path = new Path(tempDir.toString(), "const_enc.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(200); + for (int i = 0; i < 200; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getInt(1)).isEqualTo(42); + assertThat(result.get(i).getLong(2)).isEqualTo(999L); + assertThat(result.get(i).getDouble(3)).isEqualTo(3.14); + } + } + + @Test + public void testConstEncodingWithNulls() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("const_nullable", DataTypes.INT().nullable()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, i % 3 == 0 ? 
null : 42)); + } + + Path path = new Path(tempDir.toString(), "const_null.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + if (i % 3 == 0) { + assertThat(result.get(i).isNullAt(1)).isTrue(); + } else { + assertThat(result.get(i).getInt(1)).isEqualTo(42); + } + } + } + + @Test + public void testBooleanConstEncoding() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("flag_true", DataTypes.BOOLEAN()) + .field("flag_false", DataTypes.BOOLEAN()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, true, false)); + } + + Path path = new Path(tempDir.toString(), "bool_const.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getBoolean(1)).isTrue(); + assertThat(result.get(i).getBoolean(2)).isFalse(); + } + } + + @Test + public void testBooleanDictEncoding() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("flag", DataTypes.BOOLEAN()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, i % 2 == 0)); + } + + Path path = new Path(tempDir.toString(), "bool_dict.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getBoolean(1)).isEqualTo(i % 2 == 0); + } + } + + @Test + public void testDictEncoding() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("status", DataTypes.INT()) + 
.field("category", DataTypes.BIGINT()) + .field("level", DataTypes.SMALLINT()) + .build(); + + int[] statuses = {1, 2, 3, 4, 5}; + long[] categories = {100L, 200L, 300L}; + short[] levels = {10, 20}; + + List data = new ArrayList<>(); + for (int i = 0; i < 200; i++) { + data.add(GenericRow.of(i, statuses[i % 5], categories[i % 3], levels[i % 2])); + } + + Path path = new Path(tempDir.toString(), "dict_enc.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(200); + for (int i = 0; i < 200; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getInt(1)).isEqualTo(statuses[i % 5]); + assertThat(result.get(i).getLong(2)).isEqualTo(categories[i % 3]); + assertThat(result.get(i).getShort(3)).isEqualTo(levels[i % 2]); + } + } + + @Test + public void testDictEncodingWithNulls() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("flag", DataTypes.TINYINT().nullable()) + .build(); + + byte[] flags = {1, 2, 3}; + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, i % 4 == 0 ? 
null : flags[i % 3])); + } + + Path path = new Path(tempDir.toString(), "dict_null.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + if (i % 4 == 0) { + assertThat(result.get(i).isNullAt(1)).isTrue(); + } else { + assertThat(result.get(i).getByte(1)).isEqualTo(flags[i % 3]); + } + } + } + + @Test + public void testDictEncodingBoundary() throws IOException { + RowType rowType = + RowType.builder() + .field("dict_255", DataTypes.INT()) + .field("plain_256", DataTypes.INT()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 512; i++) { + data.add(GenericRow.of(i % 255, i % 256)); + } + + Path path = new Path(tempDir.toString(), "dict_boundary.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(512); + for (int i = 0; i < 512; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i % 255); + assertThat(result.get(i).getInt(1)).isEqualTo(i % 256); + } + } + + @Test + public void testFloatDictEncoding() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("f_float", DataTypes.FLOAT()) + .field("f_double", DataTypes.DOUBLE()) + .build(); + + float[] floats = {1.5f, 2.5f, 3.5f}; + double[] doubles = {10.1, 20.2}; + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, floats[i % 3], doubles[i % 2])); + } + + Path path = new Path(tempDir.toString(), "float_dict.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getFloat(1)).isEqualTo(floats[i % 3]); + assertThat(result.get(i).getDouble(2)).isEqualTo(doubles[i % 2]); + } + } + + @Test + public void 
testAllNullEncoding() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("always_null_str", DataTypes.STRING().nullable()) + .field("always_null_dbl", DataTypes.DOUBLE().nullable()) + .field("always_null_int", DataTypes.INT().nullable()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 50; i++) { + data.add(GenericRow.of(i, null, null, null)); + } + + Path path = new Path(tempDir.toString(), "all_null_enc.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(50); + for (int i = 0; i < 50; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).isNullAt(1)).isTrue(); + assertThat(result.get(i).isNullAt(2)).isTrue(); + assertThat(result.get(i).isNullAt(3)).isTrue(); + } + } + + @Test + public void testMixedEncodings() throws IOException { + RowType rowType = + RowType.builder() + .field("plain_col", DataTypes.INT()) + .field("const_col", DataTypes.BIGINT()) + .field("dict_col", DataTypes.SMALLINT()) + .field("all_null_col", DataTypes.DOUBLE().nullable()) + .field("plain_str", DataTypes.STRING()) + .build(); + + short[] dictValues = {10, 20, 30, 40, 50}; + List data = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + data.add( + GenericRow.of( + i, 999L, dictValues[i % 5], null, BinaryString.fromString("str_" + i))); + } + + Path path = new Path(tempDir.toString(), "mixed_enc.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(1000); + for (int i = 0; i < 1000; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getLong(1)).isEqualTo(999L); + assertThat(result.get(i).getShort(2)).isEqualTo(dictValues[i % 5]); + assertThat(result.get(i).isNullAt(3)).isTrue(); + assertThat(result.get(i).getString(4).toString()).isEqualTo("str_" + i); + } + } + + @Test + public void testMixedEncodingsWithProjection() throws 
IOException { + RowType rowType = + RowType.builder() + .field("plain_col", DataTypes.INT()) + .field("const_col", DataTypes.BIGINT()) + .field("dict_col", DataTypes.SMALLINT()) + .field("all_null_col", DataTypes.DOUBLE().nullable()) + .field("plain_str", DataTypes.STRING()) + .build(); + + short[] dictValues = {10, 20, 30}; + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add( + GenericRow.of( + i, 42L, dictValues[i % 3], null, BinaryString.fromString("s" + i))); + } + + Path path = new Path(tempDir.toString(), "mixed_proj.mosaic"); + write(rowType, data, path); + + RowType projectedType = + RowType.builder() + .field("dict_col", DataTypes.SMALLINT()) + .field("const_col", DataTypes.BIGINT()) + .build(); + + List result = read(rowType, projectedType, path); + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getShort(0)).isEqualTo(dictValues[i % 3]); + assertThat(result.get(i).getLong(1)).isEqualTo(42L); + } + } + + // ==================== Schema Prefix Compression Tests ==================== + + @Test + public void testSchemaPrefixCompression() throws IOException { + int numCols = 100; + RowType.Builder builder = RowType.builder(); + for (int i = 0; i < numCols; i++) { + builder.field( + "com.example.sensors.signal_" + String.format("%03d", i), + DataTypes.DOUBLE().nullable()); + } + RowType rowType = builder.build(); + + List data = new ArrayList<>(); + for (int r = 0; r < 50; r++) { + Object[] fields = new Object[numCols]; + for (int c = 0; c < numCols; c++) { + fields[c] = (double) (r * numCols + c); + } + data.add(GenericRow.of(fields)); + } + + Path path = new Path(tempDir.toString(), "prefix.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(50); + for (int r = 0; r < 50; r++) { + for (int c = 0; c < numCols; c++) { + assertThat(result.get(r).getDouble(c)).isEqualTo((double) (r * numCols + c)); + } + } + + RowType 
projectedType = + RowType.builder() + .field("com.example.sensors.signal_050", DataTypes.DOUBLE().nullable()) + .build(); + List projected = read(rowType, projectedType, path); + assertThat(projected).hasSize(50); + for (int r = 0; r < 50; r++) { + assertThat(projected.get(r).getDouble(0)).isEqualTo((double) (r * numCols + 50)); + } + } + + @Test + public void testSchemaMixedPrefixAndNonPrefix() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("group.a.signal_1", DataTypes.DOUBLE()) + .field("group.a.signal_2", DataTypes.DOUBLE()) + .field("name", DataTypes.STRING()) + .field("group.b.signal_1", DataTypes.FLOAT()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + data.add( + GenericRow.of( + i, + (double) i, + (double) (i * 2), + BinaryString.fromString("n" + i), + (float) i)); + } + + Path path = new Path(tempDir.toString(), "mixed_prefix.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(20); + for (int i = 0; i < 20; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getDouble(1)).isEqualTo((double) i); + assertThat(result.get(i).getDouble(2)).isEqualTo((double) (i * 2)); + assertThat(result.get(i).getString(3).toString()).isEqualTo("n" + i); + assertThat(result.get(i).getFloat(4)).isEqualTo((float) i); + } + } + + @Test + public void testSchemaSerializationRoundTrip() throws IOException { + RowType rowType = + RowType.builder() + .field("simple", DataTypes.INT()) + .field("a.b.col1", DataTypes.DOUBLE()) + .field("a.b.col2", DataTypes.STRING()) + .field("x.y.z.col3", DataTypes.BIGINT()) + .build(); + + MosaicSchema original = MosaicSchema.create(rowType, 10); + byte[] serialized = original.serialize(); + MosaicSchema restored = MosaicSchema.deserialize(serialized); + + assertThat(restored.numBuckets()).isEqualTo(10); + + RowType projAll = rowType; + for (int b = 0; b < 10; b++) { 
+ int[] origMapping = original.getProjectionMapping(b, projAll); + int[] restoredMapping = restored.getProjectionMapping(b, projAll); + if (origMapping == null) { + assertThat(restoredMapping).isNull(); + } else { + assertThat(restoredMapping).isEqualTo(origMapping); + } + } + } + + // ==================== ALL_NULL Column Pruning Tests ==================== + + @Test + public void testAllNullColumnPruningRoundTrip() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("null_col_1", DataTypes.DOUBLE().nullable()) + .field("value", DataTypes.BIGINT()) + .field("null_col_2", DataTypes.STRING().nullable()) + .field("null_col_3", DataTypes.INT().nullable()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, null, (long) i * 10, null, null)); + } + + Path path = new Path(tempDir.toString(), "prune.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).isNullAt(1)).isTrue(); + assertThat(result.get(i).getLong(2)).isEqualTo((long) i * 10); + assertThat(result.get(i).isNullAt(3)).isTrue(); + assertThat(result.get(i).isNullAt(4)).isTrue(); + } + } + + @Test + public void testProjectPrunedAllNullColumn() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("always_null", DataTypes.DOUBLE().nullable()) + .field("value", DataTypes.INT()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 50; i++) { + data.add(GenericRow.of(i, null, i * 2)); + } + + Path path = new Path(tempDir.toString(), "proj_pruned.mosaic"); + write(rowType, data, path); + + RowType projNull = + RowType.builder().field("always_null", DataTypes.DOUBLE().nullable()).build(); + List result = read(rowType, projNull, path); + assertThat(result).hasSize(50); + for 
(int i = 0; i < 50; i++) { + assertThat(result.get(i).isNullAt(0)).isTrue(); + } + + RowType projMixed = + RowType.builder() + .field("always_null", DataTypes.DOUBLE().nullable()) + .field("value", DataTypes.INT()) + .build(); + List result2 = read(rowType, projMixed, path); + assertThat(result2).hasSize(50); + for (int i = 0; i < 50; i++) { + assertThat(result2.get(i).isNullAt(0)).isTrue(); + assertThat(result2.get(i).getInt(1)).isEqualTo(i * 2); + } + } + + @Test + public void testAllNullPruningWideTable() throws IOException { + int totalCols = 500; + int nonNullCols = 50; + + RowType.Builder builder = RowType.builder(); + for (int i = 0; i < totalCols; i++) { + builder.field("col_" + String.format("%04d", i), DataTypes.INT().nullable()); + } + RowType rowType = builder.build(); + + List data = new ArrayList<>(); + for (int r = 0; r < 100; r++) { + Object[] fields = new Object[totalCols]; + for (int c = 0; c < nonNullCols; c++) { + fields[c] = r * totalCols + c; + } + data.add(GenericRow.of(fields)); + } + + Path path = new Path(tempDir.toString(), "wide_prune.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int r = 0; r < 100; r++) { + for (int c = 0; c < nonNullCols; c++) { + assertThat(result.get(r).getInt(c)).isEqualTo(r * totalCols + c); + } + for (int c = nonNullCols; c < totalCols; c++) { + assertThat(result.get(r).isNullAt(c)).isTrue(); + } + } + + // Verify pruning reduced schema size (compared to no pruning) + LocalFileIO fileIO = new LocalFileIO(); + long prunedFileSize = fileIO.getFileSize(path); + + // Write same data without pruning (multi-row-group forces no pruning) + Path noPrunePath = new Path(tempDir.toString(), "wide_no_prune.mosaic"); + MosaicFileFormat tinyFormat = + new MosaicFileFormat( + new FormatContext( + new Options(), 1024, 1024, MemorySize.ofBytes(1), 3, null)); + FormatWriterFactory noPruneFactory = tinyFormat.createWriterFactory(rowType); + 
PositionOutputStream noPruneOut = fileIO.newOutputStream(noPrunePath, false); + FormatWriter noPruneWriter = noPruneFactory.create(noPruneOut, "zstd"); + for (InternalRow row : data) { + noPruneWriter.addElement(row); + } + noPruneWriter.close(); + noPruneOut.close(); + long noPruneSize = fileIO.getFileSize(noPrunePath); + + System.out.printf( + "Pruning test: pruned=%,d bytes, unpruned=%,d bytes, saved=%.0f%%%n", + prunedFileSize, noPruneSize, (1.0 - (double) prunedFileSize / noPruneSize) * 100); + assertThat(prunedFileSize).isLessThan(noPruneSize); + } + + @Test + public void testMultiRowGroupNoPruning() throws IOException { + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("nullable", DataTypes.INT().nullable()) + .build(); + + MosaicFileFormat format = + new MosaicFileFormat( + new FormatContext( + new Options(), 1024, 1024, MemorySize.ofBytes(1), 3, null)); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, i == 0 ? 
42 : null)); + } + + Path path = new Path(tempDir.toString(), "multi_rg_no_prune.mosaic"); + LocalFileIO fileIO = new LocalFileIO(); + FormatWriterFactory writerFactory = format.createWriterFactory(rowType); + PositionOutputStream out = fileIO.newOutputStream(path, false); + FormatWriter writer = writerFactory.create(out, "zstd"); + for (InternalRow row : data) { + writer.addElement(row); + } + writer.close(); + out.close(); + + FormatReaderFactory readerFactory = format.createReaderFactory(rowType, rowType, null); + List result = new ArrayList<>(); + try (RecordReader reader = + readerFactory.createReader( + new FormatReaderContext(fileIO, path, fileIO.getFileSize(path)))) { + reader.forEachRemaining( + row -> { + Object[] fields = new Object[rowType.getFieldCount()]; + for (int i = 0; i < fields.length; i++) { + if (!row.isNullAt(i)) { + fields[i] = + InternalRow.createFieldGetter(rowType.getTypeAt(i), i) + .getFieldOrNull(row); + } + } + result.add(GenericRow.of(fields)); + }); + } + + assertThat(result).hasSize(100); + assertThat(result.get(0).getInt(0)).isEqualTo(0); + assertThat(result.get(0).getInt(1)).isEqualTo(42); + for (int i = 1; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).isNullAt(1)).isTrue(); + } + } + + @Test + public void testAllColumnsAllNull() throws IOException { + RowType rowType = + RowType.builder() + .field("a", DataTypes.INT().nullable()) + .field("b", DataTypes.STRING().nullable()) + .field("c", DataTypes.DOUBLE().nullable()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 30; i++) { + data.add(GenericRow.of(null, null, null)); + } + + Path path = new Path(tempDir.toString(), "all_cols_null.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(30); + for (int i = 0; i < 30; i++) { + assertThat(result.get(i).isNullAt(0)).isTrue(); + assertThat(result.get(i).isNullAt(1)).isTrue(); + 
assertThat(result.get(i).isNullAt(2)).isTrue(); + } + } + + // ==================== Helpers ==================== + + private void write(RowType rowType, List data, Path path) throws IOException { + LocalFileIO fileIO = new LocalFileIO(); + MosaicFileFormat format = createFormat(); + FormatWriterFactory writerFactory = format.createWriterFactory(rowType); + PositionOutputStream out = fileIO.newOutputStream(path, false); + FormatWriter writer = writerFactory.create(out, "zstd"); + for (InternalRow row : data) { + writer.addElement(row); + } + writer.close(); + out.close(); + } + + private List read(RowType dataType, RowType projectedType, Path path) + throws IOException { + LocalFileIO fileIO = new LocalFileIO(); + MosaicFileFormat format = createFormat(); + FormatReaderFactory readerFactory = + format.createReaderFactory(dataType, projectedType, null); + RecordReader reader = + readerFactory.createReader( + new FormatReaderContext(fileIO, path, fileIO.getFileSize(path))); + + List result = new ArrayList<>(); + reader.forEachRemaining( + row -> { + int fieldCount = projectedType.getFieldCount(); + Object[] fields = new Object[fieldCount]; + for (int i = 0; i < fieldCount; i++) { + if (row.isNullAt(i)) { + fields[i] = null; + } else { + fields[i] = + InternalRow.createFieldGetter(projectedType.getTypeAt(i), i) + .getFieldOrNull(row); + } + } + result.add(GenericRow.of(fields)); + }); + reader.close(); + return result; + } + + @Test + public void testLongConstantString() throws IOException { + // 1KB constant string — CONST should work regardless of value length + String longStr = repeatChar('x', 1024); + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("long_const", DataTypes.STRING()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 200; i++) { + data.add(GenericRow.of(i, BinaryString.fromString(longStr))); + } + + Path path = new Path(tempDir.toString(), "long_const.mosaic"); + write(rowType, data, path); + + // 
Verify CONST is smaller than PLAIN (200 * 1KB = 200KB plain, CONST = 1KB) + long fileSize = tempDir.toFile().toPath().resolve("long_const.mosaic").toFile().length(); + + List result = read(rowType, rowType, path); + assertThat(result).hasSize(200); + for (int i = 0; i < 200; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getString(1).toString()).isEqualTo(longStr); + } + } + + @Test + public void testLongConstantStringWithNulls() throws IOException { + String longStr = repeatChar('y', 2048); + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("long_const_nullable", DataTypes.STRING().nullable()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + data.add(GenericRow.of(i, i % 3 == 0 ? null : BinaryString.fromString(longStr))); + } + + Path path = new Path(tempDir.toString(), "long_const_null.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(100); + for (int i = 0; i < 100; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + if (i % 3 == 0) { + assertThat(result.get(i).isNullAt(1)).isTrue(); + } else { + assertThat(result.get(i).getString(1).toString()).isEqualTo(longStr); + } + } + } + + @Test + public void testRepeatedLongStringsDict() throws IOException { + // 5 distinct 500-byte strings — should use DICT encoding + String[] values = new String[5]; + for (int i = 0; i < 5; i++) { + values[i] = repeatChar((char) ('A' + i), 500); + } + + RowType rowType = + RowType.builder() + .field("id", DataTypes.INT()) + .field("long_dict", DataTypes.STRING()) + .build(); + + List data = new ArrayList<>(); + for (int i = 0; i < 500; i++) { + data.add(GenericRow.of(i, BinaryString.fromString(values[i % 5]))); + } + + Path path = new Path(tempDir.toString(), "long_dict.mosaic"); + write(rowType, data, path); + List result = read(rowType, rowType, path); + + assertThat(result).hasSize(500); + for (int i = 0; 
i < 500; i++) { + assertThat(result.get(i).getInt(0)).isEqualTo(i); + assertThat(result.get(i).getString(1).toString()).isEqualTo(values[i % 5]); + } + } + + private MosaicFileFormat createFormat() { + return new MosaicFileFormat( + new FormatContext(new Options(), 1024, 1024, MemorySize.ofMebiBytes(128), 3, null)); + } + + private static String repeatChar(char c, int count) { + char[] chars = new char[count]; + Arrays.fill(chars, c); + return new String(chars); + } + + private RowType buildWideRowType(int columnCount) { + RowType.Builder builder = RowType.builder(); + for (int i = 0; i < columnCount; i++) { + builder.field( + String.format( + "this_is_a_very_long_column_name_for_testing_compression_ratio_column_index_%05d", + i), + DataTypes.INT()); + } + return builder.build(); + } +}