apache · tarun11Mavani · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -73,6 +73,7 @@ public static class Indexes {
     public static final String VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".vector.hnsw.mapping";
     public static final String VECTOR_IVF_FLAT_INDEX_FILE_EXTENSION = ".vector.ivfflat.index";
     public static final String VECTOR_IVF_PQ_INDEX_FILE_EXTENSION = ".vector.ivfpq.index";
+    public static final String COLUMNAR_MAP_INDEX_FILE_EXTENSION = ".columnarmap.idx";
   }
 
   public static class MetadataKeys {
@@ -167,6 +168,11 @@ public static class Column {
       // Optional, default false
       public static final String IS_AUTO_GENERATED = "isAutoGenerated";
 
+      // Optional, default false. True for virtual columns materialized from a COLUMNAR_MAP parent column.
+      public static final String IS_MAP_VIRTUAL_COLUMN = "mapVirtualColumn";
+      // Optional. The name of the parent MAP column for virtual columns.
+      public static final String PARENT_MAP_COLUMN = "parentMapColumn";
+
       /// Partition function, all optional
       public static final String PARTITION_FUNCTION = "partitionFunction";
       public static final String NUM_PARTITIONS = "numPartitions";

diff --git a/...egment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/...egment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -530,6 +530,23 @@ public List<String> getComplexColumnNames() {
     return getQualifyingFields(FieldType.COMPLEX, true);
   }
 
+  /// Returns the names of columns that have COLUMNAR_MAP index enabled in the table config.
+  public List<String> getColumnarMapColumnNames() {
+    List<String> result = new ArrayList<>();
+    if (_tableConfig != null) {
+      List<FieldConfig> fieldConfigs = _tableConfig.getFieldConfigList();
+      if (fieldConfigs != null) {
+        for (FieldConfig fieldConfig : fieldConfigs) {
+          List<FieldConfig.IndexType> indexTypes = fieldConfig.getIndexTypes();
+          if (indexTypes != null && indexTypes.contains(FieldConfig.IndexType.COLUMNAR_MAP)) {
+            result.add(fieldConfig.getName());
+          }
+        }
+      }
+    }
+    return result;
+  }
+
   public void setSegmentPartitionConfig(SegmentPartitionConfig segmentPartitionConfig) {
     _segmentPartitionConfig = segmentPartitionConfig;
   }

diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/MapDataSource.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/MapDataSource.java
@@ -32,9 +32,22 @@ public interface MapDataSource extends DataSource {
 
   /**
    * Get the Data Source representation of a single key within this map column.
+   * Only call after confirming the key exists via {@link #containsKey(String)}.
    */
   DataSource getKeyDataSource(String key);
-  DataSource getKeyDataSource(String key);
+  @Nullable
+  DataSource getDataSource(String key);
-  DataSource getKeyDataSource(String key);
+  @Nullable
+  DataSource getDataSource(String key);
 
+  /**
+   * Returns true if {@code key} is present in this MAP column for at least one document in
+   * this segment. Call this before {@link #getKeyDataSource(String)} to avoid undefined
+   * behaviour on absent keys.
+   *
+   * <p>The default implementation delegates to {@link #getKeyDataSources()}, which may be
+   * expensive for large key sets. Implementations should override for O(1) performance.
+   */
+  default boolean containsKey(String key) {
+    return getKeyDataSources().containsKey(key);
+  }
+
   /**
    * Get the Data Source representation of all keys within this map column.
    */

diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java
@@ -20,6 +20,7 @@
 package org.apache.pinot.segment.spi.index;
 
 import org.apache.pinot.segment.spi.index.creator.BloomFilterCreator;
+import org.apache.pinot.segment.spi.index.creator.ColumnarMapIndexCreator;
 import org.apache.pinot.segment.spi.index.creator.CombinedInvertedIndexCreator;
 import org.apache.pinot.segment.spi.index.creator.DictionaryBasedInvertedIndexCreator;
 import org.apache.pinot.segment.spi.index.creator.FSTIndexCreator;
@@ -31,6 +32,7 @@
 import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig;
 import org.apache.pinot.segment.spi.index.creator.VectorIndexCreator;
 import org.apache.pinot.segment.spi.index.reader.BloomFilterReader;
+import org.apache.pinot.segment.spi.index.reader.ColumnarMapIndexReader;
 import org.apache.pinot.segment.spi.index.reader.Dictionary;
 import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader;
 import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
@@ -41,6 +43,7 @@
 import org.apache.pinot.segment.spi.index.reader.TextIndexReader;
 import org.apache.pinot.segment.spi.index.reader.VectorIndexReader;
 import org.apache.pinot.spi.config.table.BloomFilterConfig;
+import org.apache.pinot.spi.config.table.ColumnarMapIndexConfig;
 import org.apache.pinot.spi.config.table.IndexConfig;
 import org.apache.pinot.spi.config.table.JsonIndexConfig;
 
@@ -79,6 +82,7 @@ public class StandardIndexes {
   public static final String TEXT_ID = "text_index";
   public static final String H3_ID = "h3_index";
   public static final String VECTOR_ID = "vector_index";
+  public static final String COLUMNAR_MAP_ID = "columnar_map";
 
   private StandardIndexes() {
   }
@@ -142,4 +146,12 @@ public static IndexType<VectorIndexConfig, VectorIndexReader, VectorIndexCreator
     return (IndexType<VectorIndexConfig, VectorIndexReader, VectorIndexCreator>)
         IndexService.getInstance().get(VECTOR_ID);
   }
+
+  /// Returns the COLUMNAR_MAP index type, which materializes MAP column keys as virtual columns.
+  @SuppressWarnings("unchecked")
+  public static IndexType<ColumnarMapIndexConfig, ColumnarMapIndexReader, ColumnarMapIndexCreator>
+      columnarMap() {
+    return (IndexType<ColumnarMapIndexConfig, ColumnarMapIndexReader, ColumnarMapIndexCreator>)
+        IndexService.getInstance().get(COLUMNAR_MAP_ID);
+  }
 }
diff --git a/...spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarMapIndexCreator.java b/...spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarMapIndexCreator.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.spi.index.creator;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.commons.configuration2.PropertiesConfiguration;
+import org.apache.pinot.segment.spi.index.IndexCreator;
+
+
+/**
+ * Creator for the COLUMNAR_MAP index. Accepts one map per document during segment creation
+ * and decomposes it into per-key columnar storage on seal().
+ *
+ * <p>Implementations are not thread-safe; callers must serialize {@link #add} calls per
+ * creator instance.
+ *
+ * <p>The inherited {@code add(Object, int)} method from {@link IndexCreator} treats the
+ * first argument as the map and the second as the docId, matching the column-major creator
+ * path. Callers may use either entry point.
+ */
+public interface ColumnarMapIndexCreator extends IndexCreator {
+
+  /**
+   * Adds one document's map. Keys present in the map's entry set are routed to per-key
+   * columnar storage; keys with declared types are coerced to those types, others fall
+   * back to the configured default value type. A null or empty map is valid and means the
+   * document has no key/value pairs.
+   *
+   * @param mapValue the document's map (may be null or empty)
+   * @param docId the document id, must be monotonically non-decreasing across calls
+   */
+  void add(@Nullable Map<String, Object> mapValue, int docId)
+      throws IOException;
+
+  /**
+   * Returns metadata properties for any virtual columns this creator materialized during
+   * {@code seal()}. The framework merges the returned properties into the segment metadata.
+   * Implementations that do not produce virtual columns return an empty map.
+   *
+   * <p>Call after {@code seal()}.
+   *
+   * @return a map from virtual-column name to its {@link PropertiesConfiguration}; never null
+   */
+  default Map<String, PropertiesConfiguration> getVirtualColumnMetadata() {
+    return Collections.emptyMap();
+  }
+}
diff --git a/...ent-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java b/...ent-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java
@@ -72,6 +72,8 @@ public class ColumnMetadataImpl implements ColumnMetadata {
   private final PartitionFunction _partitionFunction;
   private final Set<Integer> _partitions;
   private final boolean _autoGenerated;
+  private final boolean _isMapVirtualColumn;
+  private final String _parentMapColumn;
 
   /// List of longs, each encodes:
   /// - 2 byte - numeric id of IndexType
@@ -84,7 +86,8 @@ private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality,
       @Nullable Comparable minValue, @Nullable Comparable maxValue, boolean minMaxValueInvalid,
       int lengthOfShortestElement, int lengthOfLongestElement, boolean isAscii, int totalNumberOfEntries,
       int maxNumberOfMultiValues, int bitsPerElement, @Nullable PartitionFunction partitionFunction,
-      @Nullable Set<Integer> partitions, boolean autoGenerated) {
+      @Nullable Set<Integer> partitions, boolean autoGenerated, boolean isMapVirtualColumn,
+      @Nullable String parentMapColumn) {
     _fieldSpec = fieldSpec;
     _totalDocs = totalDocs;
     _cardinality = cardinality;
@@ -102,6 +105,8 @@ private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality,
     _partitionFunction = partitionFunction;
     _partitions = partitions;
     _autoGenerated = autoGenerated;
+    _isMapVirtualColumn = isMapVirtualColumn;
+    _parentMapColumn = parentMapColumn;
   }
 
   @Override
@@ -193,6 +198,17 @@ public boolean isAutoGenerated() {
     return _autoGenerated;
   }
 
+  /// Returns {@code true} if this column is a virtual column materialized from a COLUMNAR_MAP parent column.
+  public boolean isMapVirtualColumn() {
+    return _isMapVirtualColumn;
+  }
+
+  /// Returns the name of the parent MAP column, or {@code null} if this is not a virtual column.
+  @Nullable
+  public String getParentMapColumn() {
+    return _parentMapColumn;
+  }
+
   @Override
   public long getIndexSizeFor(IndexType type) {
     short indexId = IndexService.getInstance().getNumericId(type);
@@ -252,17 +268,20 @@ public boolean equals(Object o) {
         && _lengthOfLongestElement == that._lengthOfLongestElement && _isAscii == that._isAscii
         && _totalNumberOfEntries == that._totalNumberOfEntries
         && _maxNumberOfMultiValues == that._maxNumberOfMultiValues && _bitsPerElement == that._bitsPerElement
-        && _autoGenerated == that._autoGenerated && Objects.equals(_fieldSpec, that._fieldSpec) && Objects.equals(
+        && _autoGenerated == that._autoGenerated && _isMapVirtualColumn == that._isMapVirtualColumn
+        && Objects.equals(_fieldSpec, that._fieldSpec) && Objects.equals(
         _minValue, that._minValue) && Objects.equals(_maxValue, that._maxValue) && Objects.equals(_partitionFunction,
-        that._partitionFunction) && Objects.equals(_partitions, that._partitions) && Objects.equals(_indexTypeSizeList,
-        that._indexTypeSizeList);
+        that._partitionFunction) && Objects.equals(_partitions, that._partitions)
+        && Objects.equals(_parentMapColumn, that._parentMapColumn)
+        && Objects.equals(_indexTypeSizeList, that._indexTypeSizeList);
   }
 
   @Override
   public int hashCode() {
     return Objects.hash(_fieldSpec, _totalDocs, _cardinality, _hasDictionary, _sorted, _minValue, _maxValue,
         _minMaxValueInvalid, _lengthOfShortestElement, _lengthOfLongestElement, _isAscii, _totalNumberOfEntries,
-        _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated, _indexTypeSizeList);
+        _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated, _isMapVirtualColumn,
+        _parentMapColumn, _indexTypeSizeList);
   }
 
   @Override
@@ -273,7 +292,8 @@ public String toString() {
         + _lengthOfShortestElement + ", _lengthOfLongestElement=" + _lengthOfLongestElement + ", _isAscii=" + _isAscii
         + ", _totalNumberOfEntries=" + _totalNumberOfEntries + ", _maxNumberOfMultiValues=" + _maxNumberOfMultiValues
         + ", _bitsPerElement=" + _bitsPerElement + ", _partitionFunction=" + _partitionFunction + ", _partitions="
-        + _partitions + ", _autoGenerated=" + _autoGenerated + ", _indexTypeSizeList=" + _indexTypeSizeList + '}';
+        + _partitions + ", _autoGenerated=" + _autoGenerated + ", _isMapVirtualColumn=" + _isMapVirtualColumn
+        + ", _parentMapColumn=" + _parentMapColumn + ", _indexTypeSizeList=" + _indexTypeSizeList + '}';
   }
 
   public static ColumnMetadataImpl fromPropertiesConfiguration(PropertiesConfiguration config, int totalDocs,
@@ -295,7 +315,9 @@ public static ColumnMetadataImpl fromPropertiesConfiguration(PropertiesConfigura
         .setMaxNumberOfMultiValues(
             config.getInt(Column.getKeyFor(column, Column.MAX_MULTI_VALUE_ELEMENTS), UNAVAILABLE))
         .setBitsPerElement(config.getInt(Column.getKeyFor(column, Column.BITS_PER_ELEMENT), UNAVAILABLE))
-        .setAutoGenerated(config.getBoolean(Column.getKeyFor(column, Column.IS_AUTO_GENERATED), false));
+        .setAutoGenerated(config.getBoolean(Column.getKeyFor(column, Column.IS_AUTO_GENERATED), false))
+        .setMapVirtualColumn(config.getBoolean(Column.getKeyFor(column, Column.IS_MAP_VIRTUAL_COLUMN), false))
+        .setParentMapColumn(config.getString(Column.getKeyFor(column, Column.PARENT_MAP_COLUMN), null));
 
     // Set min/max value
     DataType storedType = fieldSpec.getDataType().getStoredType();
@@ -383,9 +405,11 @@ public static FieldSpec generateFieldSpec(String column, PropertiesConfiguration
         List<String> childFieldNames =
             config.getList(String.class, Column.getKeyFor(column, Column.COMPLEX_CHILD_FIELD_NAMES));
         Map<String, FieldSpec> childFieldSpecs = new HashMap<>();
-        for (String childField : childFieldNames) {
-          childFieldSpecs.put(childField,
-              generateFieldSpec(ComplexFieldSpec.getFullChildName(column, childField), config));
+        if (childFieldNames != null) {
+          for (String childField : childFieldNames) {
+            childFieldSpecs.put(childField,
+                generateFieldSpec(ComplexFieldSpec.getFullChildName(column, childField), config));
+          }
         }
         return new ComplexFieldSpec(fieldName, dataType, true, childFieldSpecs);
       default:
@@ -451,6 +475,8 @@ public static class Builder {
     private PartitionFunction _partitionFunction;
     private Set<Integer> _partitions;
     private boolean _autoGenerated;
+    private boolean _isMapVirtualColumn;
+    private String _parentMapColumn;
 
     public Builder setFieldSpec(FieldSpec fieldSpec) {
       _fieldSpec = fieldSpec;
@@ -542,6 +568,16 @@ public Builder setAutoGenerated(boolean autoGenerated) {
       return this;
     }
 
+    public Builder setMapVirtualColumn(boolean isMapVirtualColumn) {
+      _isMapVirtualColumn = isMapVirtualColumn;
+      return this;
+    }
+
+    public Builder setParentMapColumn(@Nullable String parentMapColumn) {
+      _parentMapColumn = parentMapColumn;
+      return this;
+    }
+
     public ColumnMetadataImpl build() {
       // Canonicalize length of shortest/longest element
       DataType storedType = _fieldSpec.getDataType().getStoredType();
@@ -568,7 +604,9 @@ public ColumnMetadataImpl build() {
 
       return new ColumnMetadataImpl(_fieldSpec, _totalDocs, _cardinality, _hasDictionary, _sorted, _minValue, _maxValue,
           _minMaxValueInvalid, _lengthOfShortestElement, _lengthOfLongestElement, _isAscii, _totalNumberOfEntries,
-          _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated);
+          _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated,
+          _isMapVirtualColumn,
+          _parentMapColumn);
     }
   }
 }