diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java index fab40f3005a1..ea9d6e10e60c 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java @@ -73,6 +73,7 @@ public static class Indexes { public static final String VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".vector.hnsw.mapping"; public static final String VECTOR_IVF_FLAT_INDEX_FILE_EXTENSION = ".vector.ivfflat.index"; public static final String VECTOR_IVF_PQ_INDEX_FILE_EXTENSION = ".vector.ivfpq.index"; + public static final String COLUMNAR_MAP_INDEX_FILE_EXTENSION = ".columnarmap.idx"; } public static class MetadataKeys { @@ -167,6 +168,16 @@ public static class Column { // Optional, default false public static final String IS_AUTO_GENERATED = "isAutoGenerated"; + // Optional, default false. True for materialized columns produced from a COLUMNAR_MAP parent column. + // Example: for a MAP column "metrics", materialized column "value_map_string$__latency" has: + // mapMaterializedColumn = true + // parentMapColumn = metrics + // Materialized columns appear in index_map alongside regular columns with their own forward index, + // optional inverted index, and null-value vector. + public static final String IS_MAP_MATERIALIZED_COLUMN = "mapMaterializedColumn"; + // Optional. The parent MAP column name for columns materialized from a COLUMNAR_MAP index. + public static final String PARENT_MAP_COLUMN = "parentMapColumn"; + /// Partition function, all optional public static final String PARTITION_FUNCTION = "partitionFunction"; public static final String NUM_PARTITIONS = "numPartitions"; diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java index 7c62a2e40a3f..9bb85c3efe08 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java @@ -530,6 +530,23 @@ public List getComplexColumnNames() { return getQualifyingFields(FieldType.COMPLEX, true); } + /// Returns the names of columns that have COLUMNAR_MAP index enabled in the table config. + public List getColumnarMapColumnNames() { + List result = new ArrayList<>(); + if (_tableConfig != null) { + List fieldConfigs = _tableConfig.getFieldConfigList(); + if (fieldConfigs != null) { + for (FieldConfig fieldConfig : fieldConfigs) { + List indexTypes = fieldConfig.getIndexTypes(); + if (indexTypes != null && indexTypes.contains(FieldConfig.IndexType.COLUMNAR_MAP)) { + result.add(fieldConfig.getName()); + } + } + } + } + return result; + } + public void setSegmentPartitionConfig(SegmentPartitionConfig segmentPartitionConfig) { _segmentPartitionConfig = segmentPartitionConfig; } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/MapDataSource.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/MapDataSource.java index f0de70763233..ee7d40f7a6db 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/MapDataSource.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/datasource/MapDataSource.java @@ -25,28 +25,28 @@ public interface MapDataSource extends DataSource { - /** - * Get the map FieldSpec. - */ + /// Returns the map FieldSpec. ComplexFieldSpec.MapFieldSpec getFieldSpec(); - /** - * Get the Data Source representation of a single key within this map column. - */ + /// Returns the DataSource for the given map key. For absent keys, returns a [NullDataSource] + /// that produces the column default value for every document. Callers need not null-check. DataSource getKeyDataSource(String key); - /** - * Get the Data Source representation of all keys within this map column. - */ + /// Returns whether this segment MAY contain the given key. Implementations are allowed to return + /// `true` conservatively (i.e., when it is not possible to determine key presence without a + /// full scan). Callers must handle the case where the key is absent even when this returns + /// `true` — [#getKeyDataSource(String)] will return a DataSource for an absent key + /// (forward-index reads return the column default value; null-value bitmap marks all rows as null). + default boolean containsKey(String key) { + return getKeyDataSources().containsKey(key); + } + + /// Returns DataSources for all keys present in this segment. Map getKeyDataSources(); - /** - * Get the DataSourceMetadata of a single key within this map column. - */ + /// Returns the DataSourceMetadata for the given key. DataSourceMetadata getKeyDataSourceMetadata(String key); - /** - * Get the IndexContainer of a single key within this map column. - */ + /// Returns the ColumnIndexContainer for the given key. ColumnIndexContainer getKeyIndexContainer(String key); } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java index 951bff77b50e..ee5421e44fb1 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/StandardIndexes.java @@ -20,6 +20,7 @@ package org.apache.pinot.segment.spi.index; import org.apache.pinot.segment.spi.index.creator.BloomFilterCreator; +import org.apache.pinot.segment.spi.index.creator.ColumnarMapIndexCreator; import org.apache.pinot.segment.spi.index.creator.CombinedInvertedIndexCreator; import org.apache.pinot.segment.spi.index.creator.DictionaryBasedInvertedIndexCreator; import org.apache.pinot.segment.spi.index.creator.FSTIndexCreator; @@ -31,6 +32,7 @@ import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig; import org.apache.pinot.segment.spi.index.creator.VectorIndexCreator; import org.apache.pinot.segment.spi.index.reader.BloomFilterReader; +import org.apache.pinot.segment.spi.index.reader.ColumnarMapIndexReader; import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.segment.spi.index.reader.ForwardIndexReader; import org.apache.pinot.segment.spi.index.reader.H3IndexReader; @@ -41,6 +43,7 @@ import org.apache.pinot.segment.spi.index.reader.TextIndexReader; import org.apache.pinot.segment.spi.index.reader.VectorIndexReader; import org.apache.pinot.spi.config.table.BloomFilterConfig; +import org.apache.pinot.spi.config.table.ColumnarMapIndexConfig; import org.apache.pinot.spi.config.table.IndexConfig; import org.apache.pinot.spi.config.table.JsonIndexConfig; @@ -79,6 +82,7 @@ public class StandardIndexes { public static final String TEXT_ID = "text_index"; public static final String H3_ID = "h3_index"; public static final String VECTOR_ID = "vector_index"; + public static final String COLUMNAR_MAP_ID = "columnar_map"; private StandardIndexes() { } @@ -142,4 +146,12 @@ public static IndexType) IndexService.getInstance().get(VECTOR_ID); } + + /// Returns the COLUMNAR_MAP index type, which materializes MAP column keys as virtual columns. + @SuppressWarnings("unchecked") + public static IndexType + columnarMap() { + return (IndexType) + IndexService.getInstance().get(COLUMNAR_MAP_ID); + } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarMapIndexCreator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarMapIndexCreator.java new file mode 100644 index 000000000000..faf2fb3219ea --- /dev/null +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/creator/ColumnarMapIndexCreator.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.spi.index.creator; + +import java.io.IOException; +import java.util.Map; +import org.apache.commons.configuration2.PropertiesConfiguration; +import org.apache.pinot.segment.spi.index.IndexCreator; + + +/// Creator for the COLUMNAR_MAP index. Accepts one map per document during segment creation and +/// decomposes it into per-key columnar storage on `seal()`. +/// +/// Implementations are not thread-safe; callers must serialize `add` calls per creator instance. +/// +/// The inherited `add(Object, int)` method from `IndexCreator` treats the first argument as the +/// map and the second as the docId, matching the column-major creator path. +public interface ColumnarMapIndexCreator extends IndexCreator { + + /// Adds one document's map. Keys are routed to per-key columnar storage; declared-type keys are + /// coerced to those types, others use the configured default value type. An empty map is valid. + /// Callers must pass an empty map rather than `null`. + /// + /// @param mapValue the document's map (non-null, may be empty) + /// @param docId document id, must be monotonically non-decreasing across calls + void add(Map mapValue, int docId) + throws IOException; + + /// Returns metadata properties for the materialized columns this creator produced during `seal()`. + /// The framework merges the returned properties into the segment metadata. + /// Returns an empty map for creators that produce no materialized columns. Call after `seal()`. + default Map getMaterializedColumnMetadata() { + return Map.of(); + } +} diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java index d5fd242af57d..ce8061b6d4a9 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImpl.java @@ -72,6 +72,7 @@ public class ColumnMetadataImpl implements ColumnMetadata { private final PartitionFunction _partitionFunction; private final Set _partitions; private final boolean _autoGenerated; + private final String _parentMapColumn; /// List of longs, each encodes: /// - 2 byte - numeric id of IndexType @@ -84,7 +85,8 @@ private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality, @Nullable Comparable minValue, @Nullable Comparable maxValue, boolean minMaxValueInvalid, int lengthOfShortestElement, int lengthOfLongestElement, boolean isAscii, int totalNumberOfEntries, int maxNumberOfMultiValues, int bitsPerElement, @Nullable PartitionFunction partitionFunction, - @Nullable Set partitions, boolean autoGenerated) { + @Nullable Set partitions, boolean autoGenerated, + @Nullable String parentMapColumn) { _fieldSpec = fieldSpec; _totalDocs = totalDocs; _cardinality = cardinality; @@ -102,6 +104,7 @@ private ColumnMetadataImpl(FieldSpec fieldSpec, int totalDocs, int cardinality, _partitionFunction = partitionFunction; _partitions = partitions; _autoGenerated = autoGenerated; + _parentMapColumn = parentMapColumn; } @Override @@ -193,6 +196,17 @@ public boolean isAutoGenerated() { return _autoGenerated; } + /// Returns `true` if this column is a materialized column produced from a COLUMNAR_MAP parent column. + public boolean isMaterializedMapColumn() { + return _parentMapColumn != null; + } + + /// Returns the name of the parent MAP column, or `null` if this is not a materialized column. + @Nullable + public String getParentMapColumn() { + return _parentMapColumn; + } + @Override public long getIndexSizeFor(IndexType type) { short indexId = IndexService.getInstance().getNumericId(type); @@ -252,17 +266,20 @@ public boolean equals(Object o) { && _lengthOfLongestElement == that._lengthOfLongestElement && _isAscii == that._isAscii && _totalNumberOfEntries == that._totalNumberOfEntries && _maxNumberOfMultiValues == that._maxNumberOfMultiValues && _bitsPerElement == that._bitsPerElement - && _autoGenerated == that._autoGenerated && Objects.equals(_fieldSpec, that._fieldSpec) && Objects.equals( + && _autoGenerated == that._autoGenerated + && Objects.equals(_fieldSpec, that._fieldSpec) && Objects.equals( _minValue, that._minValue) && Objects.equals(_maxValue, that._maxValue) && Objects.equals(_partitionFunction, - that._partitionFunction) && Objects.equals(_partitions, that._partitions) && Objects.equals(_indexTypeSizeList, - that._indexTypeSizeList); + that._partitionFunction) && Objects.equals(_partitions, that._partitions) + && Objects.equals(_parentMapColumn, that._parentMapColumn) + && Objects.equals(_indexTypeSizeList, that._indexTypeSizeList); } @Override public int hashCode() { return Objects.hash(_fieldSpec, _totalDocs, _cardinality, _hasDictionary, _sorted, _minValue, _maxValue, _minMaxValueInvalid, _lengthOfShortestElement, _lengthOfLongestElement, _isAscii, _totalNumberOfEntries, - _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated, _indexTypeSizeList); + _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated, + _parentMapColumn, _indexTypeSizeList); } @Override @@ -273,7 +290,8 @@ public String toString() { + _lengthOfShortestElement + ", _lengthOfLongestElement=" + _lengthOfLongestElement + ", _isAscii=" + _isAscii + ", _totalNumberOfEntries=" + _totalNumberOfEntries + ", _maxNumberOfMultiValues=" + _maxNumberOfMultiValues + ", _bitsPerElement=" + _bitsPerElement + ", _partitionFunction=" + _partitionFunction + ", _partitions=" - + _partitions + ", _autoGenerated=" + _autoGenerated + ", _indexTypeSizeList=" + _indexTypeSizeList + '}'; + + _partitions + ", _autoGenerated=" + _autoGenerated + + ", _parentMapColumn=" + _parentMapColumn + ", _indexTypeSizeList=" + _indexTypeSizeList + '}'; } public static ColumnMetadataImpl fromPropertiesConfiguration(PropertiesConfiguration config, int totalDocs, @@ -295,7 +313,8 @@ public static ColumnMetadataImpl fromPropertiesConfiguration(PropertiesConfigura .setMaxNumberOfMultiValues( config.getInt(Column.getKeyFor(column, Column.MAX_MULTI_VALUE_ELEMENTS), UNAVAILABLE)) .setBitsPerElement(config.getInt(Column.getKeyFor(column, Column.BITS_PER_ELEMENT), UNAVAILABLE)) - .setAutoGenerated(config.getBoolean(Column.getKeyFor(column, Column.IS_AUTO_GENERATED), false)); + .setAutoGenerated(config.getBoolean(Column.getKeyFor(column, Column.IS_AUTO_GENERATED), false)) + .setParentMapColumn(config.getString(Column.getKeyFor(column, Column.PARENT_MAP_COLUMN), null)); // Set min/max value DataType storedType = fieldSpec.getDataType().getStoredType(); @@ -383,9 +402,11 @@ public static FieldSpec generateFieldSpec(String column, PropertiesConfiguration List childFieldNames = config.getList(String.class, Column.getKeyFor(column, Column.COMPLEX_CHILD_FIELD_NAMES)); Map childFieldSpecs = new HashMap<>(); - for (String childField : childFieldNames) { - childFieldSpecs.put(childField, - generateFieldSpec(ComplexFieldSpec.getFullChildName(column, childField), config)); + if (childFieldNames != null) { + for (String childField : childFieldNames) { + childFieldSpecs.put(childField, + generateFieldSpec(ComplexFieldSpec.getFullChildName(column, childField), config)); + } } return new ComplexFieldSpec(fieldName, dataType, true, childFieldSpecs); default: @@ -451,6 +472,7 @@ public static class Builder { private PartitionFunction _partitionFunction; private Set _partitions; private boolean _autoGenerated; + private String _parentMapColumn; public Builder setFieldSpec(FieldSpec fieldSpec) { _fieldSpec = fieldSpec; @@ -542,6 +564,11 @@ public Builder setAutoGenerated(boolean autoGenerated) { return this; } + public Builder setParentMapColumn(@Nullable String parentMapColumn) { + _parentMapColumn = parentMapColumn; + return this; + } + public ColumnMetadataImpl build() { // Canonicalize length of shortest/longest element DataType storedType = _fieldSpec.getDataType().getStoredType(); @@ -568,7 +595,8 @@ public ColumnMetadataImpl build() { return new ColumnMetadataImpl(_fieldSpec, _totalDocs, _cardinality, _hasDictionary, _sorted, _minValue, _maxValue, _minMaxValueInvalid, _lengthOfShortestElement, _lengthOfLongestElement, _isAscii, _totalNumberOfEntries, - _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated); + _maxNumberOfMultiValues, _bitsPerElement, _partitionFunction, _partitions, _autoGenerated, + _parentMapColumn); } } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/ColumnarMapIndexReader.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/ColumnarMapIndexReader.java new file mode 100644 index 000000000000..6a7efc542098 --- /dev/null +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/ColumnarMapIndexReader.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.spi.index.reader; + +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.pinot.segment.spi.index.IndexReader; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; + + +/// Reader for the COLUMNAR_MAP index. Each indexed key is materialized as its own per-key +/// forward index plus a presence bitmap. +/// +/// Implementations must be safe for concurrent reads. Mutable implementations may impose +/// a single-writer constraint; refer to the concrete implementation's Javadoc for details. +/// +/// Per-key `DataSource` construction is the responsibility of the surrounding +/// `ColumnarMapDataSource` wrappers, not this reader. This interface exposes only +/// the primitives a wrapper needs (key set, type, presence bitmap, per-doc map view). +public interface ColumnarMapIndexReader extends IndexReader { + + /// Returns the set of all indexed key names. Never null; empty if no keys are indexed. + Set getKeys(); + + /// Returns the value DataType for the given key, or null if the key is not indexed. + @Nullable + DataType getValueType(String key); + + /// Returns the number of documents that have a non-null value for the given key. + /// Returns 0 if the key is not indexed. + int getNumDocsWithKey(String key); + + /// Returns the presence bitmap for the given key (docIds with non-null values). + /// Returns an empty bitmap if the key is not indexed. The returned bitmap must not be mutated. + ImmutableRoaringBitmap getPresenceBitmap(String key); + + /// Reconstructs the full map for a single document from per-key data. Only keys with a + /// non-null value at `docId` appear in the result. Returns an empty map if the document has + /// no values; behavior for an out-of-range `docId` is implementation-defined. + Map getMap(int docId); + + /// Returns whether the given key has an inverted index available. False if the key is not indexed. + default boolean hasInvertedIndex(String key) { + return false; + } + + /// Returns sorted distinct values for the key from the inverted index, or null if no + /// inverted index is available (or the key is not indexed). + @Nullable + default String[] getDistinctValuesForKey(String key) { + return null; + } +} diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java new file mode 100644 index 000000000000..8ef70a1a0b7f --- /dev/null +++ b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/metadata/ColumnMetadataImplTest.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.spi.index.metadata; + +import org.apache.commons.configuration2.PropertiesConfiguration; +import org.apache.pinot.spi.data.DimensionFieldSpec; +import org.apache.pinot.spi.data.FieldSpec; +import org.testng.annotations.Test; + +import static org.testng.Assert.*; + + +public class ColumnMetadataImplTest { + + @Test + public void testVirtualColumnMetadataFromProperties() { + PropertiesConfiguration props = new PropertiesConfiguration(); + String column = "metrics$__tenancy"; + props.setProperty("column." + column + ".dataType", "STRING"); + props.setProperty("column." + column + ".columnType", "DIMENSION"); + props.setProperty("column." + column + ".isSingleValues", "true"); + props.setProperty("column." + column + ".cardinality", "47"); + props.setProperty("column." + column + ".hasDictionary", "true"); + props.setProperty("column." + column + ".bitsPerElement", "6"); + props.setProperty("column." + column + ".mapMaterializedColumn", "true"); + props.setProperty("column." + column + ".parentMapColumn", "metrics"); + + ColumnMetadataImpl metadata = ColumnMetadataImpl.fromPropertiesConfiguration(props, 5000000, column); + assertTrue(metadata.isMaterializedMapColumn()); + assertEquals(metadata.getParentMapColumn(), "metrics"); + assertEquals(metadata.getCardinality(), 47); + assertEquals(metadata.getTotalDocs(), 5000000); + } + + @Test + public void testNonVirtualColumnDefaultValues() { + PropertiesConfiguration props = new PropertiesConfiguration(); + String column = "normalCol"; + props.setProperty("column." + column + ".dataType", "INT"); + props.setProperty("column." + column + ".columnType", "DIMENSION"); + props.setProperty("column." + column + ".isSingleValues", "true"); + props.setProperty("column." + column + ".cardinality", "10"); + + ColumnMetadataImpl metadata = ColumnMetadataImpl.fromPropertiesConfiguration(props, 1000, column); + assertFalse(metadata.isMaterializedMapColumn()); + assertNull(metadata.getParentMapColumn()); + } + + @Test + public void testVirtualColumnViaBuilder() { + ColumnMetadataImpl metadata = ColumnMetadataImpl.builder() + .setFieldSpec(new DimensionFieldSpec("metrics$__tenancy", FieldSpec.DataType.STRING, true)) + .setTotalDocs(100) + .setCardinality(5) + .setParentMapColumn("metrics") + .build(); + + assertTrue(metadata.isMaterializedMapColumn()); + assertEquals(metadata.getParentMapColumn(), "metrics"); + } + + @Test + public void testVirtualColumnIncludedInEqualsAndHashCode() { + ColumnMetadataImpl m1 = ColumnMetadataImpl.builder() + .setFieldSpec(new DimensionFieldSpec("col", FieldSpec.DataType.STRING, true)) + .setTotalDocs(100) + .setCardinality(5) + .setParentMapColumn("parent") + .build(); + + ColumnMetadataImpl m2 = ColumnMetadataImpl.builder() + .setFieldSpec(new DimensionFieldSpec("col", FieldSpec.DataType.STRING, true)) + .setTotalDocs(100) + .setCardinality(5) + .build(); + + assertNotEquals(m1, m2); + + ColumnMetadataImpl m3 = ColumnMetadataImpl.builder() + .setFieldSpec(new DimensionFieldSpec("col", FieldSpec.DataType.STRING, true)) + .setTotalDocs(100) + .setCardinality(5) + .setParentMapColumn("parent") + .build(); + + assertEquals(m1, m3); + assertEquals(m1.hashCode(), m3.hashCode()); + } +} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ColumnarMapIndexConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ColumnarMapIndexConfig.java new file mode 100644 index 000000000000..f72a6f41c23f --- /dev/null +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ColumnarMapIndexConfig.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.config.table; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import javax.annotation.Nullable; + + +/// Configuration for the COLUMNAR_MAP index on a MAP column. +/// +/// **Dense vs sparse:** a key is materialized as its own column if (a) it appears in the explicit +/// `denseKeys` set, or (b) its fill rate (fraction of documents containing the key) is ≥ +/// `denseKeyMinFillRate`. Keys not satisfying either criterion go into a sparse MAP column. +/// +/// **maxDenseKeys cutoff:** when more keys qualify as dense than `maxDenseKeys` allows, the top +/// `maxDenseKeys` keys ranked by fill rate are materialized; the rest fall back to the sparse +/// column. Use `denseKeys` to pin specific keys regardless of fill rate ranking. +public class ColumnarMapIndexConfig extends IndexConfig { + public static final ColumnarMapIndexConfig DISABLED = new ColumnarMapIndexConfig(false); + public static final ColumnarMapIndexConfig DEFAULT = new ColumnarMapIndexConfig(true); + + public static final double DEFAULT_DENSE_KEY_MIN_FILL_RATE = 0.5; + + private final boolean _enableInvertedIndexForDense; + private final Set _invertedIndexKeys; + private final Set _noDictionaryKeys; + private final int _maxDenseKeys; + private final Set _denseKeys; + private final double _denseKeyMinFillRate; + + public static ColumnarMapIndexConfig fromProperties(@Nullable Map properties) { + if (properties == null || properties.isEmpty()) { + return DEFAULT; + } + int maxDenseKeys = Integer.parseInt( + properties.getOrDefault(FieldConfig.COLUMNAR_MAP_INDEX_MAX_DENSE_KEYS, "1000")); + Set invertedIndexKeys = parseCommaSeparated( + properties.get(FieldConfig.COLUMNAR_MAP_INDEX_INVERTED_INDEX_KEYS)); + Set noDictionaryKeys = parseCommaSeparated( + properties.get(FieldConfig.COLUMNAR_MAP_INDEX_NO_DICTIONARY_KEYS)); + boolean enableInvertedForDense = Boolean.parseBoolean( + properties.getOrDefault(FieldConfig.COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE, "false")); + Set denseKeys = parseCommaSeparated( + properties.get(FieldConfig.COLUMNAR_MAP_INDEX_DENSE_KEYS)); + double denseKeyMinFillRate = Double.parseDouble( + properties.getOrDefault(FieldConfig.COLUMNAR_MAP_INDEX_DENSE_KEY_MIN_FILL_RATE, + String.valueOf(DEFAULT_DENSE_KEY_MIN_FILL_RATE))); + return new ColumnarMapIndexConfig(true, enableInvertedForDense, invertedIndexKeys, noDictionaryKeys, maxDenseKeys, + denseKeys, denseKeyMinFillRate); + } + + @Nullable + private static Set parseCommaSeparated(@Nullable String value) { + if (value == null || value.trim().isEmpty()) { + return null; + } + Set result = new HashSet<>(); + for (String part : value.split(FieldConfig.COLUMNAR_MAP_INDEX_KEY_SEPARATOR)) { + String trimmed = part.trim(); + if (!trimmed.isEmpty()) { + result.add(trimmed); + } + } + return result.isEmpty() ? null : result; + } + + public ColumnarMapIndexConfig(boolean enabled) { + this(enabled, false, null, null, 1000, null, DEFAULT_DENSE_KEY_MIN_FILL_RATE); + } + + @JsonCreator + public ColumnarMapIndexConfig( + @JsonProperty("enabled") boolean enabled, + @JsonProperty("enableInvertedIndexForDense") boolean enableInvertedIndexForDense, + @JsonProperty("invertedIndexKeys") @Nullable Set invertedIndexKeys, + @JsonProperty("noDictionaryKeys") @Nullable Set noDictionaryKeys, + @JsonProperty("maxDenseKeys") int maxDenseKeys, + @JsonProperty("denseKeys") @Nullable Set denseKeys, + @JsonProperty("denseKeyMinFillRate") double denseKeyMinFillRate) { + super(!enabled); + _enableInvertedIndexForDense = enableInvertedIndexForDense; + _invertedIndexKeys = invertedIndexKeys; + _noDictionaryKeys = noDictionaryKeys; + _maxDenseKeys = maxDenseKeys > 0 ? maxDenseKeys : 1000; + _denseKeys = denseKeys; + _denseKeyMinFillRate = denseKeyMinFillRate >= 0 ? denseKeyMinFillRate : DEFAULT_DENSE_KEY_MIN_FILL_RATE; + } + + public boolean isEnableInvertedIndexForDense() { + return _enableInvertedIndexForDense; + } + + @Nullable + public Set getInvertedIndexKeys() { + return _invertedIndexKeys; + } + + public boolean shouldEnableInvertedIndexForKey(String key) { + return _enableInvertedIndexForDense + || (_invertedIndexKeys != null && _invertedIndexKeys.contains(key)); + } + + @Nullable + public Set getNoDictionaryKeys() { + return _noDictionaryKeys; + } + + public boolean shouldUseDictionaryForKey(String key) { + return _noDictionaryKeys == null || !_noDictionaryKeys.contains(key); + } + + /// Maximum number of MAP keys to materialise as dense columns. Default: 1000. + /// When more keys qualify as dense, the top `maxDenseKeys` by fill rate are materialized; + /// the rest fall back to the sparse MAP column. + public int getMaxDenseKeys() { + return _maxDenseKeys; + } + + public Set getDenseKeys() { + return _denseKeys != null ? _denseKeys : Set.of(); + } + + public double getDenseKeyMinFillRate() { + return _denseKeyMinFillRate; + } + + public boolean isDenseKey(String key) { + return _denseKeys != null && _denseKeys.contains(key); + } +} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java index 064526695d45..d86691f53f93 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java @@ -71,6 +71,24 @@ public class FieldConfig extends BaseJsonConfig { public static final String TEXT_INDEX_LUCENE_NRT_CACHING_DIRECTORY_BUFFER_SIZE = "luceneNRTCachingDirectoryMaxBufferSizeMB"; + /// COLUMNAR_MAP index property keys, passed via `FieldConfig.properties`. + /// See `ColumnarMapIndexConfig` for semantics. + + /// Maximum number of MAP keys to materialise as dense columns (default 1000). + public static final String COLUMNAR_MAP_INDEX_MAX_DENSE_KEYS = "maxDenseKeys"; + /// Comma-separated explicit list of dense key names (always materialised regardless of fill rate). + public static final String COLUMNAR_MAP_INDEX_DENSE_KEYS = "denseKeys"; + /// Minimum fill rate [0.0, 1.0] for a key to be auto-promoted to dense (default 0.5). + public static final String COLUMNAR_MAP_INDEX_DENSE_KEY_MIN_FILL_RATE = "denseKeyMinFillRate"; + /// Comma-separated keys that get a per-key inverted index (union with enableInvertedIndexForDense). + public static final String COLUMNAR_MAP_INDEX_INVERTED_INDEX_KEYS = "invertedIndexKeys"; + /// Comma-separated keys that are always stored raw (no dictionary), overriding global settings. + public static final String COLUMNAR_MAP_INDEX_NO_DICTIONARY_KEYS = "noDictionaryKeys"; + /// When true, every dense key gets an inverted index regardless of invertedIndexKeys. + public static final String COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE = "enableInvertedIndexForDense"; + /// Separator character used when parsing comma-separated key lists above. + public static final String COLUMNAR_MAP_INDEX_KEY_SEPARATOR = ","; + private final String _name; private final EncodingType _encodingType; private final List _indexTypes; @@ -128,7 +146,28 @@ public enum EncodingType { // If null, there won't be any index // NOTE: TIMESTAMP is ignored. In order to create TIMESTAMP index, configure 'timestampConfig' instead. public enum IndexType { - INVERTED, SORTED, TEXT, FST, IFST, H3, JSON, TIMESTAMP, VECTOR, RANGE + /** Standard inverted index mapping value → docId bitmap. */ + INVERTED, + /** Sorted forward index on a physically sorted column. */ + SORTED, + /** Lucene-based full-text index. */ + TEXT, + /** Finite State Transducer index for prefix/regex string queries. */ + FST, + /** In-memory Finite State Transducer index. */ + IFST, + /** Geospatial H3 index for geo-filtering. */ + H3, + /** JSON index for semi-structured columns. */ + JSON, + /** Timestamp index decomposing epoch values into bucketed granularity columns. */ + TIMESTAMP, + /** Approximate nearest-neighbor vector index. */ + VECTOR, + /** Range index for efficient inequality predicates on numeric/string columns. */ + RANGE, + /** Columnar MAP index storing MAP entries as per-key materialized columns. */ + COLUMNAR_MAP } public enum CompressionCodec { diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/ColumnarMapNaming.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ColumnarMapNaming.java new file mode 100644 index 000000000000..c76df27ed714 --- /dev/null +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ColumnarMapNaming.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.data; + + +/// Naming convention for COLUMNAR_MAP materialized columns. Each dense MAP key is stored as +/// a column named `$__`. Sparse keys share a single synthetic JSON column +/// named `$____sparse__`. +public final class ColumnarMapNaming { + public static final String SEPARATOR = "$__"; + public static final String SPARSE_SUFFIX = "__sparse__"; + + private ColumnarMapNaming() { + } + + public static String virtualColumnName(String mapColumn, String key) { + return mapColumn + SEPARATOR + key; + } + + public static String sparseColumnName(String mapColumn) { + return mapColumn + SEPARATOR + SPARSE_SUFFIX; + } + + public static boolean isColumnarMapVirtualColumn(String columnName) { + return columnName.contains(SEPARATOR); + } + + public static boolean isSparseColumn(String columnName) { + return columnName.endsWith(SEPARATOR + SPARSE_SUFFIX); + } + + public static String parseMapColumn(String virtualColumnName) { + int idx = virtualColumnName.indexOf(SEPARATOR); + return idx >= 0 ? virtualColumnName.substring(0, idx) : virtualColumnName; + } + + public static String parseKey(String virtualColumnName) { + int idx = virtualColumnName.indexOf(SEPARATOR); + return idx >= 0 ? virtualColumnName.substring(idx + SEPARATOR.length()) : virtualColumnName; + } +} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java index fd365ae822fa..4e2559e3922e 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/ComplexFieldSpec.java @@ -20,10 +20,12 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.base.Preconditions; import java.util.HashMap; import java.util.Map; +import javax.annotation.Nullable; import org.apache.pinot.spi.utils.JsonUtils; import org.apache.pinot.spi.utils.StringUtil; @@ -58,17 +60,29 @@ public final class ComplexFieldSpec extends FieldSpec { private final Map _childFieldSpecs; + @JsonProperty("keyTypes") + private Map _keyTypes; + + @JsonProperty("defaultValueType") + private DataType _defaultValueType; + // Default constructor required by JSON de-serializer public ComplexFieldSpec() { super(); _childFieldSpecs = new HashMap<>(); } + public ComplexFieldSpec(String name, DataType dataType, boolean isSingleValueField) { + super(name, dataType, isSingleValueField); + Preconditions.checkArgument(dataType == DataType.STRUCT || dataType == DataType.MAP || dataType == DataType.LIST); + _childFieldSpecs = new HashMap<>(); + } + public ComplexFieldSpec(String name, DataType dataType, boolean isSingleValueField, Map childFieldSpecs) { super(name, dataType, isSingleValueField); Preconditions.checkArgument(dataType == DataType.STRUCT || dataType == DataType.MAP || dataType == DataType.LIST); - _childFieldSpecs = childFieldSpecs; + _childFieldSpecs = new HashMap<>(childFieldSpecs); } public static String[] getColumnPath(String column) { @@ -83,6 +97,24 @@ public Map getChildFieldSpecs() { return _childFieldSpecs; } + @Nullable + public Map getKeyTypes() { + return _keyTypes; + } + + public void setKeyTypes(@Nullable Map keyTypes) { + _keyTypes = keyTypes; + } + + @Nullable + public DataType getDefaultValueType() { + return _defaultValueType; + } + + public void setDefaultValueType(@Nullable DataType defaultValueType) { + _defaultValueType = defaultValueType; + } + @JsonIgnore @Override public FieldType getFieldType() { @@ -99,15 +131,21 @@ public static class MapFieldSpec { private final String _fieldName; private final FieldSpec _keyFieldSpec; private final FieldSpec _valueFieldSpec; + private final Map _keyTypes; + private final DataType _defaultValueType; private MapFieldSpec(ComplexFieldSpec complexFieldSpec) { - Preconditions.checkState(complexFieldSpec.getChildFieldSpecs().containsKey(KEY_FIELD), - "Missing 'key' in the 'childFieldSpec'"); - Preconditions.checkState(complexFieldSpec.getChildFieldSpecs().containsKey(VALUE_FIELD), - "Missing 'value' in the 'childFieldSpec'"); - _keyFieldSpec = complexFieldSpec.getChildFieldSpec(KEY_FIELD); - _valueFieldSpec = complexFieldSpec.getChildFieldSpec(VALUE_FIELD); _fieldName = complexFieldSpec.getName(); + Map children = complexFieldSpec.getChildFieldSpecs(); + if (children.containsKey(KEY_FIELD) && children.containsKey(VALUE_FIELD)) { + _keyFieldSpec = complexFieldSpec.getChildFieldSpec(KEY_FIELD); + _valueFieldSpec = complexFieldSpec.getChildFieldSpec(VALUE_FIELD); + } else { + _keyFieldSpec = new DimensionFieldSpec(KEY_FIELD, DataType.STRING, true); + _valueFieldSpec = new DimensionFieldSpec(VALUE_FIELD, DataType.STRING, true); + } + _keyTypes = complexFieldSpec.getKeyTypes(); + _defaultValueType = complexFieldSpec.getDefaultValueType(); } public String getFieldName() { @@ -121,6 +159,16 @@ public FieldSpec getKeyFieldSpec() { public FieldSpec getValueFieldSpec() { return _valueFieldSpec; } + + @Nullable + public Map getKeyTypes() { + return _keyTypes; + } + + @Nullable + public DataType getDefaultValueType() { + return _defaultValueType; + } } public static MapFieldSpec toMapFieldSpec(ComplexFieldSpec complexFieldSpec) { @@ -143,11 +191,32 @@ public static String getFullChildName(String... columns) { public ObjectNode toJsonObject() { ObjectNode jsonObject = super.toJsonObject(); - ObjectNode childFieldSpecsNode = JsonUtils.newObjectNode(); - for (Map.Entry entry : _childFieldSpecs.entrySet()) { - childFieldSpecsNode.put(entry.getKey(), entry.getValue().toJsonObject()); + // Always emit childFieldSpecs for MAP to preserve wire compatibility with older brokers/servers + // that deserialize MAP columns via ComplexFieldSpec.toMapFieldSpec() and expect key/value children. + // For MAP columns without explicit children, emit the legacy STRING defaults. + Map childSpecs = _childFieldSpecs; + if (childSpecs.isEmpty() && _dataType == DataType.MAP) { + childSpecs = Map.of( + KEY_FIELD, new DimensionFieldSpec(KEY_FIELD, DataType.STRING, true), + VALUE_FIELD, new DimensionFieldSpec(VALUE_FIELD, DataType.STRING, true)); + } + if (!childSpecs.isEmpty()) { + ObjectNode childFieldSpecsNode = JsonUtils.newObjectNode(); + for (Map.Entry entry : childSpecs.entrySet()) { + childFieldSpecsNode.put(entry.getKey(), entry.getValue().toJsonObject()); + } + jsonObject.put("childFieldSpecs", childFieldSpecsNode); + } + if (_keyTypes != null && !_keyTypes.isEmpty()) { + ObjectNode keyTypesNode = JsonUtils.newObjectNode(); + for (Map.Entry entry : _keyTypes.entrySet()) { + keyTypesNode.put(entry.getKey(), entry.getValue().name()); + } + jsonObject.set("keyTypes", keyTypesNode); + } + if (_defaultValueType != null) { + jsonObject.put("defaultValueType", _defaultValueType.name()); } - jsonObject.put("childFieldSpecs", childFieldSpecsNode); return jsonObject; } } diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/ColumnarMapIndexConfigTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/ColumnarMapIndexConfigTest.java new file mode 100644 index 000000000000..7ee893eb7707 --- /dev/null +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/config/table/ColumnarMapIndexConfigTest.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.config.table; + +import java.util.Map; +import java.util.Set; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + + +public class ColumnarMapIndexConfigTest { + + @Test + public void testDefaultConfig() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.DEFAULT; + assertTrue(config.isEnabled()); + assertEquals(config.getMaxDenseKeys(), 1000); + assertEquals(config.getDenseKeyMinFillRate(), 0.5); + assertTrue(config.getDenseKeys().isEmpty()); + assertNull(config.getInvertedIndexKeys()); + assertNull(config.getNoDictionaryKeys()); + assertFalse(config.isEnableInvertedIndexForDense()); + } + + @Test + public void testFromProperties() { + Map props = Map.of( + FieldConfig.COLUMNAR_MAP_INDEX_MAX_DENSE_KEYS, "500", + FieldConfig.COLUMNAR_MAP_INDEX_DENSE_KEYS, "country,tenancy", + FieldConfig.COLUMNAR_MAP_INDEX_DENSE_KEY_MIN_FILL_RATE, "0.3", + FieldConfig.COLUMNAR_MAP_INDEX_INVERTED_INDEX_KEYS, "country", + FieldConfig.COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE, "false" + ); + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties(props); + assertTrue(config.isEnabled()); + assertEquals(config.getMaxDenseKeys(), 500); + assertEquals(config.getDenseKeyMinFillRate(), 0.3); + assertEquals(config.getDenseKeys(), Set.of("country", "tenancy")); + assertTrue(config.isDenseKey("country")); + assertTrue(config.shouldEnableInvertedIndexForKey("country")); + assertFalse(config.shouldEnableInvertedIndexForKey("tenancy")); + } + + @Test + public void testFromPropertiesEnableInvertedForDense() { + Map props = Map.of( + FieldConfig.COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE, "true" + ); + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties(props); + assertTrue(config.isEnableInvertedIndexForDense()); + assertTrue(config.shouldEnableInvertedIndexForKey("anyKey")); + } + + @Test + public void testDisabledConfig() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.DISABLED; + assertFalse(config.isEnabled()); + } + + @Test + public void testNoDictionaryKeys() { + ColumnarMapIndexConfig config = new ColumnarMapIndexConfig(true, false, null, + Set.of("raw_payload"), 1000, null, 0.5); + assertFalse(config.shouldUseDictionaryForKey("raw_payload")); + assertTrue(config.shouldUseDictionaryForKey("other_key")); + } + + @Test + public void testFromPropertiesDefaults() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties(null); + assertTrue(config.isEnabled()); + assertEquals(config.getMaxDenseKeys(), 1000); + assertEquals(config.getDenseKeyMinFillRate(), 0.5); + } + + @Test + public void testShouldEnableInvertedIndexForKeyGlobalFlag() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties( + Map.of( + FieldConfig.COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE, "true" + )); + assertTrue(config.shouldEnableInvertedIndexForKey("any_key")); + } + + @Test + public void testShouldEnableInvertedIndexForKeyPerKeyOnly() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties( + Map.of( + FieldConfig.COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE, "false", + FieldConfig.COLUMNAR_MAP_INDEX_INVERTED_INDEX_KEYS, "country,clicks" + )); + assertTrue(config.shouldEnableInvertedIndexForKey("country")); + assertTrue(config.shouldEnableInvertedIndexForKey("clicks")); + assertFalse(config.shouldEnableInvertedIndexForKey("other")); + } + + @Test + public void testShouldEnableInvertedIndexForKeyUnion() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties( + Map.of( + FieldConfig.COLUMNAR_MAP_INDEX_ENABLE_INVERTED_FOR_DENSE, "true", + FieldConfig.COLUMNAR_MAP_INDEX_INVERTED_INDEX_KEYS, "country" + )); + assertTrue(config.shouldEnableInvertedIndexForKey("country")); + assertTrue(config.shouldEnableInvertedIndexForKey("other")); + } + + @Test + public void testShouldUseDictionaryForKeyHardOverride() { + ColumnarMapIndexConfig config = ColumnarMapIndexConfig.fromProperties( + Map.of( + FieldConfig.COLUMNAR_MAP_INDEX_NO_DICTIONARY_KEYS, "blob,raw_payload" + )); + assertFalse(config.shouldUseDictionaryForKey("blob")); + assertFalse(config.shouldUseDictionaryForKey("raw_payload")); + assertTrue(config.shouldUseDictionaryForKey("country")); + } +} diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/ColumnarMapDataTypeTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/ColumnarMapDataTypeTest.java new file mode 100644 index 000000000000..14264e701d29 --- /dev/null +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/ColumnarMapDataTypeTest.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.data; + +import java.util.Map; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.utils.JsonUtils; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + + +public class ColumnarMapDataTypeTest { + + @Test + public void testKeyTypesOnComplexFieldSpec() { + Map keyTypes = Map.of("clicks", DataType.LONG, "country", DataType.STRING); + ComplexFieldSpec spec = new ComplexFieldSpec("metrics", DataType.MAP, true); + spec.setKeyTypes(keyTypes); + spec.setDefaultValueType(DataType.STRING); + + assertEquals(spec.getKeyTypes(), keyTypes); + assertEquals(spec.getDefaultValueType(), DataType.STRING); + } + + @Test + public void testDefaultValueTypeDefaultsToNull() { + ComplexFieldSpec spec = new ComplexFieldSpec("metrics", DataType.MAP, true); + assertNull(spec.getKeyTypes()); + assertNull(spec.getDefaultValueType()); + } + + @Test + public void testJsonRoundTripWithKeyTypes() + throws Exception { + Map keyTypes = Map.of("clicks", DataType.LONG, "country", DataType.STRING); + + ComplexFieldSpec original = new ComplexFieldSpec("metrics", DataType.MAP, true); + original.setKeyTypes(keyTypes); + original.setDefaultValueType(DataType.STRING); + + String json = original.toJsonObject().toString(); + ComplexFieldSpec deserialized = JsonUtils.stringToObject(json, ComplexFieldSpec.class); + + assertNotNull(deserialized.getKeyTypes()); + assertEquals(deserialized.getKeyTypes().get("clicks"), DataType.LONG); + assertEquals(deserialized.getKeyTypes().get("country"), DataType.STRING); + assertEquals(deserialized.getDefaultValueType(), DataType.STRING); + } + + @Test + public void testJsonRoundTripWithoutKeyTypes() + throws Exception { + ComplexFieldSpec original = new ComplexFieldSpec("metrics", DataType.MAP, true); + String json = original.toJsonObject().toString(); + ComplexFieldSpec deserialized = JsonUtils.stringToObject(json, ComplexFieldSpec.class); + + assertNull(deserialized.getKeyTypes()); + assertNull(deserialized.getDefaultValueType()); + } + + @Test + public void testSchemaWithKeyTypesRoundTrip() + throws Exception { + String schemaJson = "{\n" + + " \"schemaName\": \"testSchema\",\n" + + " \"complexFieldSpecs\": [\n" + + " {\n" + + " \"name\": \"metrics\",\n" + + " \"dataType\": \"MAP\",\n" + + " \"keyTypes\": {\"clicks\": \"LONG\", \"country\": \"STRING\"},\n" + + " \"defaultValueType\": \"STRING\"\n" + + " }\n" + + " ]\n" + + "}"; + + Schema schema = JsonUtils.stringToObject(schemaJson, Schema.class); + FieldSpec fieldSpec = schema.getFieldSpecFor("metrics"); + assertNotNull(fieldSpec); + assertEquals(fieldSpec.getDataType(), DataType.MAP); + + ComplexFieldSpec complexSpec = (ComplexFieldSpec) fieldSpec; + assertNotNull(complexSpec.getKeyTypes()); + assertEquals(complexSpec.getKeyTypes().get("clicks"), DataType.LONG); + assertEquals(complexSpec.getKeyTypes().get("country"), DataType.STRING); + assertEquals(complexSpec.getDefaultValueType(), DataType.STRING); + + // Re-serialize and verify round-trip + String reJson = schema.toJsonObject().toString(); + Schema reSchema = JsonUtils.stringToObject(reJson, Schema.class); + ComplexFieldSpec reSpec = (ComplexFieldSpec) reSchema.getFieldSpecFor("metrics"); + assertEquals(reSpec.getKeyTypes().get("clicks"), DataType.LONG); + assertEquals(reSpec.getDefaultValueType(), DataType.STRING); + } +} diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/ColumnarMapNamingTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/ColumnarMapNamingTest.java new file mode 100644 index 000000000000..5e42307372c8 --- /dev/null +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/ColumnarMapNamingTest.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.spi.data; + +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + + +public class ColumnarMapNamingTest { + + @Test + public void testVirtualColumnName() { + assertEquals(ColumnarMapNaming.virtualColumnName("metrics", "tenancy"), "metrics$__tenancy"); + } + + @Test + public void testSparseColumnName() { + assertEquals(ColumnarMapNaming.sparseColumnName("metrics"), "metrics$____sparse__"); + } + + @Test + public void testIsVirtualColumn() { + assertTrue(ColumnarMapNaming.isColumnarMapVirtualColumn("metrics$__tenancy")); + assertTrue(ColumnarMapNaming.isColumnarMapVirtualColumn("metrics$____sparse__")); + assertFalse(ColumnarMapNaming.isColumnarMapVirtualColumn("metrics")); + assertFalse(ColumnarMapNaming.isColumnarMapVirtualColumn("normal_column")); + assertFalse(ColumnarMapNaming.isColumnarMapVirtualColumn("metrics$$tenancy")); + } + + @Test + public void testParseMapColumn() { + assertEquals(ColumnarMapNaming.parseMapColumn("metrics$__tenancy"), "metrics"); + assertEquals(ColumnarMapNaming.parseMapColumn("m__data$__key$__nested"), "m__data"); + } + + @Test + public void testParseKey() { + assertEquals(ColumnarMapNaming.parseKey("metrics$__tenancy"), "tenancy"); + assertEquals(ColumnarMapNaming.parseKey("metrics$____sparse__"), "__sparse__"); + } + + @Test + public void testIsSparseColumn() { + assertTrue(ColumnarMapNaming.isSparseColumn("metrics$____sparse__")); + assertFalse(ColumnarMapNaming.isSparseColumn("metrics$__tenancy")); + } + + @Test + public void testRoundTrip() { + String mapCol = "event_props"; + String key = "country_iso2"; + String virtual = ColumnarMapNaming.virtualColumnName(mapCol, key); + assertEquals(ColumnarMapNaming.parseMapColumn(virtual), mapCol); + assertEquals(ColumnarMapNaming.parseKey(virtual), key); + assertTrue(ColumnarMapNaming.isColumnarMapVirtualColumn(virtual)); + assertFalse(ColumnarMapNaming.isSparseColumn(virtual)); + } +} diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java index 51d79b01026f..9e96e3cefb7d 100644 --- a/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/SchemaTest.java @@ -795,4 +795,24 @@ public void testWithoutVirtualColumnsCopiesDescription() { assertThat(withoutVirtual.getDescription()).isEqualTo("my description"); assertThat(withoutVirtual.getTags()).isEqualTo(List.of("tag1")); } + + @Test + public void testAcceptsVirtualColumnSeparatorInSchemaWithoutColumnarMap() { + // $__ is reserved by COLUMNAR_MAP virtual columns, but Schema.validate() no longer rejects it globally. + // The rejection happens in TableConfigUtils.validate() only when COLUMNAR_MAP is enabled for a table. + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("metrics$__tenancy", FieldSpec.DataType.STRING) + .build(); + schema.validate(); // should not throw + } + + @Test + public void testAcceptsNormalColumnNames() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("metrics__tenancy", FieldSpec.DataType.STRING) + .addSingleValueDimension("metrics$$tenancy", FieldSpec.DataType.STRING) + .addSingleValueDimension("metrics_dollar_sign", FieldSpec.DataType.STRING) + .build(); + schema.validate(); + } }