diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 87ebb03bee77..89030289c27d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -385,6 +385,9 @@ Optimizations * GITHUB#16001: IndexSearcher.count() was calling query.rewrite twice, a regression since v9.10 (David Smiley) +* GITHUB#16050: Add SIMD-accelerated bulk range evaluation for dense numeric doc values via + BatchDocValuesRangeIterator and DocValuesRangeSupport. (Sagar Upadhyaya) + * GITHUB#16061, GITHUB#16070, GITHUB#16085: Improve cost estimation in SortedSetDocValuesRangeQuery and SortedNumericDocValuesRangeQuery when using DocValuesSkipper, the field is dense and is the primary sort of the index to reduce the number of doc values visited. (Ignacio Vera) diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/MultiFieldDocValuesRangeBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/MultiFieldDocValuesRangeBenchmark.java index 0767c8857260..35c44e91d501 100644 --- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/MultiFieldDocValuesRangeBenchmark.java +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/MultiFieldDocValuesRangeBenchmark.java @@ -87,7 +87,7 @@ public static class Params { @Param({"1000000", "10000000"}) public int docCount; - @Param({"3", "5"}) + @Param({"1", "3", "5"}) public int fieldCount; @Param({CLUSTERED, MIXED, RANDOM, SORTED}) @@ -129,6 +129,17 @@ public void setup(Params params) throws Exception { SortedNumericDocValuesField.newSlowRangeQuery("field" + f, range[0], range[1]), Occur.FILTER); } + // For fieldCount=1 on non-sorted patterns, add a MatchAllDocsQuery so + // DenseConjunctionBulkScorer is used and intoBitSet() is called on the range iterator + // (enabling the SIMD path). Without this, a single-clause BooleanQuery rewrites to the + // query itself and goes through DefaultBulkScorer which doesn't call intoBitSet(). 
+ // For the sorted pattern, field0 is the index sort key so + // getDocIdSetIteratorOrNullForPrimarySort + // fires and returns DocIdSetIterator.range() — adding MatchAllDocsQuery here would force it + // through DenseConjunctionBulkScorer and bypass that fast path, causing a regression. + if (params.fieldCount == 1 && !params.dataPattern.equals(SORTED)) { + bqBuilder.add(new org.apache.lucene.search.MatchAllDocsQuery(), Occur.FILTER); + } query = bqBuilder.build(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index 7c5f03373526..1ecabb4616ed 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -391,6 +391,24 @@ public void close() throws IOException { private record DocValuesSkipperEntry( long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {} + // Cached VectorizationProvider instance to avoid repeated stack walks in ensureCaller() + private static final org.apache.lucene.internal.vectorization.DocValuesRangeSupport + DOC_VALUES_RANGE_SUPPORT = + org.apache.lucene.internal.vectorization.VectorizationProvider.getInstance() + .getDocValuesRangeSupport(); + + static void rangeIntoBitSet( + org.apache.lucene.util.LongValues values, + int fromDoc, + int toDoc, + long minValue, + long maxValue, + FixedBitSet bitSet, + int offset) { + DOC_VALUES_RANGE_SUPPORT.rangeIntoBitSet( + values, fromDoc, toDoc, minValue, maxValue, bitSet, offset); + } + private static class NumericEntry { long[] table; int blockShift; @@ -610,6 +628,19 @@ public long longValue() throws IOException { public long longValue() throws IOException { return values.get(doc); } + + @Override + public void rangeIntoBitSet( + int fromDoc, + int toDoc, + long minValue, + long maxValue, + 
FixedBitSet bitSet, + int offset) { + // Bulk range evaluation via DocValuesRangeSupport + Lucene90DocValuesProducer.rangeIntoBitSet( + values, fromDoc, toDoc, minValue, maxValue, bitSet, offset); + } }; } else { final long mul = entry.gcd; @@ -619,6 +650,23 @@ public long longValue() throws IOException { public long longValue() throws IOException { return mul * values.get(doc) + delta; } + + @Override + public void rangeIntoBitSet( + int fromDoc, + int toDoc, + long minValue, + long maxValue, + FixedBitSet bitSet, + int offset) { + // Per-doc evaluation for gcd/delta encoded fields + for (int d = fromDoc; d < toDoc; d++) { + long v = mul * values.get(d) + delta; + if (v >= minValue && v <= maxValue) { + bitSet.set(d - offset); + } + } + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 37f628f4cc15..173673c6514e 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.BatchDocValuesRangeIterator; import org.apache.lucene.search.ConstantScoreScorerSupplier; import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSetIterator; @@ -140,9 +141,16 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti final SortField primarySortField; if (singleton != null) { - if (skipper != null - && (primarySortField = densePrimarySort(context.reader(), skipper)) != null) { - return getScorerSupplierFromDensePrimarySort(singleton, skipper, primarySortField); + if (skipper != null) { + if ((primarySortField = 
densePrimarySort(context.reader(), skipper)) != null) { + return getScorerSupplierFromDensePrimarySort(singleton, skipper, primarySortField); + } + // Use batch iterator for bulk block evaluation via intoBitSet() + return ConstantScoreScorerSupplier.fromIterator( + new BatchDocValuesRangeIterator(singleton, skipper, lowerValue, upperValue), + score(), + scoreMode, + maxDoc); } return ConstantScoreScorerSupplier.fromIterator( TwoPhaseIterator.asDocIdSetIterator( diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java index 250860fcebbc..39d0131d93ed 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.util.FixedBitSet; /** A per-document numeric value. */ public abstract class NumericDocValues extends DocValuesIterator { @@ -91,4 +92,33 @@ public void longValues(int size, int[] docs, long[] values, long defaultValue) values[i] = value; } } + + /** + * Fills a {@link org.apache.lucene.util.FixedBitSet} with the doc IDs in {@code [fromDoc, toDoc)} + * whose values are in {@code [minValue, maxValue]}. This is a bulk operation that avoids per-doc + * virtual dispatch overhead. + * + *
<p>
The default implementation falls back to per-doc evaluation via {@link #advanceExact} and + * {@link #longValue}. Subclasses with random-access storage (e.g., dense fixed-bitsPerValue + * fields) can override this for significantly better performance. + * + * @param fromDoc first doc ID to evaluate (inclusive) + * @param toDoc last doc ID to evaluate (exclusive) + * @param minValue lower bound of the range (inclusive) + * @param maxValue upper bound of the range (inclusive) + * @param bitSet the bitset to fill + * @param offset subtracted from each doc ID before setting the bit + */ + public void rangeIntoBitSet( + int fromDoc, int toDoc, long minValue, long maxValue, FixedBitSet bitSet, int offset) + throws IOException { + for (int d = fromDoc; d < toDoc; d++) { + if (advanceExact(d)) { + long v = longValue(); + if (v >= minValue && v <= maxValue) { + bitSet.set(d - offset); + } + } + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultDocValuesRangeSupport.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultDocValuesRangeSupport.java new file mode 100644 index 000000000000..84a3c89c7e00 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultDocValuesRangeSupport.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.internal.vectorization; + +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LongValues; + +/** Scalar (non-SIMD) implementation of {@link DocValuesRangeSupport}. */ +final class DefaultDocValuesRangeSupport implements DocValuesRangeSupport { + + static final DefaultDocValuesRangeSupport INSTANCE = new DefaultDocValuesRangeSupport(); + + private DefaultDocValuesRangeSupport() {} + + @Override + public void rangeIntoBitSet( + LongValues values, + int fromDoc, + int toDoc, + long minValue, + long maxValue, + FixedBitSet bitSet, + int offset) { + // Scalar fallback implementation + for (int d = fromDoc; d < toDoc; d++) { + long v = values.get(d); + if (v >= minValue && v <= maxValue) { + bitSet.set(d - offset); + } + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java index 21977fa3dc77..0394e1959663 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java @@ -50,4 +50,9 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() { public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) { return new PostingDecodingUtil(input); } + + @Override + public DocValuesRangeSupport getDocValuesRangeSupport() { + return DefaultDocValuesRangeSupport.INSTANCE; + } } diff --git 
a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DocValuesRangeSupport.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DocValuesRangeSupport.java new file mode 100644 index 000000000000..7b8b4e2c949c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DocValuesRangeSupport.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.internal.vectorization; + +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LongValues; + +/** + * Interface for SIMD-accelerated doc values range operations. + * + *
<p>
Implementations fill a {@link FixedBitSet} with the doc IDs in a range whose values satisfy a + * numeric range predicate. The default scalar implementation is used when the Panama Vector API is + * unavailable; a SIMD-accelerated implementation is used otherwise. + * + * @lucene.internal + */ +public interface DocValuesRangeSupport { + + /** + * Fills {@code bitSet} with the doc IDs in {@code [fromDoc, toDoc)} whose values (read via {@code + * values}) are in {@code [minValue, maxValue]}. + * + * @param values random-access reader for the doc values + * @param fromDoc first doc ID to evaluate (inclusive) + * @param toDoc last doc ID to evaluate (exclusive) + * @param minValue lower bound of the range (inclusive) + * @param maxValue upper bound of the range (inclusive) + * @param bitSet the bitset to fill + * @param offset subtracted from each doc ID before setting the bit + */ + void rangeIntoBitSet( + LongValues values, + int fromDoc, + int toDoc, + long minValue, + long maxValue, + FixedBitSet bitSet, + int offset); +} diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java index abeb023fa80f..2bd1c2c3b1ea 100644 --- a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java +++ b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java @@ -115,6 +115,13 @@ public static VectorizationProvider getInstance() { /** Create a new {@link PostingDecodingUtil} for the given {@link IndexInput}. */ public abstract PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException; + /** + * Returns a {@link DocValuesRangeSupport} instance for bulk numeric range evaluation. The + * returned instance uses SIMD when available (Panama Vector API), falling back to a scalar loop + * otherwise. 
+   */
+  public abstract DocValuesRangeSupport getDocValuesRangeSupport();
+
   // *** Lookup mechanism: ***
 
   private static final Logger LOG = Logger.getLogger(VectorizationProvider.class.getName());
@@ -213,6 +220,7 @@ private static Optional lookupVectorModule() {
           "org.apache.lucene.util.VectorUtil",
           "org.apache.lucene.codecs.lucene104.Lucene104PostingsReader",
           "org.apache.lucene.codecs.lucene104.PostingIndexInput",
+          "org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer",
           "org.apache.lucene.tests.util.TestSysoutsLimits");
 
   private static final StackWalker STACKWALKER =
diff --git a/lucene/core/src/java/org/apache/lucene/search/BatchDocValuesRangeIterator.java b/lucene/core/src/java/org/apache/lucene/search/BatchDocValuesRangeIterator.java
new file mode 100644
index 000000000000..971310172b40
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/BatchDocValuesRangeIterator.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import org.apache.lucene.index.DocValuesSkipper;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.util.FixedBitSet;
+
+/**
+ * A {@link DocIdSetIterator} for numeric doc values range queries that batch-evaluates values for
+ * MAYBE blocks. Instead of checking one doc at a time through a {@link
+ * org.apache.lucene.search.TwoPhaseIterator}, this iterator reads values in a tight loop and sets
+ * bits directly in a {@link FixedBitSet}, enabling the {@link DenseConjunctionBulkScorer} to use
+ * the faster bitset intersection path.
+ *
+ * <p>This is used for single-valued numeric fields with a skip index.
+ */
+public final class BatchDocValuesRangeIterator extends DocIdSetIterator {
+
+  private final SkipBlockRangeIterator blockIterator;
+  private final NumericDocValues values;
+  private final long minValue;
+  private final long maxValue;
+  private int doc = -1;
+
+  /**
+   * Creates an iterator over the docs whose value lies in {@code [minValue, maxValue]}.
+   *
+   * @param values single-valued doc values that are read to check presence and range
+   * @param skipper skip index handed to the {@link SkipBlockRangeIterator} that classifies blocks
+   * @param minValue lower bound of the range (inclusive)
+   * @param maxValue upper bound of the range (inclusive)
+   */
+  public BatchDocValuesRangeIterator(
+      NumericDocValues values, DocValuesSkipper skipper, long minValue, long maxValue) {
+    this.blockIterator = new SkipBlockRangeIterator(skipper, minValue, maxValue);
+    this.values = values;
+    this.minValue = minValue;
+    this.maxValue = maxValue;
+  }
+
+  @Override
+  public int docID() {
+    return doc;
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    return advance(doc + 1);
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    int blockDoc = blockIterator.docID();
+    if (blockDoc < target) {
+      blockDoc = blockIterator.advance(target);
+    }
+    if (blockDoc == NO_MORE_DOCS) {
+      return doc = NO_MORE_DOCS;
+    }
+
+    // For YES blocks, all docs have values in range, so return the first doc
+    if (blockIterator.getMatch() == SkipBlockRangeIterator.Match.YES) {
+      return doc = blockDoc;
+    }
+
+    // Scan forward through YES_IF_PRESENT and MAYBE blocks to find a matching doc.
+    // - YES_IF_PRESENT: all values are in range, but some docs may not have a value.
+    //   We only need to check presence via advanceExact().
+    // - MAYBE: docs may or may not have a value, and values may or may not be in range.
+    //   We need both a presence check (advanceExact) and a range check (longValue).
+    int docToCheck = Math.max(target, blockDoc);
+    int currentBlockEnd = blockIterator.blockEnd();
+    while (docToCheck != NO_MORE_DOCS) {
+      if (values.advanceExact(docToCheck)) {
+        // If we landed in a YES_IF_PRESENT block, skip the range check
+        if (blockIterator.getMatch() == SkipBlockRangeIterator.Match.YES_IF_PRESENT) {
+          return doc = docToCheck;
+        }
+        // This is a MAYBE block. We need to verify if the value is in range
+        long v = values.longValue();
+        if (v >= minValue && v <= maxValue) {
+          return doc = docToCheck;
+        }
+      }
+      docToCheck++;
+      // Check if we've left the current block
+      if (docToCheck >= currentBlockEnd) {
+        // Move to next matching block
+        blockDoc = blockIterator.advance(docToCheck);
+        if (blockDoc == NO_MORE_DOCS) {
+          return doc = NO_MORE_DOCS;
+        }
+        docToCheck = blockDoc;
+        if (blockIterator.getMatch() == SkipBlockRangeIterator.Match.YES) {
+          return doc = docToCheck;
+        }
+        currentBlockEnd = blockIterator.blockEnd();
+      }
+    }
+    return doc = NO_MORE_DOCS;
+  }
+
+  @Override
+  public long cost() {
+    return values.cost();
+  }
+
+  @Override
+  public int docIDRunEnd() throws IOException {
+    // The block iterator can trail this iterator inside non-YES blocks, where its run end is
+    // relative to its own position. Never report a run that ends at or before doc.
+    return Math.max(doc + 1, blockIterator.docIDRunEnd());
+  }
+
+  @Override
+  public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
+    if (doc >= upTo) {
+      return; // already positioned on a verified match at or beyond the window
+    }
+    while (doc < upTo) {
+      // Advance block iterator if needed
+      if (blockIterator.docID() < doc) {
+        blockIterator.advance(doc);
+      }
+      // NO_MORE_DOCS is Integer.MAX_VALUE, so this also covers an exhausted block iterator
+      if (blockIterator.docID() >= upTo) {
+        doc = blockIterator.docID();
+        break;
+      }
+
+      int blockStart = Math.max(doc, blockIterator.docID());
+      SkipBlockRangeIterator.Match match = blockIterator.getMatch();
+
+      // docIDRunEnd() only spans a full run for YES blocks; use blockEnd() for the other states
+      int blockEnd =
+          match == SkipBlockRangeIterator.Match.YES
+              ? Math.min(upTo, blockIterator.docIDRunEnd())
+              : Math.min(upTo, blockIterator.blockEnd());
+
+      switch (match) {
+        case YES:
+          // All docs in this range match — set all bits
+          bitSet.set(blockStart - offset, blockEnd - offset);
+          break;
+
+        case YES_IF_PRESENT:
+          // All values in this block are in range, but the field is sparse so some docs
+          // may not have a value. No range check needed here.
+          for (int d = blockStart; d < blockEnd; d++) {
+            if (values.advanceExact(d)) {
+              bitSet.set(d - offset);
+            }
+          }
+          break;
+
+        case MAYBE:
+          // Bulk range evaluation for the rest of the block via rangeIntoBitSet; doc values
+          // implementations may override it with a faster (e.g. SIMD) path.
+          values.rangeIntoBitSet(blockStart, blockEnd, minValue, maxValue, bitSet, offset);
+          break;
+      }
+
+      // Move past this block
+      doc = blockEnd;
+      if (doc < upTo) {
+        blockIterator.advance(doc);
+        doc = blockIterator.docID();
+      }
+    }
+    // doc is only a candidate here (a block start or the window end). Land on a verified match
+    // so that docID() is a real hit, as the default DocIdSetIterator#intoBitSet loop guarantees.
+    if (doc != NO_MORE_DOCS) {
+      advance(doc);
+    }
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/SkipBlockRangeIterator.java b/lucene/core/src/java/org/apache/lucene/search/SkipBlockRangeIterator.java
index be4bb2b9ed1c..41fd421868d7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SkipBlockRangeIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SkipBlockRangeIterator.java
@@ -111,6 +111,16 @@ public long cost() {
     return DocIdSetIterator.NO_MORE_DOCS;
   }
 
+  /**
+   * Returns the exclusive end of the current skip block (the actual block boundary from the
+   * skipper), regardless of match state. Unlike {@link #docIDRunEnd()} which returns {@code doc+1}
+   * for MAYBE blocks, this always returns the full block boundary so callers can bulk-evaluate the
+   * entire block at once.
+   */
+  public int blockEnd() {
+    return skipper.maxDocID(0) + 1;
+  }
+
   @Override
   public int docIDRunEnd() throws IOException {
     if (match != Match.YES) {
diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/NativeVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/NativeVectorizationProvider.java
index 8602219e1aca..7234b0dbca90 100644
--- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/NativeVectorizationProvider.java
+++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/NativeVectorizationProvider.java
@@ -76,4 +76,9 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() {
   public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException {
     return delegateVectorUtilProvider.newPostingDecodingUtil(input);
   }
+
+  @Override
+  public DocValuesRangeSupport getDocValuesRangeSupport() {
+    return delegateVectorUtilProvider.getDocValuesRangeSupport();
+  }
 }
diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaDocValuesRangeSupport.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaDocValuesRangeSupport.java
new file mode 100644
index 000000000000..8b8fe6c2ff76
--- /dev/null
+++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaDocValuesRangeSupport.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.internal.vectorization;
+
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorMask;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LongValues;
+
+/** Panama Vector API implementation of {@link DocValuesRangeSupport}. */
+final class PanamaDocValuesRangeSupport implements DocValuesRangeSupport {
+
+  static final PanamaDocValuesRangeSupport INSTANCE = new PanamaDocValuesRangeSupport();
+
+  private static final VectorSpecies<Long> LONG_SPECIES = LongVector.SPECIES_PREFERRED;
+
+  private PanamaDocValuesRangeSupport() {}
+
+  @Override
+  public void rangeIntoBitSet(
+      LongValues values,
+      int fromDoc,
+      int toDoc,
+      long minValue,
+      long maxValue,
+      FixedBitSet bitSet,
+      int offset) {
+    final int vectorLen = LONG_SPECIES.length();
+
+    // Scratch buffer: values are gathered one by one through LongValues#get before each compare
+    final long[] scratch = new long[vectorLen];
+    final int loopBound = fromDoc + LONG_SPECIES.loopBound(toDoc - fromDoc);
+
+    // SIMD loop: load vectorLen values into scratch, compare all lanes against both bounds
+    for (int d = fromDoc; d < loopBound; d += vectorLen) {
+      for (int i = 0; i < vectorLen; i++) {
+        scratch[i] = values.get(d + i);
+      }
+      LongVector v = LongVector.fromArray(LONG_SPECIES, scratch, 0);
+      VectorMask<Long> inRange =
+          v.compare(VectorOperators.GE, minValue).and(v.compare(VectorOperators.LE, maxValue));
+      long maskBits = inRange.toLong();
+      if (maskBits != 0) {
+        int base = d - offset;
+        while (maskBits != 0) {
+          int bit = Long.numberOfTrailingZeros(maskBits);
+          bitSet.set(base + bit);
+          maskBits &= maskBits - 1;
+        }
+      }
+    }
+
+    // Scalar tail for the docs that do not fill a whole vector
+    for (int d = loopBound; d < toDoc; d++) {
+      long v = values.get(d);
+      if (v >= minValue && v <= maxValue) {
+        bitSet.set(d - offset);
+      }
+    }
+  }
+}
diff --git a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java
index cf3ab94f417c..2e5917b9f9b0 100644
--- a/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java
+++ b/lucene/core/src/java25/org/apache/lucene/internal/vectorization/PanamaVectorizationProvider.java
@@ -93,4 +93,9 @@ public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOExc
     }
     return new PostingDecodingUtil(input);
   }
+
+  @Override
+  public DocValuesRangeSupport getDocValuesRangeSupport() {
+    return PanamaDocValuesRangeSupport.INSTANCE;
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java b/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java
index cfcd465d6427..bc4c681e883a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java
@@ -24,10 +24,10 @@
 public abstract class BaseDocValuesSkipperTests extends LuceneTestCase {
 
   /**
-   * Fake numeric doc values so that: - docs 0-256 all match - docs in 256-512 are all greater than
-   * queryMax - docs in 512-768 are all less than queryMin - docs in 768-1024 have some docs that
-   * match the range, others not - docs in 1024-2048 follow a similar pattern as docs in 0-1024
-   * except that not all docs have a - value
+   * Fake numeric doc values so that: - docs 0-127 all match - docs in 128-255 are all greater than
+   * queryMax - docs in 256-511 are all less than queryMin -
docs in 512-1023 have some docs that + * match the range, others not - docs in 1024-2047 follow a similar pattern as docs in 0-1023 + * except that not all docs have a value (only even docs) */ protected static NumericDocValues docValues(long queryMin, long queryMax) { return new NumericDocValues() { @@ -36,7 +36,8 @@ protected static NumericDocValues docValues(long queryMin, long queryMax) { @Override public boolean advanceExact(int target) throws IOException { - throw new UnsupportedOperationException(); + int advanced = advance(target); + return advanced == target; } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSkipBlockRangeIteratorIntoBitSet.java b/lucene/core/src/test/org/apache/lucene/search/TestSkipBlockRangeIteratorIntoBitSet.java new file mode 100644 index 000000000000..abd1a2516f91 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestSkipBlockRangeIteratorIntoBitSet.java @@ -0,0 +1,508 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import org.apache.lucene.codecs.lucene104.Lucene104Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesSkipper; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.FixedBitSet; + +/** + * Tests correctness of {@link BatchDocValuesRangeIterator} and its {@code intoBitSet()} path, + * including YES, YES_IF_PRESENT, and MAYBE block states. + */ +public class TestSkipBlockRangeIteratorIntoBitSet extends BaseDocValuesSkipperTests { + + // Use enough docs to span at least 4 skip blocks (default skip block size = 4096 docs). 
+ private static final int DOC_COUNT = 4096 * 4; + + private Directory dir; + private DirectoryReader reader; + private IndexSearcher searcher; + + private long[] values; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(); + iwc.setCodec(new Lucene104Codec()); + IndexWriter w = new IndexWriter(dir, iwc); + values = new long[DOC_COUNT]; + for (int i = 0; i < DOC_COUNT; i++) { + values[i] = i % 100; // deterministic: 0..99 repeating + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("age", values[i])); + doc.add(NumericDocValuesField.indexedField("score", (i * 7L) % 1000)); + w.addDocument(doc); + } + w.forceMerge(1); + reader = DirectoryReader.open(w); + w.close(); + searcher = new IndexSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + reader.close(); + dir.close(); + super.tearDown(); + } + + /** + * Verifies that SortedNumericDocValuesRangeQuery wires BatchDocValuesRangeIterator when the field + * has a skip index and is single-valued. 
+ */ + public void testBatchIteratorIsWired() throws Exception { + LeafReaderContext ctx = reader.leaves().get(0); + // Use a range that won't be rewritten to MatchAll or MatchNone + Query q = SortedNumericDocValuesField.newSlowRangeQuery("age", 20, 40); + Weight weight = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f); + ScorerSupplier ss = weight.scorerSupplier(ctx); + assertNotNull("ScorerSupplier must not be null", ss); + + assertTrue( + "ScorerSupplier must be a ConstantScoreScorerSupplier", + ss instanceof ConstantScoreScorerSupplier); + DocIdSetIterator iter = ((ConstantScoreScorerSupplier) ss).iterator(Long.MAX_VALUE); + assertTrue( + "Range query on single-valued field with skip index must use BatchDocValuesRangeIterator" + + " but got: " + + iter.getClass().getSimpleName(), + iter instanceof BatchDocValuesRangeIterator); + } + + public void testSingleFieldRangeCorrectness() throws Exception { + // Count expected matches by scanning values[] directly + int expected = 0; + for (long v : values) { + if (v >= 20 && v <= 40) expected++; + } + assertTrue("Should have some matches with controlled data", expected > 0); + + Query q = SortedNumericDocValuesField.newSlowRangeQuery("age", 20, 40); + assertEquals("Single-field count must match linear scan", expected, searcher.count(q)); + } + + public void testMultiFieldRangeCorrectness() throws Exception { + int expectedBoth = 0; + for (int i = 0; i < DOC_COUNT; i++) { + long age = values[i]; + long score = (i * 7L) % 1000; + if (age >= 20 && age <= 40 && score >= 100 && score <= 500) expectedBoth++; + } + + Query ageOnly = SortedNumericDocValuesField.newSlowRangeQuery("age", 20, 40); + Query scoreOnly = SortedNumericDocValuesField.newSlowRangeQuery("score", 100, 500); + Query both = + new BooleanQuery.Builder().add(ageOnly, Occur.FILTER).add(scoreOnly, Occur.FILTER).build(); + + assertEquals("Multi-field count must match linear scan", expectedBoth, searcher.count(both)); + assertTrue( + "Conjunction must 
be <= age-only count", searcher.count(both) <= searcher.count(ageOnly)); + assertTrue( + "Conjunction must be <= score-only count", + searcher.count(both) <= searcher.count(scoreOnly)); + } + + public void testResultsAreRepeatable() throws Exception { + Query q = + new BooleanQuery.Builder() + .add(SortedNumericDocValuesField.newSlowRangeQuery("age", 20, 40), Occur.FILTER) + .add(SortedNumericDocValuesField.newSlowRangeQuery("score", 100, 500), Occur.FILTER) + .build(); + + int count1 = searcher.count(q); + int count2 = searcher.count(q); + assertEquals("Results must be deterministic", count1, count2); + } + + /** + * Single-field range with a restrictive second clause exercises the single-field bitset path via + * DenseConjunctionBulkScorer. Uses a second range query on a different field as the restrictive + * clause — MatchAllDocsQuery is not used because BooleanQuery rewrites it away. + */ + public void testSingleFieldWithRestrictiveSecondClause() throws Exception { + // age in [20,40] AND score in [0,0]..score=0 only for docs where (i*7)%1000==0, i.e., i=0 + Query ageQ = SortedNumericDocValuesField.newSlowRangeQuery("age", 0, 99); // matches all + Query scoreQ = SortedNumericDocValuesField.newSlowRangeQuery("score", 0, 0); // very restrictive + + int expectedScore0 = 0; + for (int i = 0; i < DOC_COUNT; i++) { + if ((i * 7L) % 1000 == 0) expectedScore0++; + } + + Query combined = + new BooleanQuery.Builder().add(ageQ, Occur.FILTER).add(scoreQ, Occur.FILTER).build(); + + assertEquals("Should match only docs where score=0", expectedScore0, searcher.count(combined)); + } + + /** Directly tests intoBitSet() against a linear scan reference. 
*/ + public void testIntoBitSetMatchesLinearScan() throws Exception { + doTestIntoBitSetMatchesLinearScan(reader); + } + + /** Large-scale version — 100 skip blocks */ + @Nightly + public void testIntoBitSetMatchesLinearScanHuge() throws Exception { + try (Directory hugeDir = newDirectory()) { + buildIndex(hugeDir, 4096 * 100); + try (DirectoryReader r = DirectoryReader.open(hugeDir)) { + doTestIntoBitSetMatchesLinearScan(r); + } + } + } + + private void doTestIntoBitSetMatchesLinearScan(DirectoryReader r) throws Exception { + LeafReaderContext ctx = r.leaves().get(0); + int maxDoc = ctx.reader().maxDoc(); + int windowSize = DenseConjunctionBulkScorer.WINDOW_SIZE; // 4096 + + FixedBitSet expected = new FixedBitSet(windowSize); + NumericDocValues refValues = ctx.reader().getNumericDocValues("age"); + for (int d = 0; d < Math.min(maxDoc, windowSize); d++) { + if (refValues.advanceExact(d) && refValues.longValue() >= 20 && refValues.longValue() <= 40) { + expected.set(d); + } + } + + DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("age"); + NumericDocValues dv = ctx.reader().getNumericDocValues("age"); + assertNotNull("Field must have a skip index", skipper); + + BatchDocValuesRangeIterator iter = new BatchDocValuesRangeIterator(dv, skipper, 20, 40); + iter.nextDoc(); + + FixedBitSet actual = new FixedBitSet(windowSize); + iter.intoBitSet(Math.min(maxDoc, windowSize), actual, 0); + + assertEquals( + "intoBitSet must set exactly the same bits as linear scan", + expected.cardinality(), + actual.cardinality()); + FixedBitSet diff = expected.clone(); + diff.xor(actual); + assertEquals("No bits should differ", 0, diff.cardinality()); + } + + /** Tests YES block path — range covers all values, all docs match. 
*/ + public void testIntoBitSetAllMatchRange() throws Exception { + doTestIntoBitSetAllMatchRange(reader); + } + + @Nightly + public void testIntoBitSetAllMatchRangeHuge() throws Exception { + try (Directory hugeDir = newDirectory()) { + buildIndex(hugeDir, 4096 * 100); + try (DirectoryReader r = DirectoryReader.open(hugeDir)) { + doTestIntoBitSetAllMatchRange(r); + } + } + } + + private void doTestIntoBitSetAllMatchRange(DirectoryReader r) throws Exception { + LeafReaderContext ctx = r.leaves().get(0); + int maxDoc = ctx.reader().maxDoc(); + + DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("age"); + NumericDocValues dv = ctx.reader().getNumericDocValues("age"); + assertNotNull(skipper); + + BatchDocValuesRangeIterator iter = new BatchDocValuesRangeIterator(dv, skipper, 0, 99); + iter.nextDoc(); + + FixedBitSet bitSet = new FixedBitSet(maxDoc); + iter.intoBitSet(maxDoc, bitSet, 0); + + assertEquals("All docs should match range [0,99]", maxDoc, bitSet.cardinality()); + } + + /** Tests NO block path — range matches nothing. 
*/ + public void testIntoBitSetNoMatchRange() throws Exception { + doTestIntoBitSetNoMatchRange(reader); + } + + @Nightly + public void testIntoBitSetNoMatchRangeHuge() throws Exception { + try (Directory hugeDir = newDirectory()) { + buildIndex(hugeDir, 4096 * 100); + try (DirectoryReader r = DirectoryReader.open(hugeDir)) { + doTestIntoBitSetNoMatchRange(r); + } + } + } + + private void doTestIntoBitSetNoMatchRange(DirectoryReader r) throws Exception { + LeafReaderContext ctx = r.leaves().get(0); + int maxDoc = ctx.reader().maxDoc(); + + DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("age"); + NumericDocValues dv = ctx.reader().getNumericDocValues("age"); + assertNotNull(skipper); + + BatchDocValuesRangeIterator iter = new BatchDocValuesRangeIterator(dv, skipper, 200, 300); + iter.nextDoc(); + + FixedBitSet bitSet = new FixedBitSet(maxDoc); + iter.intoBitSet(maxDoc, bitSet, 0); + + assertEquals("No docs should match out-of-range query", 0, bitSet.cardinality()); + } + + /** Tests YES_IF_PRESENT block path — sparse field, only even docs have a value. 
*/ + public void testIntoBitSetSparseField() throws Exception { + doTestIntoBitSetSparseField(1000); + } + + @Nightly + public void testIntoBitSetSparseFieldHuge() throws Exception { + doTestIntoBitSetSparseField(4096 * 20); + } + + private void doTestIntoBitSetSparseField(int numDocs) throws Exception { + try (Directory sparseDir = newDirectory()) { + IndexWriterConfig iwc = new IndexWriterConfig().setCodec(new Lucene104Codec()); + IndexWriter w = new IndexWriter(sparseDir, iwc); + List expectedDocs = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + if (i % 2 == 0) { + // All values within [20, 40] so blocks are YES_IF_PRESENT (not MAYBE) + long val = 20 + (i % 21); + doc.add(NumericDocValuesField.indexedField("sparse", val)); + expectedDocs.add(i); + } + w.addDocument(doc); + } + w.forceMerge(1); + try (DirectoryReader sparseReader = DirectoryReader.open(w)) { + w.close(); + LeafReaderContext ctx = sparseReader.leaves().get(0); + + DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("sparse"); + NumericDocValues dv = ctx.reader().getNumericDocValues("sparse"); + if (skipper == null) return; + + BatchDocValuesRangeIterator iter = new BatchDocValuesRangeIterator(dv, skipper, 20, 40); + iter.nextDoc(); + + FixedBitSet bitSet = new FixedBitSet(numDocs); + iter.intoBitSet(numDocs, bitSet, 0); + + assertEquals( + "Sparse field intoBitSet must match expected count", + expectedDocs.size(), + bitSet.cardinality()); + for (int i = 1; i < numDocs; i += 2) { + assertFalse("Odd doc " + i + " has no value and must not be set", bitSet.get(i)); + } + } + } + } + + /** Tests nextDoc() and advance() return the same docs as a linear scan. 
*/ + public void testIterationCorrectness() throws Exception { + doTestIterationCorrectness(reader); + } + + @Nightly + public void testIterationCorrectnessHuge() throws Exception { + try (Directory hugeDir = newDirectory()) { + buildIndex(hugeDir, 4096 * 100); + try (DirectoryReader r = DirectoryReader.open(hugeDir)) { + doTestIterationCorrectness(r); + } + } + } + + private void doTestIterationCorrectness(DirectoryReader r) throws Exception { + LeafReaderContext ctx = r.leaves().get(0); + + List expected = new ArrayList<>(); + NumericDocValues refValues = ctx.reader().getNumericDocValues("age"); + for (int d = 0; d < ctx.reader().maxDoc(); d++) { + if (refValues.advanceExact(d) && refValues.longValue() >= 20 && refValues.longValue() <= 40) { + expected.add(d); + } + } + assertTrue("Test requires some matching docs", expected.size() > 0); + + // Test 1: nextDoc() returns all matching docs in order + DocValuesSkipper skipper = ctx.reader().getDocValuesSkipper("age"); + NumericDocValues dv = ctx.reader().getNumericDocValues("age"); + assertNotNull(skipper); + + BatchDocValuesRangeIterator iter = new BatchDocValuesRangeIterator(dv, skipper, 20, 40); + List actual = new ArrayList<>(); + for (int d = iter.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = iter.nextDoc()) { + actual.add(d); + } + assertEquals("nextDoc() must return same docs as expected", expected, actual); + + // Test 2: advance() forward through the iterator with random skips. + // Uses a single iterator instance and advances forward multiple times, + // verifying each advance lands on the correct next matching doc. 
+ skipper = ctx.reader().getDocValuesSkipper("age"); + dv = ctx.reader().getNumericDocValues("age"); + iter = new BatchDocValuesRangeIterator(dv, skipper, 20, 40); + Random rng = random(); + int idx = 0; + while (idx < expected.size()) { + int target = expected.get(idx); + int doc = iter.advance(target); + assertEquals("advance(" + target + ") must return the target", target, doc); + // Skip forward by a random amount (1-5 matching docs) + idx += rng.nextInt(5) + 1; + } + + // Test 3: advance() to a non-matching doc lands on the next match. + // Picks random targets between consecutive matches and verifies + // advance returns the next matching doc after the target. + skipper = ctx.reader().getDocValuesSkipper("age"); + dv = ctx.reader().getNumericDocValues("age"); + iter = new BatchDocValuesRangeIterator(dv, skipper, 20, 40); + int prevDoc = -1; + for (int i = 0; i < expected.size() && i < 20; i++) { + int matchDoc = expected.get(i); + if (matchDoc > prevDoc + 1) { + // Target a gap between previous position and this match + int target = prevDoc + 1 + rng.nextInt(matchDoc - prevDoc - 1); + int doc = iter.advance(target); + assertEquals( + "advance(" + target + ") must skip to next match at " + matchDoc, matchDoc, doc); + prevDoc = doc; + } else { + prevDoc = matchDoc; + } + } + + // Test 4: advance() past all docs returns NO_MORE_DOCS + skipper = ctx.reader().getDocValuesSkipper("age"); + dv = ctx.reader().getNumericDocValues("age"); + iter = new BatchDocValuesRangeIterator(dv, skipper, 20, 40); + int doc = iter.advance(ctx.reader().maxDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, doc); + } + + /** Helper: builds an index with {@code numDocs} docs, values = i % 100. 
*/ + private void buildIndex(Directory dir, int numDocs) throws Exception { + IndexWriterConfig iwc = new IndexWriterConfig().setCodec(new Lucene104Codec()); + try (IndexWriter w = new IndexWriter(dir, iwc)) { + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("age", i % 100)); + w.addDocument(doc); + } + w.forceMerge(1); + } + } + + /** + * Tests BatchDocValuesRangeIterator using the fake NumericDocValues and DocValuesSkipper from + * BaseDocValuesSkipperTests, which exercises all block types (YES, NO, MAYBE, YES_IF_PRESENT) + * across both dense and sparse regions. + */ + public void testAllBlockTypesWithFakeSkipper() throws Exception { + long queryMin = 10; + long queryMax = 20; + + // Collect expected matching docs by brute-force scan (up to 2048, the skipper's range) + List expected = new ArrayList<>(); + NumericDocValues refValues = docValues(queryMin, queryMax); + for (int d = refValues.nextDoc(); + d != DocIdSetIterator.NO_MORE_DOCS && d < 2048; + d = refValues.nextDoc()) { + long v = refValues.longValue(); + if (v >= queryMin && v <= queryMax) { + expected.add(d); + } + } + assertTrue("Should have matching docs", expected.size() > 0); + + // Test nextDoc() through all block types + NumericDocValues values = docValues(queryMin, queryMax); + DocValuesSkipper skipper = docValuesSkipper(queryMin, queryMax, true); + BatchDocValuesRangeIterator iter = + new BatchDocValuesRangeIterator(values, skipper, queryMin, queryMax); + List actual = new ArrayList<>(); + for (int d = iter.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = iter.nextDoc()) { + actual.add(d); + } + assertEquals("Must return same docs across all block types", expected, actual); + + // Test advance() across block types: + // - advance into YES block (docs 0-127) + // - advance into NO block, should skip to MAYBE (docs 512+) + // - advance into MAYBE block + // - advance into YES_IF_PRESENT block (docs 1024+, sparse) + // - advance 
past end + values = docValues(queryMin, queryMax); + skipper = docValuesSkipper(queryMin, queryMax, true); + iter = new BatchDocValuesRangeIterator(values, skipper, queryMin, queryMax); + + // advance into YES block — should land exactly on target + assertEquals(50, iter.advance(50)); + // advance within same YES block + assertEquals(100, iter.advance(100)); + // advance into NO block (128-511) — should skip to first MAYBE match (doc 514) + int doc = iter.advance(200); + assertTrue("advance(200) should skip NO blocks, got " + doc, doc >= 512); + assertTrue("advance(200) result must be in expected", expected.contains(doc)); + // advance into sparse region (YES_IF_PRESENT, docs 1024+) + doc = iter.advance(1024); + assertTrue("advance(1024) should find a doc >= 1024, got " + doc, doc >= 1024); + assertTrue("advance(1024) result must be in expected", expected.contains(doc)); + // advance past all docs + assertEquals(DocIdSetIterator.NO_MORE_DOCS, iter.advance(2048)); + + // Test intoBitSet() across all block types + values = docValues(queryMin, queryMax); + skipper = docValuesSkipper(queryMin, queryMax, true); + iter = new BatchDocValuesRangeIterator(values, skipper, queryMin, queryMax); + int firstDoc = iter.nextDoc(); + + FixedBitSet bitSet = new FixedBitSet(2048); + iter.intoBitSet(2048, bitSet, 0); + + // All expected docs after firstDoc should be set + for (int expectedDoc : expected) { + if (expectedDoc > firstDoc) { + assertTrue("Doc " + expectedDoc + " should be set in bitset", bitSet.get(expectedDoc)); + } + } + // No unexpected docs should be set + for (int d = bitSet.nextSetBit(0); + d != DocIdSetIterator.NO_MORE_DOCS; + d = d + 1 < bitSet.length() ? bitSet.nextSetBit(d + 1) : DocIdSetIterator.NO_MORE_DOCS) { + assertTrue("Doc " + d + " set in bitset but not in expected", expected.contains(d)); + } + } +}