Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,9 @@ Optimizations

* GITHUB#16001: IndexSearcher.count() was calling query.rewrite twice, a regression since v9.10 (David Smiley)

* GITHUB#16050: Add SIMD-accelerated bulk range evaluation for dense numeric doc values via
BatchDocValuesRangeIterator and DocValuesRangeSupport. (Sagar Upadhyaya)

* GITHUB#16061: Improve cost estimation in SortedSetDocValuesRangeQuery when using DocValuesSkipper and the field
is dense and is the primary sort of the index and reduce the number of doc values visited. (Ignacio Vera)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public static class Params {
@Param({"1000000", "10000000"})
public int docCount;

@Param({"3", "5"})
@Param({"1", "3", "5"})
public int fieldCount;

@Param({CLUSTERED, MIXED, RANDOM, SORTED})
Expand Down Expand Up @@ -129,6 +129,17 @@ public void setup(Params params) throws Exception {
SortedNumericDocValuesField.newSlowRangeQuery("field" + f, range[0], range[1]),
Occur.FILTER);
}
// For fieldCount=1 on non-sorted patterns, add a MatchAllDocsQuery so
// DenseConjunctionBulkScorer is used and intoBitSet() is called on the range iterator
// (enabling the SIMD path). Without this, a single-clause BooleanQuery rewrites to the
// query itself and goes through DefaultBulkScorer which doesn't call intoBitSet().
// For the sorted pattern, field0 is the index sort key, so
// getDocIdSetIteratorOrNullForPrimarySort fires and returns DocIdSetIterator.range() — adding
// MatchAllDocsQuery here would force it through DenseConjunctionBulkScorer and bypass that
// fast path, causing a regression.
if (params.fieldCount == 1 && !params.dataPattern.equals(SORTED)) {
bqBuilder.add(new org.apache.lucene.search.MatchAllDocsQuery(), Occur.FILTER);
}
query = bqBuilder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,24 @@ public void close() throws IOException {
// Plain value holder for one doc-values skipper entry; its components are exactly the ones
// listed in the record header.
// NOTE(review): offset/length presumably locate the skipper data for a field, and
// minValue/maxValue/docCount/maxDocId presumably summarize that field — confirm against the
// code that reads these entries.
private record DocValuesSkipperEntry(
long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {}

// Cached DocValuesRangeSupport, obtained once from VectorizationProvider.getInstance() when
// this class is initialized, to avoid repeated stack walks in ensureCaller() on every call.
private static final org.apache.lucene.internal.vectorization.DocValuesRangeSupport
DOC_VALUES_RANGE_SUPPORT =
org.apache.lucene.internal.vectorization.VectorizationProvider.getInstance()
.getDocValuesRangeSupport();

static void rangeIntoBitSet(
org.apache.lucene.util.LongValues values,
int fromDoc,
int toDoc,
long minValue,
long maxValue,
org.apache.lucene.util.FixedBitSet bitSet,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an import for FixedBitSet?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

int offset) {
DOC_VALUES_RANGE_SUPPORT.rangeIntoBitSet(
values, fromDoc, toDoc, minValue, maxValue, bitSet, offset);
}

private static class NumericEntry {
long[] table;
int blockShift;
Expand Down Expand Up @@ -610,6 +628,19 @@ public long longValue() throws IOException {
public long longValue() throws IOException {
return values.get(doc);
}

// In this variant longValue() returns values.get(doc) unchanged, so the stored values can be
// compared against the bounds directly by the shared bulk helper.
@Override
public void rangeIntoBitSet(
int fromDoc,
int toDoc,
long minValue,
long maxValue,
org.apache.lucene.util.FixedBitSet bitSet,
int offset) {
// Delegate to the producer-level static helper, which forwards to the cached
// DocValuesRangeSupport implementation.
Lucene90DocValuesProducer.rangeIntoBitSet(
values, fromDoc, toDoc, minValue, maxValue, bitSet, offset);
}
};
} else {
final long mul = entry.gcd;
Expand All @@ -619,6 +650,23 @@ public long longValue() throws IOException {
public long longValue() throws IOException {
return mul * values.get(doc) + delta;
}

@Override
public void rangeIntoBitSet(
    int fromDoc,
    int toDoc,
    long minValue,
    long maxValue,
    org.apache.lucene.util.FixedBitSet bitSet,
    int offset) {
  // gcd/delta-encoded values have to be decoded (mul * raw + delta) before they can be
  // compared against the bounds, so every doc in [fromDoc, toDoc) is checked one by one.
  int doc = fromDoc;
  while (doc < toDoc) {
    final long decoded = mul * values.get(doc) + delta;
    final boolean inRange = minValue <= decoded && decoded <= maxValue;
    if (inRange) {
      // Bits are addressed relative to offset.
      bitSet.set(doc - offset);
    }
    doc++;
  }
}
};
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.BatchDocValuesRangeIterator;
import org.apache.lucene.search.ConstantScoreScorerSupplier;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
Expand Down Expand Up @@ -145,6 +146,12 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
return ConstantScoreScorerSupplier.fromIterator(
psIterator, score(), scoreMode, maxDoc);
}
// Use batch iterator for bulk block evaluation via intoBitSet()
return ConstantScoreScorerSupplier.fromIterator(
new BatchDocValuesRangeIterator(singleton, skipper, lowerValue, upperValue),
score(),
scoreMode,
maxDoc);
}
return ConstantScoreScorerSupplier.fromIterator(
TwoPhaseIterator.asDocIdSetIterator(
Expand Down
30 changes: 30 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.util.FixedBitSet;

/** A per-document numeric value. */
public abstract class NumericDocValues extends DocValuesIterator {
Expand Down Expand Up @@ -91,4 +92,33 @@ public void longValues(int size, int[] docs, long[] values, long defaultValue)
values[i] = value;
}
}

/**
 * Marks in the given {@link FixedBitSet} every doc ID in {@code [fromDoc, toDoc)} whose value
 * lies in {@code [minValue, maxValue]}. A matching doc {@code d} is recorded at bit index
 * {@code d - offset}.
 *
 * <p>This is a bulk entry point meant to let implementations avoid per-doc virtual dispatch
 * overhead. The default implementation evaluates one doc at a time through {@link
 * #advanceExact} and {@link #longValue}: docs for which {@link #advanceExact} returns {@code
 * false} are skipped, and bits are only ever set, never cleared. Subclasses with random-access
 * storage (e.g., dense fixed-bitsPerValue fields) can override this for significantly better
 * performance.
 *
 * @param fromDoc first doc ID to evaluate (inclusive)
 * @param toDoc last doc ID to evaluate (exclusive)
 * @param minValue lower bound of the range (inclusive)
 * @param maxValue upper bound of the range (inclusive)
 * @param bitSet the bitset to fill
 * @param offset subtracted from each doc ID before setting the bit
 * @throws IOException if positioning on a doc or reading its value fails
 */
public void rangeIntoBitSet(
    int fromDoc, int toDoc, long minValue, long maxValue, FixedBitSet bitSet, int offset)
    throws IOException {
  for (int doc = fromDoc; doc < toDoc; doc++) {
    if (advanceExact(doc) == false) {
      // No value for this doc: it cannot match.
      continue;
    }
    final long value = longValue();
    if (value < minValue || value > maxValue) {
      continue;
    }
    bitSet.set(doc - offset);
  }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.internal.vectorization;

import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;

/** Scalar (non-SIMD) implementation of {@link DocValuesRangeSupport}. */
final class DefaultDocValuesRangeSupport implements DocValuesRangeSupport {

static final DefaultDocValuesRangeSupport INSTANCE = new DefaultDocValuesRangeSupport();

private DefaultDocValuesRangeSupport() {}

@Override
public void rangeIntoBitSet(
LongValues values,
int fromDoc,
int toDoc,
long minValue,
long maxValue,
FixedBitSet bitSet,
int offset) {
// Scalar fallback implementation
for (int d = fromDoc; d < toDoc; d++) {
long v = values.get(d);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this tells me we eventually might actually want a int count = values.get(int[] docIds, long[] dest);

That is a larger change, but I suspect there is perf to be gained lower level just decoding the long values.

Copy link
Copy Markdown
Contributor Author

@sgup432 sgup432 May 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benwtrent
Hmm doing in a batched manner like you mentioned would certainly help. Seems like another topic worthy of a separate issue or discussion.

if (v >= minValue && v <= maxValue) {
bitSet.set(d - offset);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,9 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() {
public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) {
return new PostingDecodingUtil(input);
}

// Hands out the shared DefaultDocValuesRangeSupport singleton, i.e. the scalar (non-SIMD)
// implementation of DocValuesRangeSupport.
@Override
public DocValuesRangeSupport getDocValuesRangeSupport() {
return DefaultDocValuesRangeSupport.INSTANCE;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.internal.vectorization;

import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;

/**
* Interface for SIMD-accelerated doc values range operations.
*
* <p>Implementations fill a {@link FixedBitSet} with the doc IDs in a range whose values satisfy a
* numeric range predicate. The default scalar implementation is used when the Panama Vector API is
* unavailable; a SIMD-accelerated implementation is used otherwise.
*
* @lucene.internal
*/
public interface DocValuesRangeSupport {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this support path, etc. all matches our existing patterns. Seems OK to me.


/**
 * Fills {@code bitSet} with the doc IDs in {@code [fromDoc, toDoc)} whose values (read via {@code
 * values}) are in {@code [minValue, maxValue]}.
 *
 * <p>A matching doc {@code d} is recorded at bit index {@code d - offset}.
 *
 * <p>NOTE(review): implementations are presumably expected to only set bits and to leave all
 * other bits of {@code bitSet} untouched — confirm and state it here if so.
 *
 * @param values random-access reader for the doc values, addressed by doc ID
 * @param fromDoc first doc ID to evaluate (inclusive)
 * @param toDoc last doc ID to evaluate (exclusive)
 * @param minValue lower bound of the range (inclusive)
 * @param maxValue upper bound of the range (inclusive)
 * @param bitSet the bitset to fill
 * @param offset subtracted from each doc ID before setting the bit
 */
void rangeIntoBitSet(
LongValues values,
int fromDoc,
int toDoc,
long minValue,
long maxValue,
FixedBitSet bitSet,
int offset);
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ public static VectorizationProvider getInstance() {
/** Create a new {@link PostingDecodingUtil} for the given {@link IndexInput}. */
public abstract PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException;

/**
 * Returns a {@link DocValuesRangeSupport} instance for bulk numeric range evaluation. The
 * returned instance uses SIMD when available (Panama Vector API), falling back to a scalar loop
 * otherwise.
 *
 * @return the range-evaluation support offered by this vectorization provider
 */
public abstract DocValuesRangeSupport getDocValuesRangeSupport();

// *** Lookup mechanism: ***

private static final Logger LOG = Logger.getLogger(VectorizationProvider.class.getName());
Expand Down Expand Up @@ -213,6 +220,7 @@ private static Optional<Module> lookupVectorModule() {
"org.apache.lucene.util.VectorUtil",
"org.apache.lucene.codecs.lucene104.Lucene104PostingsReader",
"org.apache.lucene.codecs.lucene104.PostingIndexInput",
"org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer",
"org.apache.lucene.tests.util.TestSysoutsLimits");

private static final StackWalker STACKWALKER =
Expand Down
Loading
Loading