apache · romseygeek · May 20, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -364,6 +364,9 @@ Optimizations
 
 * GITHUB#16001: IndexSearcher.count() was calling query.rewrite twice, a regression since v9.10 (David Smiley)
 
+* GITHUB#16050: Add SIMD-accelerated bulk range evaluation for dense numeric doc values via
+  BatchDocValuesRangeIterator and DocValuesRangeSupport. (Sagar Upadhyaya)
+
 Bug Fixes
 ---------------------
 * GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent

diff --git a/...hmark-jmh/src/java/org/apache/lucene/benchmark/jmh/MultiFieldDocValuesRangeBenchmark.java b/...hmark-jmh/src/java/org/apache/lucene/benchmark/jmh/MultiFieldDocValuesRangeBenchmark.java
@@ -87,7 +87,7 @@ public static class Params {
     @Param({"1000000", "10000000"})
     public int docCount;
 
-    @Param({"3", "5"})
+    @Param({"1", "3", "5"})
     public int fieldCount;
 
     @Param({CLUSTERED, MIXED, RANDOM, SORTED})
@@ -129,6 +129,17 @@ public void setup(Params params) throws Exception {
           SortedNumericDocValuesField.newSlowRangeQuery("field" + f, range[0], range[1]),
           Occur.FILTER);
     }
+    // For fieldCount=1 on non-sorted patterns, add a MatchAllDocsQuery so
+    // DenseConjunctionBulkScorer is used and intoBitSet() is called on the range iterator
+    // (enabling the SIMD path). Without this, a single-clause BooleanQuery rewrites to the
+    // query itself and goes through DefaultBulkScorer which doesn't call intoBitSet().
+    // For the sorted pattern, field0 is the index sort key, so
+    // getDocIdSetIteratorOrNullForPrimarySort fires and returns DocIdSetIterator.range(). Adding
+    // MatchAllDocsQuery there would force the query through DenseConjunctionBulkScorer and bypass
+    // that fast path, causing a regression.
+    if (params.fieldCount == 1 && !params.dataPattern.equals(SORTED)) {
+      bqBuilder.add(new org.apache.lucene.search.MatchAllDocsQuery(), Occur.FILTER);
+    }
     query = bqBuilder.build();
   }
 

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -391,6 +391,25 @@ public void close() throws IOException {
   private record DocValuesSkipperEntry(
       long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {}
 
+  // Cached DocValuesRangeSupport instance to avoid repeated stack walks in ensureCaller()
+  private static final org.apache.lucene.internal.vectorization.DocValuesRangeSupport
+      DOC_VALUES_RANGE_SUPPORT =
+          org.apache.lucene.internal.vectorization.VectorizationProvider.getInstance()
+              .getDocValuesRangeSupport();
+
+  // Static helper that forwards to DOC_VALUES_RANGE_SUPPORT for the anonymous inner classes below
+  static void rangeIntoBitSetVectorized(
+      org.apache.lucene.util.LongValues values,
+      int fromDoc,
+      int toDoc,
+      long minValue,
+      long maxValue,
+      org.apache.lucene.util.FixedBitSet bitSet,
+      int offset) {
+    DOC_VALUES_RANGE_SUPPORT.rangeIntoBitSet(
+        values, fromDoc, toDoc, minValue, maxValue, bitSet, offset);
+  }
+
   private static class NumericEntry {
     long[] table;
     int blockShift;
@@ -610,6 +629,19 @@ public long longValue() throws IOException {
               public long longValue() throws IOException {
                 return values.get(doc);
               }
+
+              @Override
+              public void rangeIntoBitSet(
+                  int fromDoc,
+                  int toDoc,
+                  long minValue,
+                  long maxValue,
+                  org.apache.lucene.util.FixedBitSet bitSet,
+                  int offset) {
+                // No decoding needed here: the (possibly SIMD) helper tests raw values directly
+                rangeIntoBitSetVectorized(
+                    values, fromDoc, toDoc, minValue, maxValue, bitSet, offset);
+              }
             };
           } else {
             final long mul = entry.gcd;
@@ -619,6 +651,23 @@ public long longValue() throws IOException {
               public long longValue() throws IOException {
                 return mul * values.get(doc) + delta;
               }
+
+              @Override
+              public void rangeIntoBitSet(
+                  int fromDoc,
+                  int toDoc,
+                  long minValue,
+                  long maxValue,
+                  org.apache.lucene.util.FixedBitSet bitSet,
+                  int offset) {
+                // Scalar loop: gcd/delta-encoded values are decoded per doc before the range test
+                for (int d = fromDoc; d < toDoc; d++) {
+                  long v = mul * values.get(d) + delta;
+                  if (v >= minValue && v <= maxValue) {
+                    bitSet.set(d - offset);
+                  }
+                }
+              }
             };
           }
         }

diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java
@@ -25,6 +25,7 @@
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.BatchDocValuesRangeIterator;
 import org.apache.lucene.search.ConstantScoreScorerSupplier;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSetIterator;
@@ -145,6 +146,14 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
               return ConstantScoreScorerSupplier.fromIterator(
                   psIterator, score(), scoreMode, maxDoc);
             }
+            // Use batch iterator: plain DocIdSetIterator (not TwoPhaseIterator) so that
+            // DenseConjunctionBulkScorer takes the bitset path and calls intoBitSet(),
+            // which dispatches to NumericDocValues.rangeIntoBitSet() (SIMD when available).
+            return ConstantScoreScorerSupplier.fromIterator(
+                new BatchDocValuesRangeIterator(singleton, skipper, lowerValue, upperValue),
+                score(),
+                scoreMode,
+                maxDoc);
           }
           return ConstantScoreScorerSupplier.fromIterator(
               TwoPhaseIterator.asDocIdSetIterator(

diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
@@ -91,4 +91,38 @@ public void longValues(int size, int[] docs, long[] values, long defaultValue)
       values[i] = value;
     }
   }
+
+  /**
+   * Fills a {@link org.apache.lucene.util.FixedBitSet} with the doc IDs in {@code [fromDoc, toDoc)}
+   * whose values are in {@code [minValue, maxValue]}. This bulk operation lets implementations
+   * avoid per-doc virtual dispatch overhead.
+   *
+   * <p>The default implementation falls back to per-doc evaluation via {@link #advanceExact} and
+   * {@link #longValue}, skipping docs without a value. Subclasses with random-access storage
+   * (e.g., dense fixed-bitsPerValue fields) can override this for better performance.
+   *
+   * @param fromDoc first doc ID to evaluate (inclusive)
+   * @param toDoc end of the doc ID range to evaluate (exclusive)
+   * @param minValue lower bound of the range (inclusive)
+   * @param maxValue upper bound of the range (inclusive)
+   * @param bitSet the bitset in which matching docs are set; other bits are left untouched
+   * @param offset subtracted from each matching doc ID to obtain the bit index
+   */
+  public void rangeIntoBitSet(
+      int fromDoc,
+      int toDoc,
+      long minValue,
+      long maxValue,
+      org.apache.lucene.util.FixedBitSet bitSet,
+      int offset)
+      throws IOException {
+    for (int d = fromDoc; d < toDoc; d++) {
+      if (advanceExact(d)) {
+        long v = longValue();
+        if (v >= minValue && v <= maxValue) {
+          bitSet.set(d - offset);
+        }
+      }
+    }
+  }
 }
diff --git a/.../core/src/java/org/apache/lucene/internal/vectorization/DefaultDocValuesRangeSupport.java b/.../core/src/java/org/apache/lucene/internal/vectorization/DefaultDocValuesRangeSupport.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.internal.vectorization;
+
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LongValues;
+
+/** Scalar (non-SIMD) {@link DocValuesRangeSupport}, returned by DefaultVectorizationProvider. */
+final class DefaultDocValuesRangeSupport implements DocValuesRangeSupport {
+
+  static final DefaultDocValuesRangeSupport INSTANCE = new DefaultDocValuesRangeSupport();
+
+  private DefaultDocValuesRangeSupport() {} // singleton: use INSTANCE
+
+  @Override
+  public void rangeIntoBitSet(
+      LongValues values,
+      int fromDoc,
+      int toDoc,
+      long minValue,
+      long maxValue,
+      FixedBitSet bitSet,
+      int offset) {
+    // Scalar loop: one values.get() and one range test per doc; only matching docs set a bit.
+    for (int d = fromDoc; d < toDoc; d++) {
+      long v = values.get(d);
+      if (v >= minValue && v <= maxValue) {
+        bitSet.set(d - offset);
+      }
+    }
+  }
+}
diff --git a/.../core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java b/.../core/src/java/org/apache/lucene/internal/vectorization/DefaultVectorizationProvider.java
@@ -50,4 +50,9 @@ public FlatVectorsScorer getLucene99ScalarQuantizedVectorsScorer() {
   public PostingDecodingUtil newPostingDecodingUtil(IndexInput input) {
     return new PostingDecodingUtil(input);
   }
+
+  @Override
+  public DocValuesRangeSupport getDocValuesRangeSupport() {
+    return DefaultDocValuesRangeSupport.INSTANCE; // scalar (non-SIMD) implementation
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/DocValuesRangeSupport.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/DocValuesRangeSupport.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.internal.vectorization;
+
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LongValues;
+
+/**
+ * Interface for bulk doc values range operations, SIMD-accelerated where available.
+ *
+ * <p>Implementations fill a {@link FixedBitSet} with the doc IDs in a range whose values satisfy a
+ * numeric range predicate. The default scalar implementation is used when the Panama Vector API is
+ * unavailable; a SIMD-accelerated implementation is used otherwise.
+ *
+ * @lucene.internal
+ */
+public interface DocValuesRangeSupport {
+
+  /**
+   * Fills {@code bitSet} with the doc IDs in {@code [fromDoc, toDoc)} whose values (read via {@code
+   * values}) are in {@code [minValue, maxValue]}.
+   *
+   * @param values random-access reader for the doc values
+   * @param fromDoc first doc ID to evaluate (inclusive)
+   * @param toDoc end of the doc ID range to evaluate (exclusive)
+   * @param minValue lower bound of the range (inclusive)
+   * @param maxValue upper bound of the range (inclusive)
+   * @param bitSet the bitset in which matching docs are set
+   * @param offset subtracted from each matching doc ID to obtain the bit index
+   */
+  void rangeIntoBitSet(
+      LongValues values,
+      int fromDoc,
+      int toDoc,
+      long minValue,
+      long maxValue,
+      FixedBitSet bitSet,
+      int offset);
+}
diff --git a/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java b/lucene/core/src/java/org/apache/lucene/internal/vectorization/VectorizationProvider.java
@@ -115,6 +115,13 @@ public static VectorizationProvider getInstance() {
   /** Create a new {@link PostingDecodingUtil} for the given {@link IndexInput}. */
   public abstract PostingDecodingUtil newPostingDecodingUtil(IndexInput input) throws IOException;
 
+  /**
+   * Returns the {@link DocValuesRangeSupport} used for bulk numeric range evaluation. The default
+   * provider returns a scalar loop; providers backed by the Panama Vector API may return a
+   * SIMD-accelerated implementation.
+   */
+  public abstract DocValuesRangeSupport getDocValuesRangeSupport();
+
   // *** Lookup mechanism: ***
 
   private static final Logger LOG = Logger.getLogger(VectorizationProvider.class.getName());
@@ -213,6 +220,7 @@ private static Optional<Module> lookupVectorModule() {
           "org.apache.lucene.util.VectorUtil",
           "org.apache.lucene.codecs.lucene104.Lucene104PostingsReader",
           "org.apache.lucene.codecs.lucene104.PostingIndexInput",
+          "org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducer",
           "org.apache.lucene.tests.util.TestSysoutsLimits");
 
   private static final StackWalker STACKWALKER =