apache · iverase · May 18, 2026 · romseygeek · May 18, 2026 · iverase
diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java b/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
@@ -19,10 +19,13 @@
 import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A {@link BulkScorer} that restricts collection to the half-open doc ID interval {@code [minDocID,
@@ -95,4 +98,32 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
   public long cost() {
     return maxDocID - minDocID;
   }
+
+  @Override
+  public LRUQueryCache.CacheAndCount intoCacheAndCount(int maxDoc) {
+    final int matchCount = maxDocID - minDocID; // width of the half-open interval
+    return new LRUQueryCache.CacheAndCount(new RangeDocIdSet(minDocID, maxDocID), matchCount);
+  }
+
+  private static class RangeDocIdSet extends DocIdSet {
+    private static final long BASE_RAM_BYTES_USED =
+        RamUsageEstimator.shallowSizeOfInstance(RangeDocIdSet.class);
+    private final int rangeStart; // first doc ID in the set
+    private final int rangeEnd; // one past the last doc ID in the set
+
+    RangeDocIdSet(int minDocID, int maxDocID) {
+      rangeStart = minDocID;
+      rangeEnd = maxDocID;
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return BASE_RAM_BYTES_USED; // only primitive fields, so the shallow size covers everything
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      return DocIdSetIterator.range(rangeStart, rangeEnd);
+    }
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java
@@ -17,7 +17,10 @@
 package org.apache.lucene.search;
 
 import java.io.IOException;
+import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.RoaringDocIdSet;
 
 /**
  * This class is used to score a range of documents at once, and is returned by {@link
@@ -78,4 +81,113 @@ public abstract int score(LeafCollector collector, Bits acceptDocs, int min, int
 
   /** Same as {@link DocIdSetIterator#cost()} for bulk scorers. */
   public abstract long cost();
+
+  /**
+   * Materializes all matching document IDs in {@code [0, maxDoc)} into a {@link DocIdSet} together
+   * with an exact match count, for use by {@link LRUQueryCache} (see {@link
+   * LRUQueryCache#cacheImpl}).
+   *
+   * <p>The default implementation scores from document 0 up to {@link
+   * DocIdSetIterator#NO_MORE_DOCS} with no {@code acceptDocs} filter; overrides need not score.
+   *
+   * <p>The default representation is chosen from {@link #cost()} versus {@code maxDoc}: when {@code
+   * cost * 100 >= maxDoc} (estimated density at least 1%), a dense {@link BitDocIdSet} is used;
+   * otherwise a sparse {@link RoaringDocIdSet} is built.
+   *
+   * @param maxDoc one past the maximum document ID in the index (e.g. {@link
+   *     org.apache.lucene.index.LeafReader#maxDoc()})
+   * @return the cached doc-id set and its cardinality
+   * @throws IOException if scoring fails
+   * @see LRUQueryCache.CacheAndCount
+   */
+  public LRUQueryCache.CacheAndCount intoCacheAndCount(int maxDoc) throws IOException {
+    // cost * 100 >= maxDoc, written as cost >= ceil(maxDoc / 100) so a huge cost() cannot overflow
+    if (cost() >= (maxDoc + 99L) / 100) {
+      // FixedBitSet is faster for dense sets and will enable the random-access
+      // optimization in ConjunctionDISI
+      return cacheIntoBitSet(maxDoc);
+    }
+    return cacheIntoRoaringDocIdSet(maxDoc);
+  }
+
+  /** Scores every match into a {@link FixedBitSet}, tallying the count as docs are collected. */
+  private LRUQueryCache.CacheAndCount cacheIntoBitSet(int maxDoc) throws IOException {
+    final FixedBitSet matches = new FixedBitSet(maxDoc);
+    final int[] tally = {0}; // one-slot holder so the anonymous collector can update it
+    score(
+        new LeafCollector() {
+          private int[] scratch;
+
+          @Override
+          public void setScorer(Scorable scorer) {}
+
+          @Override
+          public void collect(int doc) {
+            matches.set(doc);
+            tally[0]++;
+          }
+
+          @Override
+          public void collectRange(int min, int max) {
+            matches.set(min, max);
+            tally[0] += max - min;
+          }
+
+          @Override
+          public void collect(DocIdStream stream) {
+            if (scratch == null) {
+              scratch = new int[128];
+            }
+            for (int n = stream.intoArray(scratch); n != 0; n = stream.intoArray(scratch)) {
+              tally[0] += n;
+              for (int i = 0; i < n; i++) {
+                matches.set(scratch[i]);
+              }
+            }
+          }
+        },
+        null,
+        0,
+        DocIdSetIterator.NO_MORE_DOCS);
+    return new LRUQueryCache.CacheAndCount(new BitDocIdSet(matches, tally[0]), tally[0]);
+  }
+
+  /** Scores every match into a {@link RoaringDocIdSet}; the count is read from the built set. */
+  private LRUQueryCache.CacheAndCount cacheIntoRoaringDocIdSet(int maxDoc) throws IOException {
+    final RoaringDocIdSet.Builder sparseBuilder = new RoaringDocIdSet.Builder(maxDoc);
+    score(
+        new LeafCollector() {
+          private int[] scratch;
+
+          @Override
+          public void setScorer(Scorable scorer) {}
+
+          @Override
+          public void collect(int doc) {
+            sparseBuilder.add(doc);
+          }
+
+          @Override
+          public void collectRange(int min, int max) {
+            sparseBuilder.add(min, max);
+          }
+
+          @Override
+          public void collect(DocIdStream stream) {
+            if (scratch == null) {
+              scratch = new int[128];
+            }
+            for (int n = stream.intoArray(scratch); n != 0; n = stream.intoArray(scratch)) {
+              for (int i = 0; i < n; i++) {
+                sparseBuilder.add(scratch[i]);
+              }
+            }
+          }
+        },
+        null,
+        0,
+        DocIdSetIterator.NO_MORE_DOCS);
+    final RoaringDocIdSet built = sparseBuilder.build();
+    return new LRUQueryCache.CacheAndCount(built, built.cardinality());
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -572,13 +572,7 @@ public Collection<Accountable> getChildResources() {
    * and a {@link BitDocIdSet} over a {@link FixedBitSet} otherwise.
    */
   protected CacheAndCount cacheImpl(BulkScorer scorer, int maxDoc) throws IOException {
-    if (scorer.cost() * 100 >= maxDoc) {
-      // FixedBitSet is faster for dense sets and will enable the random-access
-      // optimization in ConjunctionDISI
-      return cacheIntoBitSet(scorer, maxDoc);
-    } else {
-      return cacheIntoRoaringDocIdSet(scorer, maxDoc);
-    }
+    return scorer.intoCacheAndCount(maxDoc);
   }
 
   /**
@@ -601,88 +595,6 @@ protected CacheAndCount tryPopulateCache(
     return cached;
   }
 
-  private static CacheAndCount cacheIntoBitSet(BulkScorer scorer, int maxDoc) throws IOException {
-    final FixedBitSet bitSet = new FixedBitSet(maxDoc);
-    int[] count = new int[1];
-    scorer.score(
-        new LeafCollector() {
-
-          private int[] buffer;
-
-          @Override
-          public void setScorer(Scorable scorer) {}
-
-          @Override
-          public void collect(int doc) {
-            count[0]++;
-            bitSet.set(doc);
-          }
-
-          @Override
-          public void collectRange(int min, int max) {
-            count[0] += max - min;
-            bitSet.set(min, max);
-          }
-
-          @Override
-          public void collect(DocIdStream stream) {
-            if (buffer == null) {
-              buffer = new int[128];
-            }
-            for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
-              for (int i = 0; i < c; ++i) {
-                bitSet.set(buffer[i]);
-              }
-              count[0] += c;
-            }
-          }
-        },
-        null,
-        0,
-        DocIdSetIterator.NO_MORE_DOCS);
-    return new CacheAndCount(new BitDocIdSet(bitSet, count[0]), count[0]);
-  }
-
-  private static CacheAndCount cacheIntoRoaringDocIdSet(BulkScorer scorer, int maxDoc)
-      throws IOException {
-    RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(maxDoc);
-    scorer.score(
-        new LeafCollector() {
-
-          private int[] buffer = null;
-
-          @Override
-          public void setScorer(Scorable scorer) {}
-
-          @Override
-          public void collect(int doc) {
-            builder.add(doc);
-          }
-
-          @Override
-          public void collectRange(int min, int max) {
-            builder.add(min, max);
-          }
-
-          @Override
-          public void collect(DocIdStream stream) {
-            if (buffer == null) {
-              buffer = new int[128];
-            }
-            for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
-              for (int i = 0; i < c; ++i) {
-                builder.add(buffer[i]);
-              }
-            }
-          }
-        },
-        null,
-        0,
-        DocIdSetIterator.NO_MORE_DOCS);
-    RoaringDocIdSet cache = builder.build();
-    return new CacheAndCount(cache, cache.cardinality());
-  }
-
   /**
    * Return the total number of times that a {@link Query} has been looked up in this {@link
    * QueryCache}. Note that this number is incremented once per segment so running a cached query

diff --git a/lucene/core/src/test/org/apache/lucene/document/TestRangeFilteredBulkScorer.java b/lucene/core/src/test/org/apache/lucene/document/TestRangeFilteredBulkScorer.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreMode;
@@ -141,4 +142,23 @@ private static void assertCollectedRanges(List<int[]> expected, List<int[]> actu
       assertArrayEquals(expected.get(i), actual.get(i));
     }
   }
+
+  public void testIntoCacheAndCount() throws Exception {
+    // The cached entry must report the interval's width and replay exactly its doc IDs.
+    final int first = 20;
+    final int limit = 80; // exclusive upper bound
+    final int segmentMaxDoc = 1000;
+    BulkScorer scorer = newBulkScorer(first, limit);
+
+    LRUQueryCache.CacheAndCount entry = scorer.intoCacheAndCount(segmentMaxDoc);
+    assertEquals(limit - first, entry.count());
+
+    DocIdSetIterator want = DocIdSetIterator.range(first, limit);
+    DocIdSetIterator got = entry.iterator();
+    int doc;
+    while ((doc = want.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      assertEquals(doc, got.nextDoc());
+    }
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, got.nextDoc());
+  }
 }