diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java b/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
index 1a62cdffd575..30780fd3953f 100644
--- a/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
@@ -19,10 +19,13 @@
 import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A {@link BulkScorer} that restricts collection to the half-open doc ID interval {@code [minDocID,
@@ -95,4 +98,39 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
   public long cost() {
     return maxDocID - minDocID;
   }
+
+  /**
+   * Builds the cache entry straight from this scorer's doc ID bounds without running {@code
+   * score}; the {@code maxDoc} argument is not consulted.
+   */
+  @Override
+  public LRUQueryCache.CacheAndCount intoCacheAndCount(int maxDoc) {
+    final int matchCount = maxDocID - minDocID;
+    return new LRUQueryCache.CacheAndCount(new RangeDocIdSet(minDocID, maxDocID), matchCount);
+  }
+
+  /** A {@link DocIdSet} that iterates {@link DocIdSetIterator#range} over its two bounds. */
+  private static class RangeDocIdSet extends DocIdSet {
+    private static final long BASE_RAM_BYTES_USED =
+        RamUsageEstimator.shallowSizeOfInstance(RangeDocIdSet.class);
+
+    private final int minDocID;
+    private final int maxDocID;
+
+    RangeDocIdSet(int lower, int upper) {
+      minDocID = lower;
+      maxDocID = upper;
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      return DocIdSetIterator.range(minDocID, maxDocID);
+    }
+
+    /** Reports only the shallow instance size, computed once per class. */
+    @Override
+    public long ramBytesUsed() {
+      return BASE_RAM_BYTES_USED;
+    }
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java
index 29234921bc42..dea5b0484d47 100644
--- 
a/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java @@ -17,7 +17,10 @@ package org.apache.lucene.search; import java.io.IOException; +import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; /** * This class is used to score a range of documents at once, and is returned by {@link @@ -78,4 +81,113 @@ public abstract int score(LeafCollector collector, Bits acceptDocs, int min, int /** Same as {@link DocIdSetIterator#cost()} for bulk scorers. */ public abstract long cost(); + + /** + * Materializes all matching document IDs in {@code [0, maxDoc)} into a {@link DocIdSet} together + * with an exact match count, for use by {@link LRUQueryCache} (see {@link + * LRUQueryCache#cacheImpl}). + * + *
<p>Implementations score from document 0 up to {@link DocIdSetIterator#NO_MORE_DOCS} with no + * {@code acceptDocs} filter. + * + *
<p>The default representation is chosen from {@link #cost()} versus {@code maxDoc}: when {@code
+ * cost * 100 >= maxDoc} (estimated density at least 1%), a dense {@link BitDocIdSet} is used;
+ * otherwise a sparse {@link RoaringDocIdSet} is built.
+ *
+ * @param maxDoc one past the maximum document ID in the index (e.g. {@link
+ * org.apache.lucene.index.LeafReader#maxDoc()})
+ * @return the cached doc-id set and its cardinality
+ * @throws IOException if scoring fails
+ * @see LRUQueryCache.CacheAndCount
+ */
+ public LRUQueryCache.CacheAndCount intoCacheAndCount(int maxDoc) throws IOException {
+   // Same test as "cost * 100 >= maxDoc", written as cost >= ceil(maxDoc / 100) so that a huge
+   // cost() estimate cannot overflow the long multiplication and wrongly select the sparse path.
+   if (cost() >= (maxDoc + 99L) / 100) {
+     // Dense: FixedBitSet is faster and enables the random-access optimization in ConjunctionDISI.
+     return cacheIntoBitSet(maxDoc);
+   }
+   return cacheIntoRoaringDocIdSet(maxDoc); // sparse: estimated density below 1%
+ }
+
+ private LRUQueryCache.CacheAndCount cacheIntoBitSet(int maxDoc) throws IOException {
+ final FixedBitSet bitSet = new FixedBitSet(maxDoc);
+ int[] count = new int[1];
+ score(
+ new LeafCollector() {
+
+ private int[] buffer;
+
+ @Override
+ public void setScorer(Scorable scorer) {}
+
+ @Override
+ public void collect(int doc) {
+ count[0]++;
+ bitSet.set(doc);
+ }
+
+ @Override
+ public void collectRange(int min, int max) {
+ count[0] += max - min;
+ bitSet.set(min, max);
+ }
+
+ @Override
+ public void collect(DocIdStream stream) {
+ if (buffer == null) {
+ buffer = new int[128];
+ }
+ for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
+ for (int i = 0; i < c; ++i) {
+ bitSet.set(buffer[i]);
+ }
+ count[0] += c;
+ }
+ }
+ },
+ null,
+ 0,
+ DocIdSetIterator.NO_MORE_DOCS);
+ return new LRUQueryCache.CacheAndCount(new BitDocIdSet(bitSet, count[0]), count[0]);
+ }
+
+ private LRUQueryCache.CacheAndCount cacheIntoRoaringDocIdSet(int maxDoc) throws IOException {
+ RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(maxDoc);
+ score(
+ new LeafCollector() {
+
+ private int[] buffer = null;
+
+ @Override
+ public void setScorer(Scorable scorer) {}
+
+ @Override
+ public void collect(int doc) {
+ builder.add(doc);
+ }
+
+ @Override
+ public void collectRange(int min, int max) {
+ builder.add(min, max);
+ }
+
+ @Override
+ public void collect(DocIdStream stream) {
+ if (buffer == null) {
+ buffer = new int[128];
+ }
+ for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
+ for (int i = 0; i < c; ++i) {
+ builder.add(buffer[i]);
+ }
+ }
+ }
+ },
+ null,
+ 0,
+ DocIdSetIterator.NO_MORE_DOCS);
+ RoaringDocIdSet cache = builder.build();
+ return new LRUQueryCache.CacheAndCount(cache, cache.cardinality());
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index bc87550837e4..832155392588 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -572,13 +572,7 @@ public Collection