# Review notes. This preamble sits before the first file header of the patch and is not part of
# any hunk; all hunk line counts below are unchanged.
#
# What the patch does, as shown by the hunks:
#   * BulkScorer gains a concrete method intoCacheAndCount(int maxDoc). Its default body is the
#     density heuristic that cacheImpl in LRUQueryCache used to contain: when
#     cost() * 100 >= maxDoc it fills a FixedBitSet and wraps it in a BitDocIdSet, otherwise it
#     builds a RoaringDocIdSet.
#   * cacheIntoBitSet and cacheIntoRoaringDocIdSet move from private static helpers of
#     LRUQueryCache (which took the scorer as a parameter) to private instance methods of
#     BulkScorer. The collector bodies are the same statements; only the receiver of score(...)
#     and the qualification of CacheAndCount differ.
#   * LRUQueryCache.cacheImpl becomes a single delegation to scorer.intoCacheAndCount(maxDoc).
#   * RangeBulkScorer overrides intoCacheAndCount: it returns a new private RangeDocIdSet over
#     [minDocID, maxDocID) with count maxDocID - minDocID and does not call score(...).
#     RangeDocIdSet.ramBytesUsed() reports only the shallow instance size.
#   * TestRangeFilteredBulkScorer gains testIntoCacheAndCount, which checks the count and the
#     iterated doc IDs for the range [20, 80) with maxDoc = 1000.
#
# Points to confirm before merging:
#   * NOTE(review): the RangeBulkScorer override never reads its maxDoc argument. Presumably
#     maxDocID <= maxDoc always holds for this scorer; confirm against its constructor/callers.
#   * NOTE(review): RangeDocIdSet.iterator() forwards straight to
#     DocIdSetIterator.range(minDocID, maxDocID). Confirm how that factory treats an empty range
#     (minDocID == maxDocID) and whether RangeBulkScorer can be constructed with one.
#   * NOTE(review): no import hunk is shown for LRUQueryCache although its only visible code
#     uses of FixedBitSet / RoaringDocIdSet are removed; confirm no import became unused.
#   * The two "<p>" paragraph markers in the new BulkScorer Javadoc were missing from the
#     extracted text (blank gaps after " *") and are restored below; verify against the commit.
#
diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java b/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
index 1a62cdffd575..30780fd3953f 100644
--- a/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/document/RangeBulkScorer.java
@@ -19,10 +19,13 @@
 import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * A {@link BulkScorer} that restricts collection to the half-open doc ID interval {@code [minDocID,
@@ -95,4 +98,32 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
   public long cost() {
     return maxDocID - minDocID;
   }
+
+  @Override
+  public LRUQueryCache.CacheAndCount intoCacheAndCount(int maxDoc) {
+    DocIdSet docIdSet = new RangeDocIdSet(minDocID, maxDocID);
+    return new LRUQueryCache.CacheAndCount(docIdSet, maxDocID - minDocID);
+  }
+
+  private static class RangeDocIdSet extends DocIdSet {
+    private static final long BASE_RAM_BYTES_USED =
+        RamUsageEstimator.shallowSizeOfInstance(RangeDocIdSet.class);
+    private final int minDocID;
+    private final int maxDocID;
+
+    RangeDocIdSet(int minDocID, int maxDocID) {
+      this.minDocID = minDocID;
+      this.maxDocID = maxDocID;
+    }
+
+    @Override
+    public DocIdSetIterator iterator() {
+      return DocIdSetIterator.range(minDocID, maxDocID);
+    }
+
+    @Override
+    public long ramBytesUsed() {
+      return BASE_RAM_BYTES_USED;
+    }
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java
index 29234921bc42..dea5b0484d47 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BulkScorer.java
@@ -17,7 +17,10 @@
 package org.apache.lucene.search;
 
 import java.io.IOException;
+import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.RoaringDocIdSet;
 
 /**
  * This class is used to score a range of documents at once, and is returned by {@link
@@ -78,4 +81,113 @@ public abstract int score(LeafCollector collector, Bits acceptDocs, int min, int
 
   /** Same as {@link DocIdSetIterator#cost()} for bulk scorers. */
   public abstract long cost();
+
+  /**
+   * Materializes all matching document IDs in {@code [0, maxDoc)} into a {@link DocIdSet} together
+   * with an exact match count, for use by {@link LRUQueryCache} (see {@link
+   * LRUQueryCache#cacheImpl}).
+   *
+   * <p>Implementations score from document 0 up to {@link DocIdSetIterator#NO_MORE_DOCS} with no
+   * {@code acceptDocs} filter.
+   *
+   * <p>The default representation is chosen from {@link #cost()} versus {@code maxDoc}: when {@code
+   * cost * 100 >= maxDoc} (estimated density at least 1%), a dense {@link BitDocIdSet} is used;
+   * otherwise a sparse {@link RoaringDocIdSet} is built.
+   *
+   * @param maxDoc one past the maximum document ID in the index (e.g. {@link
+   *     org.apache.lucene.index.LeafReader#maxDoc()})
+   * @return the cached doc-id set and its cardinality
+   * @throws IOException if scoring fails
+   * @see LRUQueryCache.CacheAndCount
+   */
+  public LRUQueryCache.CacheAndCount intoCacheAndCount(int maxDoc) throws IOException {
+    if (cost() * 100 >= maxDoc) {
+      // FixedBitSet is faster for dense sets and will enable the random-access
+      // optimization in ConjunctionDISI
+      return cacheIntoBitSet(maxDoc);
+    } else {
+      return cacheIntoRoaringDocIdSet(maxDoc);
+    }
+  }
+
+  private LRUQueryCache.CacheAndCount cacheIntoBitSet(int maxDoc) throws IOException {
+    final FixedBitSet bitSet = new FixedBitSet(maxDoc);
+    int[] count = new int[1];
+    score(
+        new LeafCollector() {
+
+          private int[] buffer;
+
+          @Override
+          public void setScorer(Scorable scorer) {}
+
+          @Override
+          public void collect(int doc) {
+            count[0]++;
+            bitSet.set(doc);
+          }
+
+          @Override
+          public void collectRange(int min, int max) {
+            count[0] += max - min;
+            bitSet.set(min, max);
+          }
+
+          @Override
+          public void collect(DocIdStream stream) {
+            if (buffer == null) {
+              buffer = new int[128];
+            }
+            for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
+              for (int i = 0; i < c; ++i) {
+                bitSet.set(buffer[i]);
+              }
+              count[0] += c;
+            }
+          }
+        },
+        null,
+        0,
+        DocIdSetIterator.NO_MORE_DOCS);
+    return new LRUQueryCache.CacheAndCount(new BitDocIdSet(bitSet, count[0]), count[0]);
+  }
+
+  private LRUQueryCache.CacheAndCount cacheIntoRoaringDocIdSet(int maxDoc) throws IOException {
+    RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(maxDoc);
+    score(
+        new LeafCollector() {
+
+          private int[] buffer = null;
+
+          @Override
+          public void setScorer(Scorable scorer) {}
+
+          @Override
+          public void collect(int doc) {
+            builder.add(doc);
+          }
+
+          @Override
+          public void collectRange(int min, int max) {
+            builder.add(min, max);
+          }
+
+          @Override
+          public void collect(DocIdStream stream) {
+            if (buffer == null) {
+              buffer = new int[128];
+            }
+            for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
+              for (int i = 0; i < c; ++i) {
+                builder.add(buffer[i]);
+              }
+            }
+          }
+        },
+        null,
+        0,
+        DocIdSetIterator.NO_MORE_DOCS);
+    RoaringDocIdSet cache = builder.build();
+    return new LRUQueryCache.CacheAndCount(cache, cache.cardinality());
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index bc87550837e4..832155392588 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -572,13 +572,7 @@ public Collection getChildResources() {
    * and a {@link BitDocIdSet} over a {@link FixedBitSet} otherwise.
    */
   protected CacheAndCount cacheImpl(BulkScorer scorer, int maxDoc) throws IOException {
-    if (scorer.cost() * 100 >= maxDoc) {
-      // FixedBitSet is faster for dense sets and will enable the random-access
-      // optimization in ConjunctionDISI
-      return cacheIntoBitSet(scorer, maxDoc);
-    } else {
-      return cacheIntoRoaringDocIdSet(scorer, maxDoc);
-    }
+    return scorer.intoCacheAndCount(maxDoc);
   }
 
   /**
@@ -601,88 +595,6 @@ protected CacheAndCount tryPopulateCache(
     return cached;
   }
 
-  private static CacheAndCount cacheIntoBitSet(BulkScorer scorer, int maxDoc) throws IOException {
-    final FixedBitSet bitSet = new FixedBitSet(maxDoc);
-    int[] count = new int[1];
-    scorer.score(
-        new LeafCollector() {
-
-          private int[] buffer;
-
-          @Override
-          public void setScorer(Scorable scorer) {}
-
-          @Override
-          public void collect(int doc) {
-            count[0]++;
-            bitSet.set(doc);
-          }
-
-          @Override
-          public void collectRange(int min, int max) {
-            count[0] += max - min;
-            bitSet.set(min, max);
-          }
-
-          @Override
-          public void collect(DocIdStream stream) {
-            if (buffer == null) {
-              buffer = new int[128];
-            }
-            for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
-              for (int i = 0; i < c; ++i) {
-                bitSet.set(buffer[i]);
-              }
-              count[0] += c;
-            }
-          }
-        },
-        null,
-        0,
-        DocIdSetIterator.NO_MORE_DOCS);
-    return new CacheAndCount(new BitDocIdSet(bitSet, count[0]), count[0]);
-  }
-
-  private static CacheAndCount cacheIntoRoaringDocIdSet(BulkScorer scorer, int maxDoc)
-      throws IOException {
-    RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(maxDoc);
-    scorer.score(
-        new LeafCollector() {
-
-          private int[] buffer = null;
-
-          @Override
-          public void setScorer(Scorable scorer) {}
-
-          @Override
-          public void collect(int doc) {
-            builder.add(doc);
-          }
-
-          @Override
-          public void collectRange(int min, int max) {
-            builder.add(min, max);
-          }
-
-          @Override
-          public void collect(DocIdStream stream) {
-            if (buffer == null) {
-              buffer = new int[128];
-            }
-            for (int c = stream.intoArray(buffer); c != 0; c = stream.intoArray(buffer)) {
-              for (int i = 0; i < c; ++i) {
-                builder.add(buffer[i]);
-              }
-            }
-          }
-        },
-        null,
-        0,
-        DocIdSetIterator.NO_MORE_DOCS);
-    RoaringDocIdSet cache = builder.build();
-    return new CacheAndCount(cache, cache.cardinality());
-  }
-
   /**
    * Return the total number of times that a {@link Query} has been looked up in this {@link
    * QueryCache}. Note that this number is incremented once per segment so running a cached query
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestRangeFilteredBulkScorer.java b/lucene/core/src/test/org/apache/lucene/document/TestRangeFilteredBulkScorer.java
index 3e915365f025..50c332157ce9 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestRangeFilteredBulkScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestRangeFilteredBulkScorer.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreMode;
@@ -141,4 +142,23 @@ private static void assertCollectedRanges(List expected, List actu
       assertArrayEquals(expected.get(i), actual.get(i));
     }
   }
+
+  public void testIntoCacheAndCount() throws Exception {
+    int rangeMin = 20;
+    int rangeMaxExclusive = 80;
+    BulkScorer bs = newBulkScorer(rangeMin, rangeMaxExclusive);
+    int leafMaxDoc = 1000;
+
+    LRUQueryCache.CacheAndCount cached = bs.intoCacheAndCount(leafMaxDoc);
+    assertEquals(rangeMaxExclusive - rangeMin, cached.count());
+
+    DocIdSetIterator expected = DocIdSetIterator.range(rangeMin, rangeMaxExclusive);
+    DocIdSetIterator actual = cached.iterator();
+    for (int doc = expected.nextDoc();
+        doc != DocIdSetIterator.NO_MORE_DOCS;
+        doc = expected.nextDoc()) {
+      assertEquals(doc, actual.nextDoc());
+    }
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, actual.nextDoc());
+  }
 }