From 2190c7015607eceddf7cb289ba45f543fbf2f66d Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 23 Apr 2026 11:11:50 +0200
Subject: [PATCH 01/23] Removed unuseful checks

---
 .../join/DiversifyingNearestChildrenKnnCollector.java  | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index 59cb58d22888..034a77dd31de 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -93,9 +93,6 @@ public String toString() {
   @Override
   public TopDocs topDocs() {
     assert heap.size() <= k() : "Tried to collect more results than the maximum number allowed";
-    while (heap.size() > k()) {
-      heap.popToDrain();
-    }
     ScoreDoc[] scoreDocs = new ScoreDoc[heap.size()];
     for (int i = 1; i <= scoreDocs.length; i++) {
       scoreDocs[scoreDocs.length - i] = new ScoreDoc(heap.topNode(), heap.topScore());
@@ -184,12 +181,7 @@ private void updateElement(int heapIndex, int nodeId, int parentId, float score)
       float oldScore = scores[heapIndex];
       childNodes[heapIndex] = nodeId;
       scores[heapIndex] = score;
-      // Since we are a min heap, if the new value is less, we need to make sure to bubble it up
-      if (score < oldScore) {
-        upHeap(heapIndex);
-      } else {
-        downHeap(heapIndex);
-      }
+      downHeap(heapIndex);
     }
 
     /**

From 66390c8a8dd7e25529c1b04e4ec5e12a49b39fd6 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 23 Apr 2026 11:12:48 +0200
Subject: [PATCH 02/23] Removed unused variable

---
 .../search/join/DiversifyingNearestChildrenKnnCollector.java     | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index 034a77dd31de..de5b225e109d 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -178,7 +178,6 @@ private void pushIn(int nodeId, int parentId, float score) {
     private void updateElement(int heapIndex, int nodeId, int parentId, float score) {
       assert parentNodes[heapIndex] == parentId
           : "attempted to update heap element value but with a different parent id";
-      float oldScore = scores[heapIndex];
       childNodes[heapIndex] = nodeId;
       scores[heapIndex] = score;
       downHeap(heapIndex);

From da4ec3d9a60675d46c187523f62b2aa763bd7869 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 23 Apr 2026 11:14:01 +0200
Subject: [PATCH 03/23] Moved downHeap from int to void since the returned
 value is never used

---
 .../search/join/DiversifyingNearestChildrenKnnCollector.java   | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index de5b225e109d..41b12308043c 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -275,7 +275,7 @@ private void upHeap(int origPos) {
       scores[i] = savedScore;
     }
 
-    private int downHeap(int i) {
+    private void downHeap(int i) {
       int savedChild = childNodes[i];
       int savedParent = parentNodes[i];
       float savedScore = scores[i];
@@ -300,7 +300,6 @@ private int downHeap(int i) {
       childNodes[i] = savedChild;
       parentNodes[i] = savedParent;
       scores[i] = savedScore;
-      return i;
     }
 
     // Used only during popToDrain: the index map is never read again after closed=true,

From 2f33476373e3163f2ffeba2d6732ec885b1c6dd0 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Mon, 4 May 2026 12:14:03 +0200
Subject: [PATCH 04/23] Implemented siblings exploration

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  |  58 +++
 .../util/hnsw/ChildrenSiblingExpansion.java   |  51 ++
 .../lucene/util/hnsw/DocSiblingExpansion.java |  50 ++
 .../lucene/util/hnsw/HnswGraphSearcher.java   |  26 +
 .../hnsw/OrdinalTranslatedKnnCollector.java   |  35 +-
 ...iversifyingChildrenByteKnnVectorQuery.java |   2 +-
 ...versifyingChildrenFloatKnnVectorQuery.java |   2 +-
 ...versifyingNearestChildrenKnnCollector.java |  76 ++-
 ...ingNearestChildrenKnnCollectorManager.java |  46 +-
 ...ersifyingChildrenKnnCollectorTestCase.java |  71 +++
 ...versifyingChildrenKnnSiblingExpansion.java | 468 ++++++++++++++++++
 ...earestChildrenKnnCollectorPerformance.java |  21 +-
 .../join/TestToParentJoinKnnResults.java      |   8 +-
 13 files changed, 871 insertions(+), 43 deletions(-)
 create mode 100644 lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
 create mode 100644 lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
 create mode 100644 lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java
 create mode 100644 lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index 2f668d966d41..03836fa8d006 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -96,14 +96,72 @@ protected static void scoreEntryPoints(
     assert scores != null && scores.length >= eps.length;
     scorer.bulkScore(eps, scores, eps.length);
     results.incVisitedCount(eps.length);
+    float[] siblingScores = null;
     for (int i = 0; i < eps.length; i++) {
       float score = scores[i];
       int ep = eps[i];
       visited.set(ep);
       candidates.add(ep, score);
       if (acceptOrds == null || acceptOrds.get(ep)) {
+        // Fetch siblings BEFORE collect() so the parent is not yet in the heap
+        int[] siblings = null;
+        int numSiblingsToVisit = 0;
+        if (results instanceof ChildrenSiblingExpansion expander) {
+          siblings = expander.pendingSiblingOrdinals(ep, visited);
+          if (siblings != null) {
+            //  how many siblings are actually scored to avoid exceeding the visit budget.
+            numSiblingsToVisit =
+                (int) Math.min(siblings.length, results.visitLimit() - results.visitedCount());
+            // Only mark as visited the siblings we will actually score; the rest remain
+            // reachable via normal graph traversal so a better child can still be found
+            for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+          }
+        }
+        // Collect the ep node here so after we have a correctly updated minCompetitiveSimilarity
         results.collect(ep, score);
+        if (numSiblingsToVisit > 0) {
+          siblingScores =
+              scoreSiblings(
+                  results, scorer, candidates, acceptOrds, siblings, numSiblingsToVisit, siblingScores);
+        }
       }
     }
   }
+
+  /**
+   * Scores and collects siblings, adding competitive ones to the candidate queue. Reuses and
+   * returns the siblingScores buffer, reallocating only if too small.
+   */
+  protected static float[] scoreSiblings(
+      KnnCollector results,
+      RandomVectorScorer scorer,
+      NeighborQueue candidates,
+      Bits acceptOrds,
+      int[] siblings,
+      int numSiblings,
+      float[] siblingScores)
+      throws IOException {
+    // If siblingScores not defined yet or too small to collect scores a new one is created
+    // Otherwise we reuse the old one that will be overridden in bulkScore with new scores
+    if (siblingScores == null || siblingScores.length < numSiblings) {
+      siblingScores = new float[numSiblings];
+    }
+    float maxScore = scorer.bulkScore(siblings, siblingScores, numSiblings);
+    results.incVisitedCount(numSiblings);
+    if (maxScore > results.minCompetitiveSimilarity()) {
+      float minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
+      for (int j = 0; j < numSiblings; j++) {
+        float sibScore = siblingScores[j];
+        // We avoid adding to candidates a sibling with a bad score
+        if (sibScore >= minSimilarity) {
+          candidates.add(siblings[j], sibScore);
+          if (acceptOrds == null || acceptOrds.get(siblings[j])) {
+            results.collect(siblings[j], sibScore);
+            minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
+          }
+        }
+      }
+    }
+    return siblingScores;
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
new file mode 100644
index 000000000000..c176c166ceab
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import org.apache.lucene.util.BitSet;
+
+/**
+ * Implemented by {@link org.apache.lucene.search.KnnCollector} instances that support dynamic
+ * sibling expansion during HNSW graph search. When the graph searcher collects a child node
+ * belonging to a newly-discovered parent, all siblings of that parent are immediately scored and
+ * collected without requiring full graph traversal.
+ *
+ * <p>The graph searcher calls {@link #pendingSiblingOrdinals} <em>before</em> invoking {@link
+ * org.apache.lucene.search.KnnCollector#collect} for the triggering node, so that the collector
+ * can safely inspect its internal state before the parent is registered.
+ *
+ * @lucene.experimental
+ */
+public interface ChildrenSiblingExpansion {
+
+  /**
+   * Called by the HNSW graph searcher before {@link
+   * org.apache.lucene.search.KnnCollector#collect} is invoked for {@code collectedOrdinal}. The
+   * implementation inspects the node's parent and returns the ordinals of unvisited siblings that
+   * should be scored immediately.
+   *
+   * <p>The caller is responsible for marking the returned ordinals as visited in {@code
+   * visitedOrds} and for scoring and collecting them.
+   *
+   * @param collectedOrdinal the vector ordinal about to be collected
+   * @param visitedOrds the graph searcher's current visited bitset; returned siblings are not yet
+   *     marked here — the caller does that
+   * @return ordinals of sibling nodes to score next, or {@code null} if none
+   */
+  int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds);
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
new file mode 100644
index 000000000000..4882ade8c2ee
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+/**
+ * Doc-ID-level companion to {@link ChildrenSiblingExpansion}. Implemented by collectors that
+ * understand parent-child document relationships and can enumerate sibling document ids for a given
+ * child document id, as well as translate document ids back to vector ordinals.
+ *
+ * <p>This interface is used internally by {@link OrdinalTranslatedKnnCollector} to bridge between
+ * the ordinal space of the HNSW graph and the document-id space of the collector.
+ *
+ * @lucene.experimental
+ */
+public interface DocSiblingExpansion {
+
+  /**
+   * Returns the doc ids of all siblings of {@code childDocId} whose parent has not yet been added
+   * to the result heap, or {@code null} if the parent was already processed or there are no other
+   * siblings.
+   *
+   * @param childDocId the document id of the child that is about to be collected
+   * @return sibling doc ids, or {@code null}
+   */
+  int[] findSiblingDocIds(int childDocId);
+
+  /**
+   * Translates a document id to its vector ordinal, or returns {@code -1} if the document has no
+   * vector in this field.
+   *
+   * @param docId the document id
+   * @return the vector ordinal, or {@code -1}
+   */
+  int docIdToOrdinal(int docId);
+}
\ No newline at end of file
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index d739915ca078..2bac2e046788 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -44,6 +44,7 @@ public class HnswGraphSearcher extends AbstractHnswGraphSearcher {
 
   protected int[] bulkNodes = null;
   protected float[] bulkScores = null;
+  protected float[] siblingScores = null;
 
   /**
    * HNSW search is roughly logarithmic. This doesn't take maxConn into account, but it is a pretty
@@ -347,6 +348,20 @@ void searchLevel(
           if (score >= minAcceptedSimilarity) {
             candidates.add(node, score);
             if (acceptOrds == null || acceptOrds.get(node)) {
+              // Fetch siblings BEFORE collect() so the parent is not yet in the heap
+              int[] siblings = null;
+              int numSiblingsToVisit = 0;
+              if (results instanceof ChildrenSiblingExpansion expander) {
+                siblings = expander.pendingSiblingOrdinals(node, visited);
+                if (siblings != null) {
+                  numSiblingsToVisit =
+                      (int)
+                          Math.min(siblings.length, results.visitLimit() - results.visitedCount());
+                  // Only mark as visited the siblings we will actually score; the rest remain
+                  // reachable via normal graph traversal so a better child can still be found
+                  for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+                }
+              }
               if (results.collect(node, score)) {
                 float oldMinAcceptedSimilarity = minAcceptedSimilarity;
                 minAcceptedSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
@@ -356,6 +371,17 @@ void searchLevel(
                   shouldExploreMinSim = true;
                 }
               }
+              // Score and collect all siblings of the newly-discovered parent
+              if (numSiblingsToVisit > 0) {
+                float prevMinSim = results.minCompetitiveSimilarity();
+                siblingScores =
+                    scoreSiblings(
+                        results, scorer, candidates, acceptOrds, siblings, numSiblingsToVisit, siblingScores);
+                if (results.minCompetitiveSimilarity() > prevMinSim) {
+                  minAcceptedSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
+                  shouldExploreMinSim = true;
+                }
+              }
             }
           }
         }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 5225fe700ab9..35f15e81193c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -17,14 +17,19 @@
 
 package org.apache.lucene.util.hnsw;
 
+import java.util.Arrays;
 import org.apache.lucene.search.KnnCollector;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.util.BitSet;
 
 /**
- * Wraps a provided KnnCollector object, translating the provided vectorId ordinal to a documentId
+ * Wraps a provided KnnCollector object, translating the provided vectorId ordinal to a documentId.
+ * This wrapper implements {@link ChildrenSiblingExpansion}; sibling expansion is active only
+ * when the wrapped collector also implements {@link DocSiblingExpansion}.
  */
-public final class OrdinalTranslatedKnnCollector extends KnnCollector.Decorator {
+public final class OrdinalTranslatedKnnCollector extends KnnCollector.Decorator
+    implements ChildrenSiblingExpansion {
 
   private final IntToIntFunction vectorOrdinalToDocId;
 
@@ -50,4 +55,30 @@ public TopDocs topDocs() {
                 : TotalHits.Relation.EQUAL_TO),
         td.scoreDocs);
   }
+
+  @Override
+  public int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds) {
+    if (!(collector instanceof DocSiblingExpansion docExpander)) {
+      return null;
+    }
+    int docId = vectorOrdinalToDocId.apply(collectedOrdinal);
+    int[] siblingDocIds = docExpander.findSiblingDocIds(docId);
+    if (siblingDocIds == null) {
+      return null;
+    }
+    int[] siblingOrdinals = new int[siblingDocIds.length];
+    // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0 so this variable is necessary.
+    int count = 0;
+    for (int sibDocId : siblingDocIds) {
+      int sibOrd = docExpander.docIdToOrdinal(sibDocId);
+      //  sibOrd = -1 when a document has no vector for this field.
+      //  Such a doc has no node in the HNSW graph and can't be scored, so it must be skipped.
+      //  If a sibling was reached via normal graph traversal before sibling expansion triggered, re-adding it would
+      //  cause it to be scored twice. !visitedOrds.get(sibOrd) filters those out.
+      if (sibOrd >= 0 && !visitedOrds.get(sibOrd)) {
+        siblingOrdinals[count++] = sibOrd;
+      }
+    }
+    return count > 0 ? Arrays.copyOf(siblingOrdinals, count) : null;
+  }
 }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java
index 877d004d5c78..77dde715426e 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenByteKnnVectorQuery.java
@@ -156,7 +156,7 @@ protected TopDocs exactSearch(
 
   @Override
   protected KnnCollectorManager getKnnCollectorManager(int k, IndexSearcher searcher) {
-    return new DiversifyingNearestChildrenKnnCollectorManager(k, parentsFilter, searcher);
+    return new DiversifyingNearestChildrenKnnCollectorManager(k, parentsFilter, searcher, field);
   }
 
   @Override
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java
index e2d6179ec426..c5f2f95ddabf 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java
@@ -155,7 +155,7 @@ protected TopDocs exactSearch(
 
   @Override
   protected KnnCollectorManager getKnnCollectorManager(int k, IndexSearcher searcher) {
-    return new DiversifyingNearestChildrenKnnCollectorManager(k, parentsFilter, searcher);
+    return new DiversifyingNearestChildrenKnnCollectorManager(k, parentsFilter, searcher, field);
   }
 
   @Override
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index 41b12308043c..e2ca8fbdb10e 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -25,40 +25,42 @@
 import org.apache.lucene.search.knn.KnnSearchStrategy;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.hnsw.DocSiblingExpansion;
 
 /**
  * This collects the nearest children vectors. Diversifying the results over the provided parent
  * filter. This means the nearest children vectors are returned, but only one per parent
  */
-class DiversifyingNearestChildrenKnnCollector extends AbstractKnnCollector {
+class DiversifyingNearestChildrenKnnCollector extends AbstractKnnCollector
+    implements DocSiblingExpansion {
 
   private final BitSet parentBitSet;
   private final NodeIdCachingHeap heap;
+  // docId → vector ordinal mapping; -1 means the doc has no vector. Null when not needed.
+  private final int[] docToOrd;
 
   /**
-   * Create a new object for joining nearest child kNN documents with a parent bitset
-   *
-   * @param k The number of joined parent documents to collect
-   * @param visitLimit how many child vectors can be visited
-   * @param parentBitSet The leaf parent bitset
-   */
-  public DiversifyingNearestChildrenKnnCollector(int k, int visitLimit, BitSet parentBitSet) {
-    this(k, visitLimit, null, parentBitSet);
-  }
-
-  /**
-   * Create a new object for joining nearest child kNN documents with a parent bitset
+   * Create a new object for joining nearest child kNN documents with a parent bitset and a
+   * precomputed docId-to-ordinal mapping that enables dynamic sibling expansion during HNSW graph
+   * search.
    *
    * @param k The number of joined parent documents to collect
    * @param visitLimit how many child vectors can be visited
    * @param searchStrategy The search strategy to use
    * @param parentBitSet The leaf parent bitset
+   * @param docToOrd precomputed array mapping docId to vector ordinal; {@code -1} entries mean the
+   *     document has no vector. May be {@code null} to disable sibling expansion.
    */
   public DiversifyingNearestChildrenKnnCollector(
-      int k, int visitLimit, KnnSearchStrategy searchStrategy, BitSet parentBitSet) {
+      int k,
+      int visitLimit,
+      KnnSearchStrategy searchStrategy,
+      BitSet parentBitSet,
+      int[] docToOrd) {
     super(k, visitLimit, searchStrategy);
     this.parentBitSet = parentBitSet;
     this.heap = new NodeIdCachingHeap(k);
+    this.docToOrd = docToOrd;
   }
 
   /**
@@ -111,6 +113,47 @@ public int numCollected() {
     return heap.size();
   }
 
+  @Override
+  public int[] findSiblingDocIds(int childDocId) {
+    int parent = parentBitSet.nextSetBit(childDocId);
+    // Skip if this parent's entry is already in the heap
+    if (heap.containsParent(parent)) {
+      return null;
+    }
+    // Find siblings range(prevParent, parent). If parent is 0 there are not prevParents so -1
+    int prevParent = parent > 0 ? parentBitSet.prevSetBit(parent - 1) : -1;
+    int from = prevParent + 1;
+    // Children of parent are all docIds in [from, parent); exclude childDocId itself
+    int capacity = parent - from - 1;
+    // One child case
+    if (capacity == 0) {
+      return null;
+    }
+    int[] siblings = new int[capacity];
+    int idx = 0;
+    for (int docId = from; docId < parent; docId++) {
+      if (docId != childDocId) {
+        siblings[idx++] = docId;
+      }
+    }
+    return idx > 0 ? siblings : null;
+  }
+
+  @Override
+  public int docIdToOrdinal(int docId) {
+    // Conditions explanation:
+    //  docToOrd == null — buildDocToOrd returns null when the segment has no vector values at all for the field (line 113). Without this guard you'd get a NullPointerException.
+    //  docId >= docToOrd.length — In production, docToOrd is sized exactly maxDoc for that segment (line 116), so every valid docId in that segment fits. This check is therefore a pure defensive guard — it can't be triggered by
+    //  correct production code, but it protects against:
+    //  - bugs in how the array is built or passed
+    //  - future refactoring that changes the array size
+    //  - misuse when constructing the collector manually (as in tests, e.g. the docIdToOrdinal(9999) test case)
+    if (docToOrd == null || docId >= docToOrd.length) {
+      return -1;
+    }
+    return docToOrd[docId];
+  }
+
   /**
    * This is a minimum binary heap, inspired by {@link org.apache.lucene.util.LongHeap}. But instead
    * of encoding and using `long` values. Node ids and scores are kept separate. Additionally, this
@@ -245,6 +288,11 @@ public final int size() {
       return size;
     }
 
+    /** Returns {@code true} if a child of the given parent is already in the heap. */
+    public boolean containsParent(int parentDocId) {
+      return nodeIdHeapIndex.containsKey(parentDocId);
+    }
+
     /**
      * Returns true if the element at heap position {@code k} should be evicted before the element
      * at position {@code j} (i.e. a is "less than" b in the min-heap ordering). Lower score loses
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
index 7ebacdd8194d..13071bc84fac 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
@@ -18,7 +18,10 @@
 package org.apache.lucene.search.join;
 
 import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.KnnCollector;
 import org.apache.lucene.search.knn.KnnCollectorManager;
@@ -35,17 +38,21 @@ public class DiversifyingNearestChildrenKnnCollectorManager implements KnnCollec
   private final int k;
   // filter identifying the parent documents.
   private final BitSetProducer parentsFilter;
+  // vector field name; used to build the docId-to-ordinal mapping for sibling expansion
+  private final String field;
 
   /**
    * Constructor
    *
    * @param k - the number of top k vectors to collect
    * @param parentsFilter Filter identifying the parent documents.
+   * @param field the vector field name
    */
   public DiversifyingNearestChildrenKnnCollectorManager(
-      int k, BitSetProducer parentsFilter, IndexSearcher indexSearcher) {
+      int k, BitSetProducer parentsFilter, IndexSearcher indexSearcher, String field) {
     this.k = k;
     this.parentsFilter = parentsFilter;
+    this.field = field;
   }
 
   /**
@@ -62,8 +69,9 @@ public KnnCollector newCollector(
     if (parentBitSet == null) {
       return null;
     }
+    int[] docToOrd = buildDocToOrd(context);
     return new DiversifyingNearestChildrenKnnCollector(
-        k, visitedLimit, searchStrategy, parentBitSet);
+        k, visitedLimit, searchStrategy, parentBitSet, docToOrd);
   }
 
   @Override
@@ -74,12 +82,44 @@ public KnnCollector newOptimisticCollector(
     if (parentBitSet == null) {
       return null;
     }
+    int[] docToOrd = buildDocToOrd(context);
     return new DiversifyingNearestChildrenKnnCollector(
-        k, visitedLimit, searchStrategy, parentBitSet);
+        k, visitedLimit, searchStrategy, parentBitSet, docToOrd);
   }
 
   @Override
   public boolean isOptimistic() {
     return true;
   }
+
+  /**
+   * Builds a docId-to-ordinal array for the given leaf, mapping each docId to its vector ordinal.
+   *
+   * <p>Returns {@code null} if the field has no vector values in this segment at all — sibling
+   * expansion will be disabled for this leaf.
+   *
+   * <p>Otherwise returns an array of size {@code maxDoc} where each entry is the vector ordinal for
+   * that docId, or {@code -1} if that specific document has no vector (sparse indexing).
+   */
+  private int[] buildDocToOrd(LeafReaderContext context) throws IOException {
+    FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
+    // fi = null if the field doesn't exist in this segment at all.
+    // fi.getVectorDimension() = 0 if the field exist in the segment but was not indexed as a vector field.
+    if (fi == null || fi.getVectorDimension() == 0) {
+      return null;
+    }
+    DocIdSetIterator iter =
+        switch (fi.getVectorEncoding()) {
+          case FLOAT32 -> context.reader().getFloatVectorValues(field).iterator();
+          case BYTE -> context.reader().getByteVectorValues(field).iterator();
+        };
+    int maxDoc = context.reader().maxDoc();
+    int[] docToOrd = new int[maxDoc];
+    Arrays.fill(docToOrd, -1);
+    int ord = 0;
+    while (iter.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+      docToOrd[iter.docID()] = ord++;
+    }
+    return docToOrd;
+  }
 }
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java b/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java
new file mode 100644
index 000000000000..3d7fdecf31ca
--- /dev/null
+++ b/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.join;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.search.knn.KnnSearchStrategy;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.BitSet;
+
+/**
+ * Base class for {@link DiversifyingNearestChildrenKnnCollector} tests. Provides shared helpers
+ * for building the block-join parent {@link BitSet} used across test subclasses.
+ */
+abstract class DiversifyingChildrenKnnCollectorTestCase extends LuceneTestCase {
+
+  /**
+   * Builds a {@link BitSet} whose set bits are the parent doc ids in a contiguous block-join
+   * layout: {@code [child_0 … child_{C-1}, parent_C]}, repeated {@code numParents} times.
+   *
+   * <p>Example with {@code childrenPerParent=3}: parent doc ids are 3, 7, 11, …
+   */
+  static BitSet parentBitSet(int numParents, int childrenPerParent) throws IOException {
+    int[] parentDocIds = new int[numParents];
+    for (int p = 1; p <= numParents; p++) {
+      parentDocIds[p - 1] = p * (childrenPerParent + 1) - 1;
+    }
+    int totalDocs = numParents * (childrenPerParent + 1);
+    return BitSet.of(
+        new TestToParentJoinKnnResults.IntArrayDocIdSetIterator(parentDocIds, numParents),
+        totalDocs + 1);
+  }
+
+  /**
+   * Builds a docId-to-ordinal array for the block-join layout. Parent docs get -1 (no vector);
+   * child docs get consecutive ordinals starting from 0.
+   */
+  static int[] buildDocToOrd(int numParents, int childrenPerParent) {
+    int totalDocs = numParents * (childrenPerParent + 1);
+    int[] docToOrd = new int[totalDocs];
+    Arrays.fill(docToOrd, -1);
+    int ord = 0;
+    for (int d = 0; d < totalDocs; d++) {
+      if ((d + 1) % (childrenPerParent + 1) != 0) {
+        docToOrd[d] = ord++;
+      }
+    }
+    return docToOrd;
+  }
+
+  static DiversifyingNearestChildrenKnnCollector makeCollector(
+      int k, BitSet parents, int[] docToOrd) {
+    return new DiversifyingNearestChildrenKnnCollector(
+        k, Integer.MAX_VALUE, (KnnSearchStrategy) null, parents, docToOrd);
+  }
+}
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
new file mode 100644
index 000000000000..bb170ec3d79e
--- /dev/null
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -0,0 +1,468 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.join;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector;
+
+/**
+ * Tests for sibling expansion in {@link DiversifyingNearestChildrenKnnCollector} ({@link
+ * org.apache.lucene.util.hnsw.DocSiblingExpansion} contract) and end-to-end correctness via
+ * {@link DiversifyingChildrenFloatKnnVectorQuery}.
+ */
+public class TestDiversifyingChildrenKnnSiblingExpansion
+    extends DiversifyingChildrenKnnCollectorTestCase {
+
+  // ---------------------------------------------------------------------------
+  // Unit tests: DocSiblingExpansion — findSiblingDocIds
+  // ---------------------------------------------------------------------------
+
+  public void testFindSiblingDocIds_returnsAllSiblings() throws IOException {
+    // 2 parents, 2 children each → blocks: [0,1|2], [3,4|5]
+    int numParents = 2, childrenPerParent = 2;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
+
+    // child 0: sibling is 1
+    int[] s0 = c.findSiblingDocIds(0);
+    assertNotNull(s0);
+    assertArrayEquals(new int[] {1}, s0);
+
+    // child 1: sibling is 0
+    int[] s1 = c.findSiblingDocIds(1);
+    assertNotNull(s1);
+    assertArrayEquals(new int[] {0}, s1);
+
+    // child 3 (second block): sibling is 4
+    int[] s3 = c.findSiblingDocIds(3);
+    assertNotNull(s3);
+    assertArrayEquals(new int[] {4}, s3);
+
+    // child 4 (second block): sibling is 3
+    int[] s4 = c.findSiblingDocIds(4);
+    assertNotNull(s4);
+    assertArrayEquals(new int[] {3}, s4);
+  }
+
+  public void testFindSiblingDocIds_parentAlreadyInHeap_returnsNull() throws IOException {
+    // 1 parents, 2 children → block: [0,1|2]
+    int numParents = 1, childrenPerParent = 2;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
+
+    // Collect child 0 → parent 2 enters the heap
+    c.collect(0, 0.8f);
+
+    // Now asking for siblings of child 1 (same parent 2) must return null
+    assertNull(c.findSiblingDocIds(1));
+  }
+
+  public void testFindSiblingDocIds_singleChildParent_returnsNull() throws IOException {
+    // 1 parents, 1 child each → blocks: [0|1]
+    int numParents = 1, childrenPerParent = 1;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
+
+    assertNull("sole child has no siblings", c.findSiblingDocIds(0));
+  }
+
+  /**
+   * The trigger child must not appear in its own sibling list. Only the *other* children of the
+   * same parent are returned.
+   */
+  public void testFindSiblingDocIds_excludesTriggerChild() throws IOException {
+    // 1 parent, 3 children: [C0, C1, C2 | P0=3]
+    int numParents = 1, childrenPerParent = 3;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
+
+    // Trigger is C1 (docId=1); expected siblings: C0 and C2 only
+    int[] siblings = c.findSiblingDocIds(1);
+    assertNotNull(siblings);
+    assertEquals(2, siblings.length);
+    for (int s : siblings) {
+      assertNotEquals("trigger child must not appear in sibling list", 1, s);
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Unit tests: DocSiblingExpansion — docIdToOrdinal
+  // ---------------------------------------------------------------------------
+
+  public void testDocIdToOrdinal_correctMapping() throws IOException {
+    // 2 parents, 2 children: docToOrd = [0, 1, -1, 2, 3, -1]
+    int numParents = 2, childrenPerParent = 2;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
+
+    assertEquals(0, c.docIdToOrdinal(0));
+    assertEquals(1, c.docIdToOrdinal(1));
+    assertEquals(-1, c.docIdToOrdinal(2));  // parent → no vector
+    assertEquals(2, c.docIdToOrdinal(3));
+    assertEquals(3, c.docIdToOrdinal(4));
+    assertEquals(-1, c.docIdToOrdinal(5));  // parent → no vector
+    assertEquals(-1, c.docIdToOrdinal(9999)); // beyond array bounds
+  }
+
+  public void testDocIdToOrdinal_nullMapping_alwaysMinusOne() throws IOException {
+    // Collector created without a docToOrd array (sibling expansion disabled)
+    BitSet parents = parentBitSet(2, 2);
+    DiversifyingNearestChildrenKnnCollector c =
+        new DiversifyingNearestChildrenKnnCollector(5, Integer.MAX_VALUE, null, parents, null);
+
+    assertEquals(-1, c.docIdToOrdinal(0));
+    assertEquals(-1, c.docIdToOrdinal(1));
+  }
+
+  // ---------------------------------------------------------------------------
+  // Unit tests: heap replacement behaviour
+  // ---------------------------------------------------------------------------
+
+  /**
+   * C3 is the entry point (score 0.85) but its siblings C4 (0.95)
+   * and C5 (0.90) are expanded immediately. The parent must be represented by C4, not C3.
+   */
+  public void testBestSiblingReplacesFirstFoundChild() throws IOException {
+    // 1 parent, 3 children: [C0=0, C1=1, C2=2 | P0=3]
+    int numParents = 1, childrenPerParent = 3;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(1, parents, docToOrd);
+
+    // C0 found first as entry point (like C3=0.85 in the example)
+    c.collect(0, 0.85f);
+    // Sibling expansion scores C1 (like C4=0.95) and C2 (like C5=0.90)
+    c.collect(1, 0.95f);
+    c.collect(2, 0.90f);
+
+    TopDocs td = c.topDocs();
+    assertEquals(1, td.scoreDocs.length);
+    assertEquals("best sibling must win over first-found child", 0.95f, td.scoreDocs[0].score, 1e-5f);
+    assertEquals("best child doc id must be C1", 1, td.scoreDocs[0].doc);
+  }
+
+  /**
+   * When two parents are found and the heap is full (k=2), a sibling of the second parent that
+   * scores better than the trigger child replaces it.
+   */
+  public void testBestSiblingReplacesWorseChildWhenHeapFull() throws IOException {
+    // 2 parents, 2 children each: [C0, C1 | P0=2], [C2, C3 | P1=5]
+    int numParents = 2, childrenPerParent = 2;
+    BitSet parents = parentBitSet(numParents, childrenPerParent);
+    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
+    DiversifyingNearestChildrenKnnCollector c = makeCollector(2, parents, docToOrd);
+
+    // P0: C0 found first (0.85), C1 is better sibling (0.95) → P0 represented by C1
+    c.collect(0, 0.85f);
+    c.collect(1, 0.95f);
+
+    // P1: C2 found first (0.60), C3 is better sibling (0.65)
+    c.collect(3, 0.60f);  // trigger child → P1 enters heap, now FULL (size=2=k)
+    c.collect(4, 0.65f);  // better sibling → heap full but P1 already present, updates entry
+
+    TopDocs td = c.topDocs();
+    assertEquals(2, td.scoreDocs.length);
+    assertEquals("P0 best child score", 0.95f, td.scoreDocs[0].score, 1e-5f);
+    assertEquals("P1 best child score", 0.65f, td.scoreDocs[1].score, 1e-5f);
+    assertEquals("P0 best child doc", 1, td.scoreDocs[0].doc);
+    assertEquals("P1 best child doc", 4, td.scoreDocs[1].doc);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Unit tests: OrdinalTranslatedKnnCollector — pendingSiblingOrdinals
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Siblings whose ordinal is already in visitedOrds must be filtered out to prevent
+   * double-scoring when the sibling was independently discovered via normal graph traversal.
+   */
+  public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOException {
+    // 1 parent, 3 children: [C0=doc0, C1=doc1, C2=doc2 | P0=doc3]
+    // docToOrd=[0,1,2,-1]; ordToDoc=[0,1,2]
+    BitSet parents = parentBitSet(1, 3);
+    int[] docToOrd = buildDocToOrd(1, 3);
+    int[] ordToDoc = {0, 1, 2};
+    OrdinalTranslatedKnnCollector collector =
+        new OrdinalTranslatedKnnCollector(makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
+
+    // Mark C1 (ordinal 1) as already visited by normal graph traversal
+    FixedBitSet visited = new FixedBitSet(4);
+    visited.set(1);
+
+    // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 and C2
+    int[] result = collector.pendingSiblingOrdinals(0, visited);
+    assertNotNull(result);
+    assertArrayEquals("C1 must be filtered (visited); only C2 remains", new int[]{2}, result);
+  }
+
+  /**
+   * Siblings with no vector in this field (docIdToOrdinal returns -1) must be skipped because
+   * they have no node in the HNSW graph and cannot be scored.
+   */
+  public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOException {
+    // 1 parent, 3 children but C1 has no vector:
+    // [C0=doc0, C1(sparse)=doc1, C2=doc2 | P0=doc3]
+    // docToOrd=[0,-1,1,-1]; ordToDoc=[0,2]
+    BitSet parents = parentBitSet(1, 3);
+    int[] docToOrd = {0, -1, 1, -1};
+    int[] ordToDoc = {0, 2};
+    OrdinalTranslatedKnnCollector collector =
+        new OrdinalTranslatedKnnCollector(makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
+
+    FixedBitSet visited = new FixedBitSet(4);
+
+    // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 (sparse) and C2
+    int[] result = collector.pendingSiblingOrdinals(0, visited);
+    assertNotNull(result);
+    assertArrayEquals("C1 (no vector) must be filtered; only C2 remains", new int[]{1}, result);
+  }
+
+  // ---------------------------------------------------------------------------
+  // Index fixtures (used by integration tests only)
+  // ---------------------------------------------------------------------------
+
+  private static final String FIELD = "vec";
+  private static final int DIM = 4;
+
+  /**
+   * Builds a block-join float-vector index. Parent p has numChildren children; child c of parent p
+   * gets vector: v[i] = (p * numChildren + c + i) * 0.1, then normalised for COSINE/DOT_PRODUCT.
+   */
+  private Directory buildIndex(
+      int numParents, int numChildren, VectorSimilarityFunction sim) throws IOException {
+    Directory dir = newDirectory();
+    try (IndexWriter w =
+        new IndexWriter(
+            dir, new IndexWriterConfig().setMergePolicy(newMergePolicy(random(), false)))) {
+      for (int p = 0; p < numParents; p++) {
+        List<Document> block = new ArrayList<>();
+        for (int c = 0; c < numChildren; c++) {
+          float[] vec = childVector(p, c, numChildren);
+          if (sim == VectorSimilarityFunction.COSINE || sim == VectorSimilarityFunction.DOT_PRODUCT) {
+            normalise(vec);
+          }
+          Document child = new Document();
+          child.add(new KnnFloatVectorField(FIELD, vec, sim));
+          child.add(new StoredField("parent", p));
+          block.add(child);
+        }
+        Document parent = new Document();
+        parent.add(new StringField("docType", "_parent", Field.Store.NO));
+        parent.add(new StoredField("parent", p));
+        block.add(parent);
+        w.addDocuments(block);
+      }
+    }
+    return dir;
+  }
+
+  private static float[] childVector(int parent, int child, int numChildren) {
+    float[] vec = new float[DIM];
+    for (int i = 0; i < DIM; i++) {
+      vec[i] = (parent * numChildren + child + i + 1) * 0.1f;
+    }
+    return vec;
+  }
+
+  private static void normalise(float[] vec) {
+    float norm = 0;
+    for (float v : vec) norm += v * v;
+    norm = (float) Math.sqrt(norm);
+    if (norm > 0) {
+      for (int i = 0; i < DIM; i++) vec[i] /= norm;
+    }
+  }
+
+  /** Computes brute-force top-k scores: max similarity per parent, sorted descending. */
+  private static float[] bruteForceTopK(
+      float[] query,
+      int numParents,
+      int numChildren,
+      VectorSimilarityFunction sim,
+      int k) {
+    float[] parentBest = new float[numParents];
+    Arrays.fill(parentBest, Float.NEGATIVE_INFINITY);
+    for (int p = 0; p < numParents; p++) {
+      for (int c = 0; c < numChildren; c++) {
+        float[] vec = childVector(p, c, numChildren);
+        if (sim == VectorSimilarityFunction.COSINE || sim == VectorSimilarityFunction.DOT_PRODUCT) {
+          normalise(vec);
+        }
+        float score = sim.compare(query, vec);
+        if (score > parentBest[p]) parentBest[p] = score;
+      }
+    }
+    float[] sorted = parentBest.clone();
+    Arrays.sort(sorted);
+    int n = Math.min(k, numParents);
+    float[] top = new float[n];
+    for (int i = 0; i < n; i++) top[i] = sorted[numParents - 1 - i];
+    return top;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Integration tests: end-to-end correctness via DiversifyingChildrenFloatKnnVectorQuery
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Each result doc must belong to a distinct parent. Sibling expansion must not cause the same
+   * parent to appear multiple times.
+   */
+  public void testSiblingExpansionNoDuplicateParents() throws Exception {
+    int numParents = 15, numChildren = 4, k = 8;
+    VectorSimilarityFunction sim = VectorSimilarityFunction.EUCLIDEAN;
+    float[] query = {1f, 0f, 0f, 0f};
+
+    try (Directory dir = buildIndex(numParents, numChildren, sim);
+        IndexReader reader = DirectoryReader.open(dir)) {
+      IndexSearcher searcher = newSearcher(reader);
+      BitSetProducer parentFilter =
+          new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
+
+      Query knnQuery =
+          new DiversifyingChildrenFloatKnnVectorQuery(FIELD, query, null, k, parentFilter);
+      TopDocs results = searcher.search(knnQuery, k);
+
+      Set<Integer> seenParents = new HashSet<>();
+      for (ScoreDoc sd : results.scoreDocs) {
+        int parentId =
+            reader.storedFields().document(sd.doc).getField("parent").numericValue().intValue();
+        assertTrue("parent " + parentId + " appeared more than once", seenParents.add(parentId));
+      }
+    }
+  }
+
+  /**
+   * When every parent has exactly one child there are no siblings to expand. Results must still be
+   * correct.
+   */
+  public void testSiblingExpansionSingleChildParents() throws Exception {
+    int numParents = 12, numChildren = 1, k = 5;
+    VectorSimilarityFunction sim = VectorSimilarityFunction.EUCLIDEAN;
+    float[] query = {0.2f, 0.3f, 0.4f, 0.5f};
+
+    try (Directory dir = buildIndex(numParents, numChildren, sim);
+        IndexReader reader = DirectoryReader.open(dir)) {
+      IndexSearcher searcher = newSearcher(reader);
+      BitSetProducer parentFilter =
+          new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
+
+      Query knnQuery =
+          new DiversifyingChildrenFloatKnnVectorQuery(FIELD, query, null, k, parentFilter);
+      TopDocs results = searcher.search(knnQuery, k);
+
+      float[] expected = bruteForceTopK(query, numParents, numChildren, sim, k);
+      assertEquals(Math.min(k, numParents), results.scoreDocs.length);
+      for (int i = 0; i < results.scoreDocs.length; i++) {
+        assertEquals("score at rank " + i, expected[i], results.scoreDocs[i].score, 1e-4f);
+      }
+    }
+  }
+
+  /**
+   * The query is close to one parent's children; sibling expansion must find the best child of each discovered parent
+   * rather than whichever child the graph traversal happens to reach first.
+   */
+  public void testSiblingExpansion_bestChildPerParentFound() throws Exception {
+    int numParents = 3, numChildren = 3, k = 2;
+    VectorSimilarityFunction sim = VectorSimilarityFunction.EUCLIDEAN;
+    float[] query = {0.9f, 0.9f, 0.9f, 0.9f};
+
+    try (Directory dir = buildIndex(numParents, numChildren, sim);
+        IndexReader reader = DirectoryReader.open(dir)) {
+      IndexSearcher searcher = newSearcher(reader);
+      BitSetProducer parentFilter =
+          new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
+      CheckJoinIndex.check(reader, parentFilter);
+
+      Query knnQuery =
+          new DiversifyingChildrenFloatKnnVectorQuery(FIELD, query, null, k, parentFilter);
+      TopDocs results = searcher.search(knnQuery, k);
+
+      assertEquals(k, results.scoreDocs.length);
+      for (ScoreDoc sd : results.scoreDocs) {
+        int parentIdx = reader.storedFields().document(sd.doc)
+            .getField("parent").numericValue().intValue();
+        // verify no other child of the same parent scores higher than the returned one
+        for (int c = 0; c < numChildren; c++) {
+          float[] vec = childVector(parentIdx, c, numChildren);
+          float cScore = sim.compare(query, vec);
+          assertTrue(
+              "parent " + parentIdx + " has a better child (score " + cScore
+                  + ") than returned doc " + sd.doc + " (score " + sd.score + ")",
+              cScore <= sd.score + 1e-4f);
+        }
+      }
+    }
+  }
+
+  /**
+   * With a single parent and many children, sibling expansion must score all children so the best
+   * one is returned. Without expansion the graph might stop early and miss the best child.
+   */
+  public void testSiblingExpansion_singleParentManyChildren() throws Exception {
+    int numParents = 1, numChildren = 8, k = 1;
+    VectorSimilarityFunction sim = VectorSimilarityFunction.EUCLIDEAN;
+    float[] query = {0.9f, 0.8f, 0.7f, 0.6f};
+
+    try (Directory dir = buildIndex(numParents, numChildren, sim);
+        IndexReader reader = DirectoryReader.open(dir)) {
+      IndexSearcher searcher = newSearcher(reader);
+      BitSetProducer parentFilter =
+          new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
+      CheckJoinIndex.check(reader, parentFilter);
+
+      Query knnQuery =
+          new DiversifyingChildrenFloatKnnVectorQuery(FIELD, query, null, k, parentFilter);
+      TopDocs results = searcher.search(knnQuery, k);
+
+      float[] expected = bruteForceTopK(query, numParents, numChildren, sim, k);
+      assertEquals(1, results.scoreDocs.length);
+      assertEquals("best child of single parent", expected[0], results.scoreDocs[0].score, 1e-4f);
+    }
+  }
+}
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingNearestChildrenKnnCollectorPerformance.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingNearestChildrenKnnCollectorPerformance.java
index 99a1384144b1..14addde94549 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingNearestChildrenKnnCollectorPerformance.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingNearestChildrenKnnCollectorPerformance.java
@@ -19,7 +19,6 @@
 
 import java.io.IOException;
 import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.util.BitSet;
 
 /**
@@ -28,27 +27,13 @@
  * <p>Correctness tests verify behaviour of the collector in various scenarios, including edge
  * cases.
  */
-public class TestDiversifyingNearestChildrenKnnCollectorPerformance extends LuceneTestCase {
-
-  /** Builds a BitSet whose set bits are the parent doc ids in a contiguous block-join layout. */
-  private static BitSet parentBitSet(int numParents, int childrenPerParent) throws IOException {
-    int[] parentDocIds = new int[numParents];
-    for (int p = 1; p <= numParents; p++) {
-      // layout: [child_0 … child_{C-1}, parent_C], repeated
-      // e.g. with 3 children per parent: [0,1,2,3, 4,5,6,7, 8,9,10,11, ...] → parent doc ids are
-      // 3,7,11,...
-      parentDocIds[p - 1] = p * (childrenPerParent + 1) - 1;
-    }
-    int totalDocs = numParents * (childrenPerParent + 1); // children + 1 parent per block
-    return BitSet.of(
-        new TestToParentJoinKnnResults.IntArrayDocIdSetIterator(parentDocIds, numParents),
-        totalDocs + 1);
-  }
+public class TestDiversifyingNearestChildrenKnnCollectorPerformance
+    extends DiversifyingChildrenKnnCollectorTestCase {
 
   /** Collects all children in order and returns topDocs. */
   private static TopDocs collectAll(int k, BitSet parents, int[] childIds, float[] scores) {
     DiversifyingNearestChildrenKnnCollector collector =
-        new DiversifyingNearestChildrenKnnCollector(k, Integer.MAX_VALUE, parents);
+        new DiversifyingNearestChildrenKnnCollector(k, Integer.MAX_VALUE, null, parents, null);
     for (int i = 0; i < childIds.length; i++) {
       collector.collect(childIds[i], scores[i]);
     }
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java b/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java
index 175e1f264400..212990bfac88 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java
@@ -33,7 +33,7 @@ public void testNeighborsProduct() throws IOException {
     // make sure we have the sign correct
     BitSet parentBitSet = BitSet.of(new IntArrayDocIdSetIterator(new int[] {1, 3, 5}, 3), 6);
     DiversifyingNearestChildrenKnnCollector nn =
-        new DiversifyingNearestChildrenKnnCollector(2, Integer.MAX_VALUE, parentBitSet);
+        new DiversifyingNearestChildrenKnnCollector(2, Integer.MAX_VALUE, null, parentBitSet, null);
     assertTrue(nn.collect(2, 0.5f));
     assertTrue(nn.collect(0, 0.2f));
     assertTrue(nn.collect(4, 1f));
@@ -48,7 +48,7 @@ public void testInsertions() throws IOException {
     float[] scores = new float[] {1f, 0.5f, 0.6f, 2f, 2f, 1.2f, 4f};
     BitSet parentBitSet = BitSet.of(new IntArrayDocIdSetIterator(new int[] {3, 6, 9, 12}, 4), 13);
     DiversifyingNearestChildrenKnnCollector results =
-        new DiversifyingNearestChildrenKnnCollector(7, Integer.MAX_VALUE, parentBitSet);
+        new DiversifyingNearestChildrenKnnCollector(7, Integer.MAX_VALUE, null, parentBitSet, null);
     for (int i = 0; i < nodes.length; i++) {
       results.collect(nodes[i], scores[i]);
     }
@@ -69,7 +69,7 @@ public void testInsertionWithOverflow() throws IOException {
     BitSet parentBitSet =
         BitSet.of(new IntArrayDocIdSetIterator(new int[] {3, 6, 9, 11, 13, 15}, 6), 16);
     DiversifyingNearestChildrenKnnCollector results =
-        new DiversifyingNearestChildrenKnnCollector(5, Integer.MAX_VALUE, parentBitSet);
+        new DiversifyingNearestChildrenKnnCollector(5, Integer.MAX_VALUE, null, parentBitSet, null);
     for (int i = 0; i < nodes.length - 1; i++) {
       results.collect(nodes[i], scores[i]);
     }
@@ -104,7 +104,7 @@ public void testRandomInsertionsWithOverflow() throws IOException {
     BitSet parentBitSet =
         BitSet.of(new IntArrayDocIdSetIterator(parents, parents.length), nextParent + 1);
     DiversifyingNearestChildrenKnnCollector results =
-        new DiversifyingNearestChildrenKnnCollector(20, Integer.MAX_VALUE, parentBitSet);
+        new DiversifyingNearestChildrenKnnCollector(20, Integer.MAX_VALUE, null, parentBitSet, null);
     for (int i = 0; i < children.size(); i++) {
       results.collect(children.get(i), childrenScores.get(i));
     }

From b1f6b868a0e0a5a8334138b733cd7f4089dceb53 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Mon, 4 May 2026 15:52:00 +0200
Subject: [PATCH 05/23] Added benchmark for measuring speed

---
 lucene/benchmark-jmh/build.gradle             |   1 +
 .../benchmark-jmh/src/java/module-info.java   |   1 +
 ...DiversifyingChildrenKnnQueryBenchmark.java | 163 ++++++++++++++++++
 3 files changed, 165 insertions(+)
 create mode 100644 lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java

diff --git a/lucene/benchmark-jmh/build.gradle b/lucene/benchmark-jmh/build.gradle
index 6f874e410b9b..6d64bb7a4f72 100644
--- a/lucene/benchmark-jmh/build.gradle
+++ b/lucene/benchmark-jmh/build.gradle
@@ -20,6 +20,7 @@ description = 'Lucene JMH micro-benchmarking module'
 dependencies {
   moduleImplementation project(':lucene:core')
   moduleImplementation project(':lucene:expressions')
+  moduleImplementation project(':lucene:join')
   moduleImplementation project(':lucene:sandbox')
   moduleTestImplementation project(':lucene:test-framework')
 
diff --git a/lucene/benchmark-jmh/src/java/module-info.java b/lucene/benchmark-jmh/src/java/module-info.java
index bb6b9d516bb0..8090c7554739 100644
--- a/lucene/benchmark-jmh/src/java/module-info.java
+++ b/lucene/benchmark-jmh/src/java/module-info.java
@@ -24,6 +24,7 @@
   requires jdk.unsupported;
   requires org.apache.lucene.core;
   requires org.apache.lucene.expressions;
+  requires org.apache.lucene.join;
   requires org.apache.lucene.sandbox;
   requires commons.math3;
 
diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
new file mode 100644
index 000000000000..7940b1fd2bc6
--- /dev/null
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
+import org.apache.lucene.search.join.QueryBitSetProducer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.util.IOUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Benchmarks end-to-end latency of {@link DiversifyingChildrenFloatKnnVectorQuery} with sibling
+ * expansion enabled. Run with:
+ *
+ * <pre>
+ *   ./gradlew -p lucene/benchmark-jmh jmh --args="DiversifyingChildrenKnnQueryBenchmark"
+ * </pre>
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 4, time = 1)
+@Measurement(iterations = 5, time = 1)
+@Fork(
+    value = 3,
+    jvmArgsAppend = {"-Xmx4g", "-Xms4g", "-XX:+AlwaysPreTouch"})
+public class DiversifyingChildrenKnnQueryBenchmark {
+
+  private static final String FIELD = "vec";
+  private static final String PARENT_FIELD = "docType";
+  private static final String PARENT_VALUE = "_parent";
+  private static final int NUM_QUERY_VECTORS = 256;
+
+  @Param({"5000"})
+  public int numParents;
+
+  @Param({"4", "8"})
+  public int childrenPerParent;
+
+  @Param({"10"})
+  public int k;
+
+  @Param({"128"})
+  public int dim;
+
+  private Path tmpDir;
+  private Directory dir;
+  private IndexReader reader;
+  private IndexSearcher searcher;
+  private QueryBitSetProducer parentFilter;
+  private float[][] queryVectors;
+  private int queryIdx;
+
+  @Setup(Level.Trial)
+  public void setup() throws IOException {
+    tmpDir = Files.createTempDirectory("DiversifyingChildrenKnnQueryBenchmark");
+    dir = MMapDirectory.open(tmpDir);
+
+    Random rnd = new Random(42);
+    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
+      for (int p = 0; p < numParents; p++) {
+        List<Document> block = new ArrayList<>();
+        for (int c = 0; c < childrenPerParent; c++) {
+          Document child = new Document();
+          child.add(
+              new KnnFloatVectorField(FIELD, randomUnitVector(dim, rnd), VectorSimilarityFunction.DOT_PRODUCT));
+          block.add(child);
+        }
+        Document parent = new Document();
+        parent.add(new StringField(PARENT_FIELD, PARENT_VALUE, Field.Store.NO));
+        block.add(parent);
+        w.addDocuments(block);
+      }
+      w.forceMerge(1);
+    }
+
+    reader = DirectoryReader.open(dir);
+    searcher = new IndexSearcher(reader);
+    parentFilter = new QueryBitSetProducer(new TermQuery(new Term(PARENT_FIELD, PARENT_VALUE)));
+
+    Random qrnd = new Random(123);
+    queryVectors = new float[NUM_QUERY_VECTORS][];
+    for (int i = 0; i < NUM_QUERY_VECTORS; i++) {
+      queryVectors[i] = randomUnitVector(dim, qrnd);
+    }
+  }
+
+  @TearDown(Level.Trial)
+  public void teardown() throws IOException {
+    IOUtils.close(reader, dir);
+    IOUtils.rm(tmpDir);
+  }
+
+  @Benchmark
+  public TopDocs search() throws IOException {
+    float[] query = queryVectors[queryIdx++ & (NUM_QUERY_VECTORS - 1)];
+    Query knnQuery =
+        new DiversifyingChildrenFloatKnnVectorQuery(FIELD, query, null, k, parentFilter);
+    return searcher.search(knnQuery, k);
+  }
+
+  private static float[] randomUnitVector(int dim, Random rnd) {
+    float[] v = new float[dim];
+    float norm = 0;
+    for (int i = 0; i < dim; i++) {
+      v[i] = rnd.nextFloat() * 2 - 1;
+      norm += v[i] * v[i];
+    }
+    norm = (float) Math.sqrt(norm);
+    for (int i = 0; i < dim; i++) {
+      v[i] /= norm;
+    }
+    return v;
+  }
+}
\ No newline at end of file

From 9ab84ad1334cfc1cc99d86035f13fe120636d0c7 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Mon, 4 May 2026 16:23:46 +0200
Subject: [PATCH 06/23] Added cache and update benchmark for testing different
 scenarios

---
 ...DiversifyingChildrenKnnQueryBenchmark.java | 63 +++++++++++++++----
 ...ingNearestChildrenKnnCollectorManager.java | 43 ++++++++++++-
 2 files changed, 93 insertions(+), 13 deletions(-)

diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
index 7940b1fd2bc6..f31e74de75a1 100644
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
@@ -58,10 +58,21 @@
 
 /**
  * Benchmarks end-to-end latency of {@link DiversifyingChildrenFloatKnnVectorQuery} with sibling
- * expansion enabled. Run with:
+ * expansion enabled across three sibling-correlation scenarios:
+ *
+ * <ul>
+ *   <li><b>best</b> — siblings are nearly identical (small noise around a parent centroid).
+ *       Expansion finds the best sibling immediately; HNSW terminates early.
+ *   <li><b>standard</b> — siblings have moderate correlation (realistic use case).
+ *   <li><b>worst</b> — siblings are fully independent random vectors. Expansion fires but adds no
+ *       recall benefit; measures pure overhead.
+ * </ul>
+ *
+ * Run with:
  *
  * <pre>
- *   ./gradlew -p lucene/benchmark-jmh jmh --args="DiversifyingChildrenKnnQueryBenchmark"
+ *   ./gradlew -p lucene/benchmark-jmh assemble
+ *   java -jar lucene/benchmark-jmh/build/benchmarks/lucene-benchmark-jmh-*.jar DiversifyingChildrenKnnQueryBenchmark
  * </pre>
  */
 @BenchmarkMode(Mode.AverageTime)
@@ -79,6 +90,17 @@ public class DiversifyingChildrenKnnQueryBenchmark {
   private static final String PARENT_VALUE = "_parent";
   private static final int NUM_QUERY_VECTORS = 256;
 
+  /**
+   * Sibling correlation scenario:
+   * <ul>
+   *   <li>{@code best} — siblings nearly identical (noise = 0.05); best case for expansion.
+   *   <li>{@code standard} — siblings moderately correlated (noise = 0.3); realistic case.
+   *   <li>{@code worst} — siblings fully random; pure overhead, no recall benefit.
+   * </ul>
+   */
+  @Param({"best", "standard", "worst"})
+  public String siblingCorrelation;
+
   @Param({"5000"})
   public int numParents;
 
@@ -104,14 +126,24 @@ public void setup() throws IOException {
     tmpDir = Files.createTempDirectory("DiversifyingChildrenKnnQueryBenchmark");
     dir = MMapDirectory.open(tmpDir);
 
+    float noiseLevel =
+        switch (siblingCorrelation) {
+          case "best"     -> 0.05f;
+          case "standard" -> 0.30f;
+          default         -> Float.NaN; // worst: fully random, no centroid
+        };
+
     Random rnd = new Random(42);
     try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
       for (int p = 0; p < numParents; p++) {
+        float[] centroid = Float.isNaN(noiseLevel) ? null : randomUnitVector(dim, rnd);
         List<Document> block = new ArrayList<>();
         for (int c = 0; c < childrenPerParent; c++) {
+          float[] vec = centroid == null
+              ? randomUnitVector(dim, rnd)
+              : perturbedUnitVector(centroid, noiseLevel, rnd);
           Document child = new Document();
-          child.add(
-              new KnnFloatVectorField(FIELD, randomUnitVector(dim, rnd), VectorSimilarityFunction.DOT_PRODUCT));
+          child.add(new KnnFloatVectorField(FIELD, vec, VectorSimilarityFunction.DOT_PRODUCT));
           block.add(child);
         }
         Document parent = new Document();
@@ -149,15 +181,24 @@ public TopDocs search() throws IOException {
 
   private static float[] randomUnitVector(int dim, Random rnd) {
     float[] v = new float[dim];
-    float norm = 0;
-    for (int i = 0; i < dim; i++) {
-      v[i] = rnd.nextFloat() * 2 - 1;
-      norm += v[i] * v[i];
+    for (int i = 0; i < dim; i++) v[i] = rnd.nextFloat() * 2 - 1;
+    return normalise(v);
+  }
+
+  /** Returns a unit vector near {@code centroid} with per-dimension noise scaled by noiseLevel. */
+  private static float[] perturbedUnitVector(float[] centroid, float noiseLevel, Random rnd) {
+    float[] v = new float[centroid.length];
+    for (int i = 0; i < centroid.length; i++) {
+      v[i] = centroid[i] + noiseLevel * (rnd.nextFloat() * 2 - 1);
     }
+    return normalise(v);
+  }
+
+  private static float[] normalise(float[] v) {
+    float norm = 0;
+    for (float x : v) norm += x * x;
     norm = (float) Math.sqrt(norm);
-    for (int i = 0; i < dim; i++) {
-      v[i] /= norm;
-    }
+    for (int i = 0; i < v.length; i++) v[i] /= norm;
     return v;
   }
 }
\ No newline at end of file
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
index 13071bc84fac..03e1267b8c47 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
@@ -19,7 +19,9 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.concurrent.ConcurrentHashMap;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
@@ -34,6 +36,15 @@
  */
 public class DiversifyingNearestChildrenKnnCollectorManager implements KnnCollectorManager {
 
+  // Sentinel stored in the cache to represent "this segment has no vectors for the field",
+  // because ConcurrentHashMap does not allow null values.
+  private static final int[] NO_VECTORS = new int[0];
+
+  // Cache keyed by (segment core cache key → (field name → docToOrd array)).
+  // Entries are evicted automatically when the segment is closed via addClosedListener.
+  private static final ConcurrentHashMap<IndexReader.CacheKey, ConcurrentHashMap<String, int[]>>
+      DOC_TO_ORD_CACHE = new ConcurrentHashMap<>();
+
   // the number of docs to collect
   private final int k;
   // filter identifying the parent documents.
@@ -69,7 +80,7 @@ public KnnCollector newCollector(
     if (parentBitSet == null) {
       return null;
     }
-    int[] docToOrd = buildDocToOrd(context);
+    int[] docToOrd = getCachedDocToOrd(context);
     return new DiversifyingNearestChildrenKnnCollector(
         k, visitedLimit, searchStrategy, parentBitSet, docToOrd);
   }
@@ -82,7 +93,7 @@ public KnnCollector newOptimisticCollector(
     if (parentBitSet == null) {
       return null;
     }
-    int[] docToOrd = buildDocToOrd(context);
+    int[] docToOrd = getCachedDocToOrd(context);
     return new DiversifyingNearestChildrenKnnCollector(
         k, visitedLimit, searchStrategy, parentBitSet, docToOrd);
   }
@@ -92,6 +103,34 @@ public boolean isOptimistic() {
     return true;
   }
 
+  /**
+   * Returns the docId-to-ordinal array for the given leaf, building and caching it on first access.
+   * The cached array is evicted automatically when the segment closes.
+   */
+  private int[] getCachedDocToOrd(LeafReaderContext context) throws IOException {
+    IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper();
+    if (cacheHelper == null) {
+      return buildDocToOrd(context);
+    }
+    IndexReader.CacheKey cacheKey = cacheHelper.getKey();
+    ConcurrentHashMap<String, int[]> fieldMap = new ConcurrentHashMap<>();
+    ConcurrentHashMap<String, int[]> existing = DOC_TO_ORD_CACHE.putIfAbsent(cacheKey, fieldMap);
+    if (existing == null) {
+      // We inserted the new entry — register cleanup when the segment closes
+      cacheHelper.addClosedListener(DOC_TO_ORD_CACHE::remove);
+    } else {
+      fieldMap = existing;
+    }
+    int[] cached = fieldMap.get(field);
+    if (cached != null) {
+      return cached == NO_VECTORS ? null : cached;
+    }
+    int[] built = buildDocToOrd(context);
+    int[] stored = built != null ? built : NO_VECTORS;
+    int[] race = fieldMap.putIfAbsent(field, stored);
+    return race != null ? (race == NO_VECTORS ? null : race) : built;
+  }
+
   /**
    * Builds a docId-to-ordinal array for the given leaf, mapping each docId to its vector ordinal.
    *

From 07421fb2148f2c6bd3d3af0684e5f12f8420ded0 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Mon, 4 May 2026 17:22:55 +0200
Subject: [PATCH 07/23] Updated tests for triggering early termination

---
 .../benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
index f31e74de75a1..649c58e9fb56 100644
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
@@ -104,10 +104,10 @@ public class DiversifyingChildrenKnnQueryBenchmark {
   @Param({"5000"})
   public int numParents;
 
-  @Param({"4", "8"})
+  @Param({"4", "8", "16"})
   public int childrenPerParent;
 
-  @Param({"10"})
+  @Param({"10", "100"})
   public int k;
 
   @Param({"128"})

From 8fae7fbd1cfe1c8fd56340e0d16b29e6aef2e1af Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Tue, 5 May 2026 14:50:14 +0200
Subject: [PATCH 08/23] Added comments to tests

---
 ...DiversifyingChildrenKnnQueryBenchmark.java | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
index 649c58e9fb56..d99424466304 100644
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
@@ -78,8 +78,11 @@
 @BenchmarkMode(Mode.AverageTime)
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 @State(Scope.Benchmark)
+// 4 iterations 1 second each - results discarded
 @Warmup(iterations = 4, time = 1)
+// 5 iterations 1 second each - results recorded (how many calls we can do in 1 sec)
 @Measurement(iterations = 5, time = 1)
+// 3 separate JVM processes
 @Fork(
     value = 3,
     jvmArgsAppend = {"-Xmx4g", "-Xms4g", "-XX:+AlwaysPreTouch"})
@@ -126,41 +129,55 @@ public void setup() throws IOException {
     tmpDir = Files.createTempDirectory("DiversifyingChildrenKnnQueryBenchmark");
     dir = MMapDirectory.open(tmpDir);
 
+    // How much siblings are near to each other
     float noiseLevel =
         switch (siblingCorrelation) {
-          case "best"     -> 0.05f;
-          case "standard" -> 0.30f;
+          case "best"     -> 0.05f; // nearly identical
+          case "standard" -> 0.30f; // moderately correlated
           default         -> Float.NaN; // worst: fully random, no centroid
         };
 
     Random rnd = new Random(42);
+    // index creation
     try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
+      // 5000 parents
       for (int p = 0; p < numParents; p++) {
+        // vector of 128-dim
         float[] centroid = Float.isNaN(noiseLevel) ? null : randomUnitVector(dim, rnd);
         List<Document> block = new ArrayList<>();
+        // 4 - 8 - 16 children per parent
         for (int c = 0; c < childrenPerParent; c++) {
           float[] vec = centroid == null
               ? randomUnitVector(dim, rnd)
               : perturbedUnitVector(centroid, noiseLevel, rnd);
+          // create child doc
           Document child = new Document();
           child.add(new KnnFloatVectorField(FIELD, vec, VectorSimilarityFunction.DOT_PRODUCT));
+          // add to the index block
           block.add(child);
         }
+        // create parent document
         Document parent = new Document();
+        // docType = _parent
         parent.add(new StringField(PARENT_FIELD, PARENT_VALUE, Field.Store.NO));
+        // add to the index block
         block.add(parent);
+        // add to the index writer
         w.addDocuments(block);
       }
+      // compress to one segment
       w.forceMerge(1);
     }
 
     reader = DirectoryReader.open(dir);
     searcher = new IndexSearcher(reader);
+    // parent filter docType = _parent
     parentFilter = new QueryBitSetProducer(new TermQuery(new Term(PARENT_FIELD, PARENT_VALUE)));
 
     Random qrnd = new Random(123);
     queryVectors = new float[NUM_QUERY_VECTORS][];
     for (int i = 0; i < NUM_QUERY_VECTORS; i++) {
+      // random query vectors
       queryVectors[i] = randomUnitVector(dim, qrnd);
     }
   }
@@ -173,6 +190,8 @@ public void teardown() throws IOException {
 
   @Benchmark
   public TopDocs search() throws IOException {
+    // benchmarked part - search
+    // iterates on all the queries in a round-robin
     float[] query = queryVectors[queryIdx++ & (NUM_QUERY_VECTORS - 1)];
     Query knnQuery =
         new DiversifyingChildrenFloatKnnVectorQuery(FIELD, query, null, k, parentFilter);
@@ -194,6 +213,7 @@ private static float[] perturbedUnitVector(float[] centroid, float noiseLevel, R
     return normalise(v);
   }
 
+  // Since we use DOT PRODUCT
   private static float[] normalise(float[] v) {
     float norm = 0;
     for (float x : v) norm += x * x;
@@ -201,4 +221,4 @@ private static float[] normalise(float[] v) {
     for (int i = 0; i < v.length; i++) v[i] /= norm;
     return v;
   }
-}
\ No newline at end of file
+}

From 32d406fe0616360039054328f7a924fd6c8d3369 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Tue, 5 May 2026 15:57:37 +0200
Subject: [PATCH 09/23] Gradlew tidy

---
 ...DiversifyingChildrenKnnQueryBenchmark.java | 12 ++--
 .../util/hnsw/AbstractHnswGraphSearcher.java  |  8 ++-
 .../util/hnsw/ChildrenSiblingExpansion.java   | 13 ++--
 .../lucene/util/hnsw/DocSiblingExpansion.java |  2 +-
 .../lucene/util/hnsw/HnswGraphSearcher.java   |  8 ++-
 .../hnsw/OrdinalTranslatedKnnCollector.java   | 10 +--
 ...versifyingNearestChildrenKnnCollector.java | 10 ++-
 ...ingNearestChildrenKnnCollectorManager.java |  3 +-
 ...ersifyingChildrenKnnCollectorTestCase.java |  4 +-
 ...versifyingChildrenKnnSiblingExpansion.java | 69 ++++++++++---------
 .../join/TestToParentJoinKnnResults.java      |  3 +-
 11 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
index d99424466304..f6f0f10358c8 100644
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DiversifyingChildrenKnnQueryBenchmark.java
@@ -95,6 +95,7 @@ public class DiversifyingChildrenKnnQueryBenchmark {
 
   /**
    * Sibling correlation scenario:
+   *
    * <ul>
    *   <li>{@code best} — siblings nearly identical (noise = 0.05); best case for expansion.
    *   <li>{@code standard} — siblings moderately correlated (noise = 0.3); realistic case.
@@ -132,9 +133,9 @@ public void setup() throws IOException {
     // How much siblings are near to each other
     float noiseLevel =
         switch (siblingCorrelation) {
-          case "best"     -> 0.05f; // nearly identical
+          case "best" -> 0.05f; // nearly identical
           case "standard" -> 0.30f; // moderately correlated
-          default         -> Float.NaN; // worst: fully random, no centroid
+          default -> Float.NaN; // worst: fully random, no centroid
         };
 
     Random rnd = new Random(42);
@@ -147,9 +148,10 @@ public void setup() throws IOException {
         List<Document> block = new ArrayList<>();
         // 4 - 8 - 16 children per parent
         for (int c = 0; c < childrenPerParent; c++) {
-          float[] vec = centroid == null
-              ? randomUnitVector(dim, rnd)
-              : perturbedUnitVector(centroid, noiseLevel, rnd);
+          float[] vec =
+              centroid == null
+                  ? randomUnitVector(dim, rnd)
+                  : perturbedUnitVector(centroid, noiseLevel, rnd);
           // create child doc
           Document child = new Document();
           child.add(new KnnFloatVectorField(FIELD, vec, VectorSimilarityFunction.DOT_PRODUCT));
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index 03836fa8d006..dc6ae9560706 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -122,7 +122,13 @@ protected static void scoreEntryPoints(
         if (numSiblingsToVisit > 0) {
           siblingScores =
               scoreSiblings(
-                  results, scorer, candidates, acceptOrds, siblings, numSiblingsToVisit, siblingScores);
+                  results,
+                  scorer,
+                  candidates,
+                  acceptOrds,
+                  siblings,
+                  numSiblingsToVisit,
+                  siblingScores);
         }
       }
     }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
index c176c166ceab..ee9823cee925 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
@@ -26,18 +26,17 @@
  * collected without requiring full graph traversal.
  *
  * <p>The graph searcher calls {@link #pendingSiblingOrdinals} <em>before</em> invoking {@link
- * org.apache.lucene.search.KnnCollector#collect} for the triggering node, so that the collector
- * can safely inspect its internal state before the parent is registered.
+ * org.apache.lucene.search.KnnCollector#collect} for the triggering node, so that the collector can
+ * safely inspect its internal state before the parent is registered.
  *
  * @lucene.experimental
  */
 public interface ChildrenSiblingExpansion {
 
   /**
-   * Called by the HNSW graph searcher before {@link
-   * org.apache.lucene.search.KnnCollector#collect} is invoked for {@code collectedOrdinal}. The
-   * implementation inspects the node's parent and returns the ordinals of unvisited siblings that
-   * should be scored immediately.
+   * Called by the HNSW graph searcher before {@link org.apache.lucene.search.KnnCollector#collect}
+   * is invoked for {@code collectedOrdinal}. The implementation inspects the node's parent and
+   * returns the ordinals of unvisited siblings that should be scored immediately.
    *
    * <p>The caller is responsible for marking the returned ordinals as visited in {@code
    * visitedOrds} and for scoring and collecting them.
@@ -48,4 +47,4 @@ public interface ChildrenSiblingExpansion {
    * @return ordinals of sibling nodes to score next, or {@code null} if none
    */
   int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds);
-}
\ No newline at end of file
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
index 4882ade8c2ee..6832948f7386 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -47,4 +47,4 @@ public interface DocSiblingExpansion {
    * @return the vector ordinal, or {@code -1}
    */
   int docIdToOrdinal(int docId);
-}
\ No newline at end of file
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 2bac2e046788..e64cbed10a0c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -376,7 +376,13 @@ void searchLevel(
                 float prevMinSim = results.minCompetitiveSimilarity();
                 siblingScores =
                     scoreSiblings(
-                        results, scorer, candidates, acceptOrds, siblings, numSiblingsToVisit, siblingScores);
+                        results,
+                        scorer,
+                        candidates,
+                        acceptOrds,
+                        siblings,
+                        numSiblingsToVisit,
+                        siblingScores);
                 if (results.minCompetitiveSimilarity() > prevMinSim) {
                   minAcceptedSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
                   shouldExploreMinSim = true;
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 35f15e81193c..4814e799826b 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -25,8 +25,8 @@
 
 /**
  * Wraps a provided KnnCollector object, translating the provided vectorId ordinal to a documentId.
- * This wrapper implements {@link ChildrenSiblingExpansion}; sibling expansion is active only
- * when the wrapped collector also implements {@link DocSiblingExpansion}.
+ * This wrapper implements {@link ChildrenSiblingExpansion}; sibling expansion is active only when
+ * the wrapped collector also implements {@link DocSiblingExpansion}.
  */
 public final class OrdinalTranslatedKnnCollector extends KnnCollector.Decorator
     implements ChildrenSiblingExpansion {
@@ -67,13 +67,15 @@ public int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds) {
       return null;
     }
     int[] siblingOrdinals = new int[siblingDocIds.length];
-    // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0 so this variable is necessary.
+    // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0
+    // so this variable is necessary.
     int count = 0;
     for (int sibDocId : siblingDocIds) {
       int sibOrd = docExpander.docIdToOrdinal(sibDocId);
       //  sibOrd = -1 when a document has no vector for this field.
       //  Such a doc has no node in the HNSW graph and can't be scored, so it must be skipped.
-      //  If a sibling was reached via normal graph traversal before sibling expansion triggered, re-adding it would
+      //  If a sibling was reached via normal graph traversal before sibling expansion triggered,
+      // re-adding it would
       //  cause it to be scored twice. !visitedOrds.get(sibOrd) filters those out.
       if (sibOrd >= 0 && !visitedOrds.get(sibOrd)) {
         siblingOrdinals[count++] = sibOrd;
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index e2ca8fbdb10e..1cd2d8f32ea7 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -142,12 +142,16 @@ public int[] findSiblingDocIds(int childDocId) {
   @Override
   public int docIdToOrdinal(int docId) {
     // Conditions explanation:
-    //  docToOrd == null — buildDocToOrd returns null when the segment has no vector values at all for the field (line 113). Without this guard you'd get a NullPointerException.
-    //  docId >= docToOrd.length — In production, docToOrd is sized exactly maxDoc for that segment (line 116), so every valid docId in that segment fits. This check is therefore a pure defensive guard — it can't be triggered by
+    //  docToOrd == null — buildDocToOrd returns null when the segment has no vector values at all
+    // for the field (line 113). Without this guard you'd get a NullPointerException.
+    //  docId >= docToOrd.length — In production, docToOrd is sized exactly maxDoc for that segment
+    // (line 116), so every valid docId in that segment fits. This check is therefore a pure
+    // defensive guard — it can't be triggered by
     //  correct production code, but it protects against:
     //  - bugs in how the array is built or passed
     //  - future refactoring that changes the array size
-    //  - misuse when constructing the collector manually (as in tests, e.g. the docIdToOrdinal(9999) test case)
+    //  - misuse when constructing the collector manually (as in tests, e.g. the
+    // docIdToOrdinal(9999) test case)
     if (docToOrd == null || docId >= docToOrd.length) {
       return -1;
     }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
index 03e1267b8c47..9914b285e0ac 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
@@ -143,7 +143,8 @@ private int[] getCachedDocToOrd(LeafReaderContext context) throws IOException {
   private int[] buildDocToOrd(LeafReaderContext context) throws IOException {
     FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
     // fi = null if the field doesn't exist in this segment at all.
-    // fi.getVectorDimension() = 0 if the field exist in the segment but was not indexed as a vector field.
+    // fi.getVectorDimension() = 0 if the field exist in the segment but was not indexed as a vector
+    // field.
     if (fi == null || fi.getVectorDimension() == 0) {
       return null;
     }
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java b/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java
index 3d7fdecf31ca..82ce76196a73 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/DiversifyingChildrenKnnCollectorTestCase.java
@@ -24,8 +24,8 @@
 import org.apache.lucene.util.BitSet;
 
 /**
- * Base class for {@link DiversifyingNearestChildrenKnnCollector} tests. Provides shared helpers
- * for building the block-join parent {@link BitSet} used across test subclasses.
+ * Base class for {@link DiversifyingNearestChildrenKnnCollector} tests. Provides shared helpers for
+ * building the block-join parent {@link BitSet} used across test subclasses.
  */
 abstract class DiversifyingChildrenKnnCollectorTestCase extends LuceneTestCase {
 
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index bb170ec3d79e..934ff8f593de 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -46,8 +46,8 @@
 
 /**
  * Tests for sibling expansion in {@link DiversifyingNearestChildrenKnnCollector} ({@link
- * org.apache.lucene.util.hnsw.DocSiblingExpansion} contract) and end-to-end correctness via
- * {@link DiversifyingChildrenFloatKnnVectorQuery}.
+ * org.apache.lucene.util.hnsw.DocSiblingExpansion} contract) and end-to-end correctness via {@link
+ * DiversifyingChildrenFloatKnnVectorQuery}.
  */
 public class TestDiversifyingChildrenKnnSiblingExpansion
     extends DiversifyingChildrenKnnCollectorTestCase {
@@ -141,10 +141,10 @@ public void testDocIdToOrdinal_correctMapping() throws IOException {
 
     assertEquals(0, c.docIdToOrdinal(0));
     assertEquals(1, c.docIdToOrdinal(1));
-    assertEquals(-1, c.docIdToOrdinal(2));  // parent → no vector
+    assertEquals(-1, c.docIdToOrdinal(2)); // parent → no vector
     assertEquals(2, c.docIdToOrdinal(3));
     assertEquals(3, c.docIdToOrdinal(4));
-    assertEquals(-1, c.docIdToOrdinal(5));  // parent → no vector
+    assertEquals(-1, c.docIdToOrdinal(5)); // parent → no vector
     assertEquals(-1, c.docIdToOrdinal(9999)); // beyond array bounds
   }
 
@@ -163,8 +163,8 @@ public void testDocIdToOrdinal_nullMapping_alwaysMinusOne() throws IOException {
   // ---------------------------------------------------------------------------
 
   /**
-   * C3 is the entry point (score 0.85) but its siblings C4 (0.95)
-   * and C5 (0.90) are expanded immediately. The parent must be represented by C4, not C3.
+   * C3 is the entry point (score 0.85) but its siblings C4 (0.95) and C5 (0.90) are expanded
+   * immediately. The parent must be represented by C4, not C3.
    */
   public void testBestSiblingReplacesFirstFoundChild() throws IOException {
     // 1 parent, 3 children: [C0=0, C1=1, C2=2 | P0=3]
@@ -181,7 +181,8 @@ public void testBestSiblingReplacesFirstFoundChild() throws IOException {
 
     TopDocs td = c.topDocs();
     assertEquals(1, td.scoreDocs.length);
-    assertEquals("best sibling must win over first-found child", 0.95f, td.scoreDocs[0].score, 1e-5f);
+    assertEquals(
+        "best sibling must win over first-found child", 0.95f, td.scoreDocs[0].score, 1e-5f);
     assertEquals("best child doc id must be C1", 1, td.scoreDocs[0].doc);
   }
 
@@ -201,8 +202,8 @@ public void testBestSiblingReplacesWorseChildWhenHeapFull() throws IOException {
     c.collect(1, 0.95f);
 
     // P1: C2 found first (0.60), C3 is better sibling (0.65)
-    c.collect(3, 0.60f);  // trigger child → P1 enters heap, now FULL (size=2=k)
-    c.collect(4, 0.65f);  // better sibling → heap full but P1 already present, updates entry
+    c.collect(3, 0.60f); // trigger child → P1 enters heap, now FULL (size=2=k)
+    c.collect(4, 0.65f); // better sibling → heap full but P1 already present, updates entry
 
     TopDocs td = c.topDocs();
     assertEquals(2, td.scoreDocs.length);
@@ -217,8 +218,8 @@ public void testBestSiblingReplacesWorseChildWhenHeapFull() throws IOException {
   // ---------------------------------------------------------------------------
 
   /**
-   * Siblings whose ordinal is already in visitedOrds must be filtered out to prevent
-   * double-scoring when the sibling was independently discovered via normal graph traversal.
+   * Siblings whose ordinal is already in visitedOrds must be filtered out to prevent double-scoring
+   * when the sibling was independently discovered via normal graph traversal.
    */
   public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOException {
     // 1 parent, 3 children: [C0=doc0, C1=doc1, C2=doc2 | P0=doc3]
@@ -227,7 +228,8 @@ public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOExceptio
     int[] docToOrd = buildDocToOrd(1, 3);
     int[] ordToDoc = {0, 1, 2};
     OrdinalTranslatedKnnCollector collector =
-        new OrdinalTranslatedKnnCollector(makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
+        new OrdinalTranslatedKnnCollector(
+            makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
 
     // Mark C1 (ordinal 1) as already visited by normal graph traversal
     FixedBitSet visited = new FixedBitSet(4);
@@ -236,12 +238,12 @@ public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOExceptio
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 and C2
     int[] result = collector.pendingSiblingOrdinals(0, visited);
     assertNotNull(result);
-    assertArrayEquals("C1 must be filtered (visited); only C2 remains", new int[]{2}, result);
+    assertArrayEquals("C1 must be filtered (visited); only C2 remains", new int[] {2}, result);
   }
 
   /**
-   * Siblings with no vector in this field (docIdToOrdinal returns -1) must be skipped because
-   * they have no node in the HNSW graph and cannot be scored.
+   * Siblings with no vector in this field (docIdToOrdinal returns -1) must be skipped because they
+   * have no node in the HNSW graph and cannot be scored.
    */
   public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOException {
     // 1 parent, 3 children but C1 has no vector:
@@ -251,14 +253,15 @@ public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOExceptio
     int[] docToOrd = {0, -1, 1, -1};
     int[] ordToDoc = {0, 2};
     OrdinalTranslatedKnnCollector collector =
-        new OrdinalTranslatedKnnCollector(makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
+        new OrdinalTranslatedKnnCollector(
+            makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
 
     FixedBitSet visited = new FixedBitSet(4);
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 (sparse) and C2
     int[] result = collector.pendingSiblingOrdinals(0, visited);
     assertNotNull(result);
-    assertArrayEquals("C1 (no vector) must be filtered; only C2 remains", new int[]{1}, result);
+    assertArrayEquals("C1 (no vector) must be filtered; only C2 remains", new int[] {1}, result);
   }
 
   // ---------------------------------------------------------------------------
@@ -272,8 +275,8 @@ public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOExceptio
    * Builds a block-join float-vector index. Parent p has numChildren children; child c of parent p
    * gets vector: v[i] = (p * numChildren + c + i) * 0.1, then normalised for COSINE/DOT_PRODUCT.
    */
-  private Directory buildIndex(
-      int numParents, int numChildren, VectorSimilarityFunction sim) throws IOException {
+  private Directory buildIndex(int numParents, int numChildren, VectorSimilarityFunction sim)
+      throws IOException {
     Directory dir = newDirectory();
     try (IndexWriter w =
         new IndexWriter(
@@ -282,7 +285,8 @@ dir, new IndexWriterConfig().setMergePolicy(newMergePolicy(random(), false)))) {
         List<Document> block = new ArrayList<>();
         for (int c = 0; c < numChildren; c++) {
           float[] vec = childVector(p, c, numChildren);
-          if (sim == VectorSimilarityFunction.COSINE || sim == VectorSimilarityFunction.DOT_PRODUCT) {
+          if (sim == VectorSimilarityFunction.COSINE
+              || sim == VectorSimilarityFunction.DOT_PRODUCT) {
             normalise(vec);
           }
           Document child = new Document();
@@ -319,11 +323,7 @@ private static void normalise(float[] vec) {
 
   /** Computes brute-force top-k scores: max similarity per parent, sorted descending. */
   private static float[] bruteForceTopK(
-      float[] query,
-      int numParents,
-      int numChildren,
-      VectorSimilarityFunction sim,
-      int k) {
+      float[] query, int numParents, int numChildren, VectorSimilarityFunction sim, int k) {
     float[] parentBest = new float[numParents];
     Arrays.fill(parentBest, Float.NEGATIVE_INFINITY);
     for (int p = 0; p < numParents; p++) {
@@ -404,8 +404,8 @@ public void testSiblingExpansionSingleChildParents() throws Exception {
   }
 
   /**
-   * The query is close to one parent's children; sibling expansion must find the best child of each discovered parent
-   * rather than whichever child the graph traversal happens to reach first.
+   * The query is close to one parent's children; sibling expansion must find the best child of each
+   * discovered parent rather than whichever child the graph traversal happens to reach first.
    */
   public void testSiblingExpansion_bestChildPerParentFound() throws Exception {
     int numParents = 3, numChildren = 3, k = 2;
@@ -425,15 +425,22 @@ public void testSiblingExpansion_bestChildPerParentFound() throws Exception {
 
       assertEquals(k, results.scoreDocs.length);
       for (ScoreDoc sd : results.scoreDocs) {
-        int parentIdx = reader.storedFields().document(sd.doc)
-            .getField("parent").numericValue().intValue();
+        int parentIdx =
+            reader.storedFields().document(sd.doc).getField("parent").numericValue().intValue();
         // verify no other child of the same parent scores higher than the returned one
         for (int c = 0; c < numChildren; c++) {
           float[] vec = childVector(parentIdx, c, numChildren);
           float cScore = sim.compare(query, vec);
           assertTrue(
-              "parent " + parentIdx + " has a better child (score " + cScore
-                  + ") than returned doc " + sd.doc + " (score " + sd.score + ")",
+              "parent "
+                  + parentIdx
+                  + " has a better child (score "
+                  + cScore
+                  + ") than returned doc "
+                  + sd.doc
+                  + " (score "
+                  + sd.score
+                  + ")",
               cScore <= sd.score + 1e-4f);
         }
       }
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java b/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java
index 212990bfac88..fe1b008b5f4d 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestToParentJoinKnnResults.java
@@ -104,7 +104,8 @@ public void testRandomInsertionsWithOverflow() throws IOException {
     BitSet parentBitSet =
         BitSet.of(new IntArrayDocIdSetIterator(parents, parents.length), nextParent + 1);
     DiversifyingNearestChildrenKnnCollector results =
-        new DiversifyingNearestChildrenKnnCollector(20, Integer.MAX_VALUE, null, parentBitSet, null);
+        new DiversifyingNearestChildrenKnnCollector(
+            20, Integer.MAX_VALUE, null, parentBitSet, null);
     for (int i = 0; i < children.size(); i++) {
       results.collect(children.get(i), childrenScores.get(i));
     }

From 5dbf0f7dbe836f6a37424240c276281484032199 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Tue, 5 May 2026 16:02:57 +0200
Subject: [PATCH 10/23] Gradlew check

---
 .../lucene/util/hnsw/OrdinalTranslatedKnnCollector.java       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 4814e799826b..308a3b9a0ded 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -17,10 +17,10 @@
 
 package org.apache.lucene.util.hnsw;
 
-import java.util.Arrays;
 import org.apache.lucene.search.KnnCollector;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSet;
 
 /**
@@ -81,6 +81,6 @@ public int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds) {
         siblingOrdinals[count++] = sibOrd;
       }
     }
-    return count > 0 ? Arrays.copyOf(siblingOrdinals, count) : null;
+    return count > 0 ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count) : null;
   }
 }

From 0448dcf94941930c457135e1ac4eacb04531776e Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Wed, 6 May 2026 15:42:37 +0200
Subject: [PATCH 11/23] Small names refactoring

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  | 34 ++++++++++++-------
 .../util/hnsw/ChildrenSiblingExpansion.java   |  4 +--
 .../lucene/util/hnsw/HnswGraphSearcher.java   |  5 +--
 .../hnsw/OrdinalTranslatedKnnCollector.java   |  8 ++---
 ...versifyingNearestChildrenKnnCollector.java |  9 +++--
 ...versifyingChildrenKnnSiblingExpansion.java | 10 +++---
 6 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index dc6ae9560706..6ab42b9808fe 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -106,10 +106,18 @@ protected static void scoreEntryPoints(
         // Fetch siblings BEFORE collect() so the parent is not yet in the heap
         int[] siblings = null;
         int numSiblingsToVisit = 0;
+        // This check is needed since this method is also called by the GraphBuilderKnnCollector
         if (results instanceof ChildrenSiblingExpansion expander) {
-          siblings = expander.pendingSiblingOrdinals(ep, visited);
+          siblings = expander.getSiblingOrdinals(ep, visited);
           if (siblings != null) {
             //  how many siblings are actually scored to avoid exceeding the visit budget.
+            //  controls the early termination condition. early terminates the search if we reach
+            //  visitLimit nodes
+            //  if this visit limit is high we just navigate the graph until we do not have any node
+            //  with a score higher than the ones already collected
+            //  Current values it could assume:
+            //  -  No filter → Integer.MAX_VALUE (no constraint tighter than the full segment)
+            //  -  With filter → cardinality (no constraint tighter than the full accepted set)
             numSiblingsToVisit =
                 (int) Math.min(siblings.length, results.visitLimit() - results.visitedCount());
             // Only mark as visited the siblings we will actually score; the rest remain
@@ -121,7 +129,7 @@ protected static void scoreEntryPoints(
         results.collect(ep, score);
         if (numSiblingsToVisit > 0) {
           siblingScores =
-              scoreSiblings(
+              scoreHnswNodes(
                   results,
                   scorer,
                   candidates,
@@ -138,36 +146,36 @@ protected static void scoreEntryPoints(
    * Scores and collects siblings, adding competitive ones to the candidate queue. Reuses and
    * returns the siblingScores buffer, reallocating only if too small.
    */
-  protected static float[] scoreSiblings(
+  protected static float[] scoreHnswNodes(
       KnnCollector results,
       RandomVectorScorer scorer,
       NeighborQueue candidates,
       Bits acceptOrds,
-      int[] siblings,
+      int[] hnswNodes,
       int numSiblings,
-      float[] siblingScores)
+      float[] scores)
       throws IOException {
     // If siblingScores not defined yet or too small to collect scores a new one is created
     // Otherwise we reuse the old one that will be overridden in bulkScore with new scores
-    if (siblingScores == null || siblingScores.length < numSiblings) {
-      siblingScores = new float[numSiblings];
+    if (scores == null || scores.length < numSiblings) {
+      scores = new float[numSiblings];
     }
-    float maxScore = scorer.bulkScore(siblings, siblingScores, numSiblings);
+    float maxScore = scorer.bulkScore(hnswNodes, scores, numSiblings);
     results.incVisitedCount(numSiblings);
     if (maxScore > results.minCompetitiveSimilarity()) {
       float minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
       for (int j = 0; j < numSiblings; j++) {
-        float sibScore = siblingScores[j];
+        float sibScore = scores[j];
         // We avoid adding to candidates a sibling with a bad score
         if (sibScore >= minSimilarity) {
-          candidates.add(siblings[j], sibScore);
-          if (acceptOrds == null || acceptOrds.get(siblings[j])) {
-            results.collect(siblings[j], sibScore);
+          candidates.add(hnswNodes[j], sibScore);
+          if (acceptOrds == null || acceptOrds.get(hnswNodes[j])) {
+            results.collect(hnswNodes[j], sibScore);
             minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
           }
         }
       }
     }
-    return siblingScores;
+    return scores;
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
index ee9823cee925..763e3503a5d2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
@@ -25,7 +25,7 @@
  * belonging to a newly-discovered parent, all siblings of that parent are immediately scored and
  * collected without requiring full graph traversal.
  *
- * <p>The graph searcher calls {@link #pendingSiblingOrdinals} <em>before</em> invoking {@link
+ * <p>The graph searcher calls {@link #getSiblingOrdinals} <em>before</em> invoking {@link
  * org.apache.lucene.search.KnnCollector#collect} for the triggering node, so that the collector can
  * safely inspect its internal state before the parent is registered.
  *
@@ -46,5 +46,5 @@ public interface ChildrenSiblingExpansion {
    *     marked here — the caller does that
    * @return ordinals of sibling nodes to score next, or {@code null} if none
    */
-  int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds);
+  int[] getSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds);
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index e64cbed10a0c..1686b581a1bb 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -351,8 +351,9 @@ void searchLevel(
               // Fetch siblings BEFORE collect() so the parent is not yet in the heap
               int[] siblings = null;
               int numSiblingsToVisit = 0;
+              // This check is needed since this method is also called by the GraphBuilderKnnCollector
               if (results instanceof ChildrenSiblingExpansion expander) {
-                siblings = expander.pendingSiblingOrdinals(node, visited);
+                siblings = expander.getSiblingOrdinals(node, visited);
                 if (siblings != null) {
                   numSiblingsToVisit =
                       (int)
@@ -375,7 +376,7 @@ void searchLevel(
               if (numSiblingsToVisit > 0) {
                 float prevMinSim = results.minCompetitiveSimilarity();
                 siblingScores =
-                    scoreSiblings(
+                    scoreHnswNodes(
                         results,
                         scorer,
                         candidates,
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 308a3b9a0ded..def69b981df4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -57,11 +57,11 @@ public TopDocs topDocs() {
   }
 
   @Override
-  public int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds) {
+  public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
     if (!(collector instanceof DocSiblingExpansion docExpander)) {
       return null;
     }
-    int docId = vectorOrdinalToDocId.apply(collectedOrdinal);
+    int docId = vectorOrdinalToDocId.apply(hnswNode);
     int[] siblingDocIds = docExpander.findSiblingDocIds(docId);
     if (siblingDocIds == null) {
       return null;
@@ -76,8 +76,8 @@ public int[] pendingSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds) {
       //  Such a doc has no node in the HNSW graph and can't be scored, so it must be skipped.
       //  If a sibling was reached via normal graph traversal before sibling expansion triggered,
       // re-adding it would
-      //  cause it to be scored twice. !visitedOrds.get(sibOrd) filters those out.
-      if (sibOrd >= 0 && !visitedOrds.get(sibOrd)) {
+      //  cause it to be scored twice. !visitedHnswNodes.get(sibOrd) filters those out.
+      if (sibOrd >= 0 && !visitedHnswNodes.get(sibOrd)) {
         siblingOrdinals[count++] = sibOrd;
       }
     }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index 1cd2d8f32ea7..eba2181676ad 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -113,7 +113,7 @@ public int numCollected() {
     return heap.size();
   }
 
-  @Override
+  // SOLO PER VISITARE LA FAMIGLIA ALTRIMENTI NON LO CHIAMIAMO PROPRIO
   public int[] findSiblingDocIds(int childDocId) {
     int parent = parentBitSet.nextSetBit(childDocId);
     // Skip if this parent's entry is already in the heap
@@ -124,12 +124,12 @@ public int[] findSiblingDocIds(int childDocId) {
     int prevParent = parent > 0 ? parentBitSet.prevSetBit(parent - 1) : -1;
     int from = prevParent + 1;
     // Children of parent are all docIds in [from, parent); exclude childDocId itself
-    int capacity = parent - from - 1;
+    int siblingsSize = parent - from - 1;
     // One child case
-    if (capacity == 0) {
+    if (siblingsSize == 0) {
       return null;
     }
-    int[] siblings = new int[capacity];
+    int[] siblings = new int[siblingsSize];
     int idx = 0;
     for (int docId = from; docId < parent; docId++) {
       if (docId != childDocId) {
@@ -139,7 +139,6 @@ public int[] findSiblingDocIds(int childDocId) {
     return idx > 0 ? siblings : null;
   }
 
-  @Override
   public int docIdToOrdinal(int docId) {
     // Conditions explanation:
     //  docToOrd == null — buildDocToOrd returns null when the segment has no vector values at all
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index 934ff8f593de..94e2f085503d 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -53,7 +53,7 @@ public class TestDiversifyingChildrenKnnSiblingExpansion
     extends DiversifyingChildrenKnnCollectorTestCase {
 
   // ---------------------------------------------------------------------------
-  // Unit tests: DocSiblingExpansion — findSiblingDocIds
+  // Unit tests: DiversifyingNearestChildrenKnnCollector — findSiblingDocIds
   // ---------------------------------------------------------------------------
 
   public void testFindSiblingDocIds_returnsAllSiblings() throws IOException {
@@ -129,7 +129,7 @@ public void testFindSiblingDocIds_excludesTriggerChild() throws IOException {
   }
 
   // ---------------------------------------------------------------------------
-  // Unit tests: DocSiblingExpansion — docIdToOrdinal
+  // Unit tests: DiversifyingNearestChildrenKnnCollector — docIdToOrdinal
   // ---------------------------------------------------------------------------
 
   public void testDocIdToOrdinal_correctMapping() throws IOException {
@@ -214,7 +214,7 @@ public void testBestSiblingReplacesWorseChildWhenHeapFull() throws IOException {
   }
 
   // ---------------------------------------------------------------------------
-  // Unit tests: OrdinalTranslatedKnnCollector — pendingSiblingOrdinals
+  // Unit tests: OrdinalTranslatedKnnCollector — getSiblingOrdinals
   // ---------------------------------------------------------------------------
 
   /**
@@ -236,7 +236,7 @@ public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOExceptio
     visited.set(1);
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 and C2
-    int[] result = collector.pendingSiblingOrdinals(0, visited);
+    int[] result = collector.getSiblingOrdinals(0, visited);
     assertNotNull(result);
     assertArrayEquals("C1 must be filtered (visited); only C2 remains", new int[] {2}, result);
   }
@@ -259,7 +259,7 @@ public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOExceptio
     FixedBitSet visited = new FixedBitSet(4);
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 (sparse) and C2
-    int[] result = collector.pendingSiblingOrdinals(0, visited);
+    int[] result = collector.getSiblingOrdinals(0, visited);
     assertNotNull(result);
     assertArrayEquals("C1 (no vector) must be filtered; only C2 remains", new int[] {1}, result);
   }

From ebc30a08ed30e04fc9dd26325bca7530217ef2a3 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Wed, 6 May 2026 15:48:44 +0200
Subject: [PATCH 12/23] Removed ChildrenSiblingExpansion interface

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  |  4 +-
 .../util/hnsw/ChildrenSiblingExpansion.java   | 50 -------------------
 .../lucene/util/hnsw/DocSiblingExpansion.java |  2 +-
 .../lucene/util/hnsw/HnswGraphSearcher.java   |  4 +-
 .../hnsw/OrdinalTranslatedKnnCollector.java   |  8 ++-
 5 files changed, 8 insertions(+), 60 deletions(-)
 delete mode 100644 lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index 6ab42b9808fe..e95101efc054 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -107,8 +107,8 @@ protected static void scoreEntryPoints(
         int[] siblings = null;
         int numSiblingsToVisit = 0;
         // This check is needed since this method is also called by the GraphBuilderKnnCollector
-        if (results instanceof ChildrenSiblingExpansion expander) {
-          siblings = expander.getSiblingOrdinals(ep, visited);
+        if (results instanceof OrdinalTranslatedKnnCollector collector) {
+          siblings = collector.getSiblingOrdinals(ep, visited);
           if (siblings != null) {
             //  how many siblings are actually scored to avoid exceeding the visit budget.
             //  controls the early termination condition. early terminates the search if we reach
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
deleted file mode 100644
index 763e3503a5d2..000000000000
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/ChildrenSiblingExpansion.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.util.hnsw;
-
-import org.apache.lucene.util.BitSet;
-
-/**
- * Implemented by {@link org.apache.lucene.search.KnnCollector} instances that support dynamic
- * sibling expansion during HNSW graph search. When the graph searcher collects a child node
- * belonging to a newly-discovered parent, all siblings of that parent are immediately scored and
- * collected without requiring full graph traversal.
- *
- * <p>The graph searcher calls {@link #getSiblingOrdinals} <em>before</em> invoking {@link
- * org.apache.lucene.search.KnnCollector#collect} for the triggering node, so that the collector can
- * safely inspect its internal state before the parent is registered.
- *
- * @lucene.experimental
- */
-public interface ChildrenSiblingExpansion {
-
-  /**
-   * Called by the HNSW graph searcher before {@link org.apache.lucene.search.KnnCollector#collect}
-   * is invoked for {@code collectedOrdinal}. The implementation inspects the node's parent and
-   * returns the ordinals of unvisited siblings that should be scored immediately.
-   *
-   * <p>The caller is responsible for marking the returned ordinals as visited in {@code
-   * visitedOrds} and for scoring and collecting them.
-   *
-   * @param collectedOrdinal the vector ordinal about to be collected
-   * @param visitedOrds the graph searcher's current visited bitset; returned siblings are not yet
-   *     marked here — the caller does that
-   * @return ordinals of sibling nodes to score next, or {@code null} if none
-   */
-  int[] getSiblingOrdinals(int collectedOrdinal, BitSet visitedOrds);
-}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
index 6832948f7386..53b9fe9447b9 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -18,7 +18,7 @@
 package org.apache.lucene.util.hnsw;
 
 /**
- * Doc-ID-level companion to {@link ChildrenSiblingExpansion}. Implemented by collectors that
+ * Implemented by collectors that
  * understand parent-child document relationships and can enumerate sibling document ids for a given
  * child document id, as well as translate document ids back to vector ordinals.
  *
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 1686b581a1bb..a0f5cf8525e8 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -352,8 +352,8 @@ void searchLevel(
               int[] siblings = null;
               int numSiblingsToVisit = 0;
               // This check is needed since this method is also called by the GraphBuilderKnnCollector
-              if (results instanceof ChildrenSiblingExpansion expander) {
-                siblings = expander.getSiblingOrdinals(node, visited);
+              if (results instanceof OrdinalTranslatedKnnCollector collector) {
+                siblings = collector.getSiblingOrdinals(node, visited);
                 if (siblings != null) {
                   numSiblingsToVisit =
                       (int)
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index def69b981df4..1ee335dbbd9e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -25,11 +25,10 @@
 
 /**
  * Wraps a provided KnnCollector object, translating the provided vectorId ordinal to a documentId.
- * This wrapper implements {@link ChildrenSiblingExpansion}; sibling expansion is active only when
- * the wrapped collector also implements {@link DocSiblingExpansion}.
+ * Sibling expansion is active only when the wrapped collector also implements {@link
+ * DocSiblingExpansion}.
  */
-public final class OrdinalTranslatedKnnCollector extends KnnCollector.Decorator
-    implements ChildrenSiblingExpansion {
+public final class OrdinalTranslatedKnnCollector extends KnnCollector.Decorator {
 
   private final IntToIntFunction vectorOrdinalToDocId;
 
@@ -56,7 +55,6 @@ public TopDocs topDocs() {
         td.scoreDocs);
   }
 
-  @Override
   public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
     if (!(collector instanceof DocSiblingExpansion docExpander)) {
       return null;

From ace844f82fdc4bc0cf430e98d0ee79181f414e61 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Wed, 6 May 2026 16:10:35 +0200
Subject: [PATCH 13/23] Analyzed if DocSiblingExpansion could be removed -> NO

---
 .../lucene/util/hnsw/AbstractHnswGraphSearcher.java  |  3 ++-
 .../apache/lucene/util/hnsw/DocSiblingExpansion.java |  8 ++++++++
 .../apache/lucene/util/hnsw/HnswGraphSearcher.java   |  6 ++++--
 .../util/hnsw/OrdinalTranslatedKnnCollector.java     | 12 +++++++-----
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index e95101efc054..527929969220 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -107,7 +107,8 @@ protected static void scoreEntryPoints(
         int[] siblings = null;
         int numSiblingsToVisit = 0;
         // This check is needed since this method is also called by the GraphBuilderKnnCollector
-        if (results instanceof OrdinalTranslatedKnnCollector collector) {
+        if (results instanceof OrdinalTranslatedKnnCollector collector
+            && collector.supportsSiblingExpansion()) {
           siblings = collector.getSiblingOrdinals(ep, visited);
           if (siblings != null) {
             //  how many siblings are actually scored to avoid exceeding the visit budget.
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
index 53b9fe9447b9..bc4226f21c28 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -27,6 +27,14 @@
  *
  * @lucene.experimental
  */
+//  The interface cannot be removed. It exists for a module-boundary reason.
+//  DocSiblingExpansion is in lucene/core, while DiversifyingNearestChildrenKnnCollector is in lucene/join.
+//  The dependency is one-way: join depends on core, never the reverse. So OrdinalTranslatedKnnCollector (in core)
+//  has no way to reference DiversifyingNearestChildrenKnnCollector directly.
+//
+//  The interface is the bridge — it lets core call findSiblingDocIds and docIdToOrdinal on the collector without
+//  creating a circular dependency. Removing it would require either moving OrdinalTranslatedKnnCollector into
+//  join (bigger refactor) or adding a core → join dependency (illegal in this architecture).
 public interface DocSiblingExpansion {
 
   /**
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index a0f5cf8525e8..cb005361397c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -351,8 +351,10 @@ void searchLevel(
               // Fetch siblings BEFORE collect() so the parent is not yet in the heap
               int[] siblings = null;
               int numSiblingsToVisit = 0;
-              // This check is needed since this method is also called by the GraphBuilderKnnCollector
-              if (results instanceof OrdinalTranslatedKnnCollector collector) {
+              // The first check is needed since this method is also called by the GraphBuilderKnnCollector
+              // The second check is needed since we could have a TopKnnCollector at this point
+              if (results instanceof OrdinalTranslatedKnnCollector collector
+                  && collector.supportsSiblingExpansion()) {
                 siblings = collector.getSiblingOrdinals(node, visited);
                 if (siblings != null) {
                   numSiblingsToVisit =
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 1ee335dbbd9e..e20bc343046e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -55,12 +55,14 @@ public TopDocs topDocs() {
         td.scoreDocs);
   }
 
+  public boolean supportsSiblingExpansion() {
+    return collector instanceof DocSiblingExpansion;
+  }
+
   public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
-    if (!(collector instanceof DocSiblingExpansion docExpander)) {
-      return null;
-    }
+    DocSiblingExpansion docExpanderCollector = (DocSiblingExpansion) collector;
     int docId = vectorOrdinalToDocId.apply(hnswNode);
-    int[] siblingDocIds = docExpander.findSiblingDocIds(docId);
+    int[] siblingDocIds = docExpanderCollector.findSiblingDocIds(docId);
     if (siblingDocIds == null) {
       return null;
     }
@@ -69,7 +71,7 @@ public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
     // so this variable is necessary.
     int count = 0;
     for (int sibDocId : siblingDocIds) {
-      int sibOrd = docExpander.docIdToOrdinal(sibDocId);
+      int sibOrd = docExpanderCollector.docIdToOrdinal(sibDocId);
       //  sibOrd = -1 when a document has no vector for this field.
       //  Such a doc has no node in the HNSW graph and can't be scored, so it must be skipped.
       //  If a sibling was reached via normal graph traversal before sibling expansion triggered,

From 315462217e48d6e70aad5ef5eaeec5d454cde989 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Wed, 6 May 2026 17:54:24 +0200
Subject: [PATCH 14/23] Removed return null from getSiblingOrdinals. Empty
 array given.

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  | 37 ++++++++--------
 .../lucene/util/hnsw/DocSiblingExpansion.java |  5 +--
 .../lucene/util/hnsw/HnswGraphSearcher.java   | 24 +++++------
 .../hnsw/OrdinalTranslatedKnnCollector.java   | 23 ++++++----
 ...versifyingNearestChildrenKnnCollector.java |  6 +--
 ...versifyingChildrenKnnSiblingExpansion.java | 43 ++++++++++++-------
 6 files changed, 76 insertions(+), 62 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index 527929969220..bb8c36a2e3b6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -106,24 +106,25 @@ protected static void scoreEntryPoints(
         // Fetch siblings BEFORE collect() so the parent is not yet in the heap
         int[] siblings = null;
         int numSiblingsToVisit = 0;
-        // This check is needed since this method is also called by the GraphBuilderKnnCollector
-        if (results instanceof OrdinalTranslatedKnnCollector collector
-            && collector.supportsSiblingExpansion()) {
-          siblings = collector.getSiblingOrdinals(ep, visited);
-          if (siblings != null) {
-            //  how many siblings are actually scored to avoid exceeding the visit budget.
-            //  controls the early termination condition. early terminates the search if we reach
-            //  visitLimit nodes
-            //  if this visit limit is high we just navigate the graph until we do not have any node
-            //  with a score higher than the ones already collected
-            //  Current values it could assume:
-            //  -  No filter → Integer.MAX_VALUE (no constraint tighter than the full segment)
-            //  -  With filter → cardinality (no constraint tighter than the full accepted set)
-            numSiblingsToVisit =
-                (int) Math.min(siblings.length, results.visitLimit() - results.visitedCount());
-            // Only mark as visited the siblings we will actually score; the rest remain
-            // reachable via normal graph traversal so a better child can still be found
-            for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+        // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
+        if (results instanceof OrdinalTranslatedKnnCollector collector) {
+          if (collector.isSiblingExpansionCollector()) {
+            siblings = collector.getSiblingOrdinals(ep, visited);
+            if (siblings.length > 0) {
+              //  how many siblings are actually scored to avoid exceeding the visit budget.
+              //  controls the early termination condition. early terminates the search if we reach
+              //  visitLimit nodes
+              //  if this visit limit is high we just navigate the graph until we do not have any node
+              //  with a score higher than the ones already collected
+              //  Current values it could assume:
+              //  -  No filter → Integer.MAX_VALUE (no constraint tighter than the full segment)
+              //  -  With filter → cardinality (no constraint tighter than the full accepted set)
+              numSiblingsToVisit =
+                  (int) Math.min(siblings.length, results.visitLimit() - results.visitedCount());
+              // Only mark as visited the siblings we will actually score; the rest remain
+              // reachable via normal graph traversal so a better child can still be found
+              for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+            }
           }
         }
         // Collect the ep node here so after we have a correctly updated minCompetitiveSimilarity
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
index bc4226f21c28..4942f3a006eb 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -38,9 +38,8 @@
 public interface DocSiblingExpansion {
 
   /**
-   * Returns the doc ids of all siblings of {@code childDocId} whose parent has not yet been added
-   * to the result heap, or {@code null} if the parent was already processed or there are no other
-   * siblings.
+   * Returns the doc ids of all siblings of {@code childDocId}, or {@code null} if there are no
+   * other siblings.
    *
    * @param childDocId the document id of the child that is about to be collected
    * @return sibling doc ids, or {@code null}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index cb005361397c..5a516e435930 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -351,18 +351,18 @@ void searchLevel(
               // Fetch siblings BEFORE collect() so the parent is not yet in the heap
               int[] siblings = null;
               int numSiblingsToVisit = 0;
-              // The first check is needed since this method is also called by the GraphBuilderKnnCollector
-              // The second check is needed since we could have a TopKnnCollector at this point
-              if (results instanceof OrdinalTranslatedKnnCollector collector
-                  && collector.supportsSiblingExpansion()) {
-                siblings = collector.getSiblingOrdinals(node, visited);
-                if (siblings != null) {
-                  numSiblingsToVisit =
-                      (int)
-                          Math.min(siblings.length, results.visitLimit() - results.visitedCount());
-                  // Only mark as visited the siblings we will actually score; the rest remain
-                  // reachable via normal graph traversal so a better child can still be found
-                  for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+              // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
+              if (results instanceof OrdinalTranslatedKnnCollector collector) {
+                if (collector.isSiblingExpansionCollector()) {
+                  siblings = collector.getSiblingOrdinals(node, visited);
+                  if (siblings.length > 0) {
+                    numSiblingsToVisit =
+                        (int)
+                            Math.min(siblings.length, results.visitLimit() - results.visitedCount());
+                    // Only mark as visited the siblings we will actually score; the rest remain
+                    // reachable via normal graph traversal so a better child can still be found
+                    for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+                  }
                 }
               }
               if (results.collect(node, score)) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index e20bc343046e..18c04a40a210 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -55,32 +55,39 @@ public TopDocs topDocs() {
         td.scoreDocs);
   }
 
-  public boolean supportsSiblingExpansion() {
+  // Needed since we could have a TopKnnCollector at this point
+  public boolean isSiblingExpansionCollector() {
     return collector instanceof DocSiblingExpansion;
   }
 
   public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
     DocSiblingExpansion docExpanderCollector = (DocSiblingExpansion) collector;
     int docId = vectorOrdinalToDocId.apply(hnswNode);
+    // We do not check if parent is in heap since if we already seed A
+    // - A was found and scored (parent added), but we reach the budget limit and were not able to score B,
+    //   we then found B through graph traversal. We want to visit it even if we already visited A.
+    //   We do not visit siblings in score order.
     int[] siblingDocIds = docExpanderCollector.findSiblingDocIds(docId);
     if (siblingDocIds == null) {
-      return null;
+      return new int[0];
     }
     int[] siblingOrdinals = new int[siblingDocIds.length];
     // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0
     // so this variable is necessary.
+    // due to visited result we could have a partial array to return
     int count = 0;
     for (int sibDocId : siblingDocIds) {
       int sibOrd = docExpanderCollector.docIdToOrdinal(sibDocId);
-      //  sibOrd = -1 when a document has no vector for this field.
-      //  Such a doc has no node in the HNSW graph and can't be scored, so it must be skipped.
-      //  If a sibling was reached via normal graph traversal before sibling expansion triggered,
-      // re-adding it would
-      //  cause it to be scored twice. !visitedHnswNodes.get(sibOrd) filters those out.
+      // sibOrd = -1: sibling has no vector for this field → no HNSW node, cannot be scored.
+      //
+      // !visitedHnswNodes: sibling was already reached via normal graph traversal:
+      //   - B was scored via traversal but with score < minAcceptedSimilarity, so collect()
+      //     was never called and the parent is not in the heap. Expansion from a different
+      //     child A finds B already visited.
       if (sibOrd >= 0 && !visitedHnswNodes.get(sibOrd)) {
         siblingOrdinals[count++] = sibOrd;
       }
     }
-    return count > 0 ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count) : null;
+    return count > 0 ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count) : new int[0];
   }
 }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index eba2181676ad..46fe7b6975ef 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -113,13 +113,9 @@ public int numCollected() {
     return heap.size();
   }
 
-  // SOLO PER VISITARE LA FAMIGLIA ALTRIMENTI NON LO CHIAMIAMO PROPRIO
+  @Override
   public int[] findSiblingDocIds(int childDocId) {
     int parent = parentBitSet.nextSetBit(childDocId);
-    // Skip if this parent's entry is already in the heap
-    if (heap.containsParent(parent)) {
-      return null;
-    }
     // Find siblings range(prevParent, parent). If parent is 0 there are not prevParents so -1
     int prevParent = parent > 0 ? parentBitSet.prevSetBit(parent - 1) : -1;
     int from = prevParent + 1;
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index 94e2f085503d..5a8afcb4c1a8 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -84,20 +84,6 @@ public void testFindSiblingDocIds_returnsAllSiblings() throws IOException {
     assertArrayEquals(new int[] {3}, s4);
   }
 
-  public void testFindSiblingDocIds_parentAlreadyInHeap_returnsNull() throws IOException {
-    // 1 parents, 2 children → block: [0,1|2]
-    int numParents = 1, childrenPerParent = 2;
-    BitSet parents = parentBitSet(numParents, childrenPerParent);
-    int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
-    DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
-
-    // Collect child 0 → parent 2 enters the heap
-    c.collect(0, 0.8f);
-
-    // Now asking for siblings of child 1 (same parent 2) must return null
-    assertNull(c.findSiblingDocIds(1));
-  }
-
   public void testFindSiblingDocIds_singleChildParent_returnsNull() throws IOException {
     // 1 parents, 1 child each → blocks: [0|1]
     int numParents = 1, childrenPerParent = 1;
@@ -217,6 +203,31 @@ public void testBestSiblingReplacesWorseChildWhenHeapFull() throws IOException {
   // Unit tests: OrdinalTranslatedKnnCollector — getSiblingOrdinals
   // ---------------------------------------------------------------------------
 
+  /**
+   * Even when the parent is already in the heap (from a prior expansion), getSiblingOrdinals must
+   * still return unvisited siblings. This allows a budget-truncated first expansion to be continued
+   * when additional children of the same parent are later discovered via normal graph traversal.
+   */
+  public void testGetSiblingOrdinals_parentAlreadyInHeap_returnsUnvisitedSiblings()
+      throws IOException {
+    // 1 parent, 2 children: [C0=doc0, C1=doc1 | P0=doc2]; ordToDoc=[0,1]
+    BitSet parents = parentBitSet(1, 2);
+    int[] docToOrd = buildDocToOrd(1, 2);
+    int[] ordToDoc = {0, 1};
+    OrdinalTranslatedKnnCollector collector =
+        new OrdinalTranslatedKnnCollector(
+            makeCollector(10, parents, docToOrd), ord -> ordToDoc[ord]);
+
+    // Collect C0 → parent 2 enters the heap
+    collector.collect(0, 0.8f);
+
+    // C1 (ordinal 1) is not yet visited; must still be returned as an expansion candidate
+    FixedBitSet visited = new FixedBitSet(3);
+    int[] result = collector.getSiblingOrdinals(0, visited);
+    assertNotEquals("unvisited sibling must be returned even when parent is already in heap", 0, result.length);
+    assertArrayEquals(new int[] {1}, result);
+  }
+
   /**
    * Siblings whose ordinal is already in visitedOrds must be filtered out to prevent double-scoring
    * when the sibling was independently discovered via normal graph traversal.
@@ -237,7 +248,7 @@ public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOExceptio
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 and C2
     int[] result = collector.getSiblingOrdinals(0, visited);
-    assertNotNull(result);
+    assertNotEquals(0, result.length);
     assertArrayEquals("C1 must be filtered (visited); only C2 remains", new int[] {2}, result);
   }
 
@@ -260,7 +271,7 @@ public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOExceptio
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 (sparse) and C2
     int[] result = collector.getSiblingOrdinals(0, visited);
-    assertNotNull(result);
+    assertNotEquals(0, result.length);
     assertArrayEquals("C1 (no vector) must be filtered; only C2 remains", new int[] {1}, result);
   }
 

From 286d7ab94a2986c12f8209a88723ce0ed66f99d1 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 7 May 2026 14:42:40 +0200
Subject: [PATCH 15/23] Returned empty array instead of null in
 findiSiblingDocIds

---
 .../org/apache/lucene/util/hnsw/DocSiblingExpansion.java     | 4 ++--
 .../lucene/util/hnsw/OrdinalTranslatedKnnCollector.java      | 3 ---
 .../search/join/DiversifyingNearestChildrenKnnCollector.java | 5 +++--
 .../join/TestDiversifyingChildrenKnnSiblingExpansion.java    | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
index 4942f3a006eb..41112e35ccb1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -38,11 +38,11 @@
 public interface DocSiblingExpansion {
 
   /**
-   * Returns the doc ids of all siblings of {@code childDocId}, or {@code null} if there are no
+   * Returns the doc ids of all siblings of {@code childDocId}, or an empty array if there are no
    * other siblings.
    *
    * @param childDocId the document id of the child that is about to be collected
-   * @return sibling doc ids, or {@code null}
+   * @return sibling doc ids, or an empty array
    */
   int[] findSiblingDocIds(int childDocId);
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 18c04a40a210..9f3324a567f3 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -68,9 +68,6 @@ public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
     //   we then found B through graph traversal. We want to visit it even if we already visited A.
     //   We do not visit siblings in score order.
     int[] siblingDocIds = docExpanderCollector.findSiblingDocIds(docId);
-    if (siblingDocIds == null) {
-      return new int[0];
-    }
     int[] siblingOrdinals = new int[siblingDocIds.length];
     // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0
     // so this variable is necessary.
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index 46fe7b6975ef..f4e98cea8d8a 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -120,10 +120,11 @@ public int[] findSiblingDocIds(int childDocId) {
     int prevParent = parent > 0 ? parentBitSet.prevSetBit(parent - 1) : -1;
     int from = prevParent + 1;
     // Children of parent are all docIds in [from, parent); exclude childDocId itself
+    // The childDoc itself is scored in collect()
     int siblingsSize = parent - from - 1;
     // One child case
     if (siblingsSize == 0) {
-      return null;
+      return new int[0];
     }
     int[] siblings = new int[siblingsSize];
     int idx = 0;
@@ -132,7 +133,7 @@ public int[] findSiblingDocIds(int childDocId) {
         siblings[idx++] = docId;
       }
     }
-    return idx > 0 ? siblings : null;
+    return idx > 0 ? siblings : new int[0];
   }
 
   public int docIdToOrdinal(int docId) {
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index 5a8afcb4c1a8..0f9c3467d162 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -91,7 +91,7 @@ public void testFindSiblingDocIds_singleChildParent_returnsNull() throws IOExcep
     int[] docToOrd = buildDocToOrd(numParents, childrenPerParent);
     DiversifyingNearestChildrenKnnCollector c = makeCollector(10, parents, docToOrd);
 
-    assertNull("sole child has no siblings", c.findSiblingDocIds(0));
+    assertArrayEquals("sole child has no siblings", new int[0], c.findSiblingDocIds(0));
   }
 
   /**

From c9481f76cab94bd429d2b3ed05fb2e87ad12ea58 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 7 May 2026 15:26:38 +0200
Subject: [PATCH 16/23] Checked if numHnswNodes needed in scoreHnswNodes and
 changed some variables names

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  | 39 ++++++++++---------
 .../lucene/util/hnsw/HnswGraphSearcher.java   | 20 +++++-----
 2 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index bb8c36a2e3b6..e9c2896f916a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -103,15 +103,15 @@ protected static void scoreEntryPoints(
       visited.set(ep);
       candidates.add(ep, score);
       if (acceptOrds == null || acceptOrds.get(ep)) {
-        // Fetch siblings BEFORE collect() so the parent is not yet in the heap
-        int[] siblings = null;
+        // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
+        int[] siblingsOrd = null;
         int numSiblingsToVisit = 0;
         // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
         if (results instanceof OrdinalTranslatedKnnCollector collector) {
           if (collector.isSiblingExpansionCollector()) {
-            siblings = collector.getSiblingOrdinals(ep, visited);
-            if (siblings.length > 0) {
-              //  how many siblings are actually scored to avoid exceeding the visit budget.
+            siblingsOrd = collector.getSiblingOrdinals(ep, visited);
+            if (siblingsOrd.length > 0) {
+              //  how many siblingsOrd are actually scored to avoid exceeding the visit budget.
               //  controls the early termination condition. early terminates the search if we reach
               //  visitLimit nodes
               //  if this visit limit is high we just navigate the graph until we do not have any node
@@ -120,23 +120,24 @@ protected static void scoreEntryPoints(
               //  -  No filter → Integer.MAX_VALUE (no constraint tighter than the full segment)
               //  -  With filter → cardinality (no constraint tighter than the full accepted set)
               numSiblingsToVisit =
-                  (int) Math.min(siblings.length, results.visitLimit() - results.visitedCount());
-              // Only mark as visited the siblings we will actually score; the rest remain
+                  (int) Math.min(siblingsOrd.length, results.visitLimit() - results.visitedCount());
+              // Only mark as visited the siblingsOrd we will actually score; the rest remain
               // reachable via normal graph traversal so a better child can still be found
-              for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+              for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblingsOrd[s]);
             }
           }
         }
         // Collect the ep node here so after we have a correctly updated minCompetitiveSimilarity
         results.collect(ep, score);
         if (numSiblingsToVisit > 0) {
+          // IF NUMSIBLINGSTOVISIT IS NOT LIMITED WE CAN REMOVE THE VARIABLE AND USE SIBLINGS LENGTH
           siblingScores =
               scoreHnswNodes(
                   results,
                   scorer,
                   candidates,
                   acceptOrds,
-                  siblings,
+                  siblingsOrd,
                   numSiblingsToVisit,
                   siblingScores);
         }
@@ -153,26 +154,26 @@ protected static float[] scoreHnswNodes(
       RandomVectorScorer scorer,
       NeighborQueue candidates,
       Bits acceptOrds,
-      int[] hnswNodes,
-      int numSiblings,
+      int[] hnswNodesOrd,
+      int numHnswNodes,
       float[] scores)
       throws IOException {
     // If siblingScores not defined yet or too small to collect scores a new one is created
     // Otherwise we reuse the old one that will be overridden in bulkScore with new scores
-    if (scores == null || scores.length < numSiblings) {
-      scores = new float[numSiblings];
+    if (scores == null || scores.length < numHnswNodes) {
+      scores = new float[numHnswNodes];
     }
-    float maxScore = scorer.bulkScore(hnswNodes, scores, numSiblings);
-    results.incVisitedCount(numSiblings);
+    float maxScore = scorer.bulkScore(hnswNodesOrd, scores, numHnswNodes);
+    results.incVisitedCount(numHnswNodes);
     if (maxScore > results.minCompetitiveSimilarity()) {
       float minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
-      for (int j = 0; j < numSiblings; j++) {
+      for (int j = 0; j < numHnswNodes; j++) {
         float sibScore = scores[j];
         // We avoid adding to candidates a sibling with a bad score
         if (sibScore >= minSimilarity) {
-          candidates.add(hnswNodes[j], sibScore);
-          if (acceptOrds == null || acceptOrds.get(hnswNodes[j])) {
-            results.collect(hnswNodes[j], sibScore);
+          candidates.add(hnswNodesOrd[j], sibScore);
+          if (acceptOrds == null || acceptOrds.get(hnswNodesOrd[j])) {
+            results.collect(hnswNodesOrd[j], sibScore);
             minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
           }
         }
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 5a516e435930..57b04e26a2df 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -348,20 +348,21 @@ void searchLevel(
           if (score >= minAcceptedSimilarity) {
             candidates.add(node, score);
             if (acceptOrds == null || acceptOrds.get(node)) {
-              // Fetch siblings BEFORE collect() so the parent is not yet in the heap
-              int[] siblings = null;
+              // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
+              int[] siblingsOrd = null;
               int numSiblingsToVisit = 0;
               // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
               if (results instanceof OrdinalTranslatedKnnCollector collector) {
                 if (collector.isSiblingExpansionCollector()) {
-                  siblings = collector.getSiblingOrdinals(node, visited);
-                  if (siblings.length > 0) {
+                  siblingsOrd = collector.getSiblingOrdinals(node, visited);
+                  if (siblingsOrd.length > 0) {
+                    // TO DISCUSS IF NECESSARY
                     numSiblingsToVisit =
                         (int)
-                            Math.min(siblings.length, results.visitLimit() - results.visitedCount());
-                    // Only mark as visited the siblings we will actually score; the rest remain
+                            Math.min(siblingsOrd.length, results.visitLimit() - results.visitedCount());
+                    // Only mark as visited the siblingsOrd we will actually score; the rest remain
                     // reachable via normal graph traversal so a better child can still be found
-                    for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblings[s]);
+                    for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblingsOrd[s]);
                   }
                 }
               }
@@ -374,16 +375,17 @@ void searchLevel(
                   shouldExploreMinSim = true;
                 }
               }
-              // Score and collect all siblings of the newly-discovered parent
+              // Score and collect all siblingsOrd of the newly-discovered parent
               if (numSiblingsToVisit > 0) {
                 float prevMinSim = results.minCompetitiveSimilarity();
+                // IF NUMSIBLING IS NOT LIMITED WE CAN REMOVE THE VARIABLE AND USE SIBLINGS LENGTH
                 siblingScores =
                     scoreHnswNodes(
                         results,
                         scorer,
                         candidates,
                         acceptOrds,
-                        siblings,
+                        siblingsOrd,
                         numSiblingsToVisit,
                         siblingScores);
                 if (results.minCompetitiveSimilarity() > prevMinSim) {

From 70ea159db6cf16282e6a4235c2733c6b7a506e81 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 7 May 2026 16:07:50 +0200
Subject: [PATCH 17/23] Return empty array instead of null in buildDocToOrd

---
 ...versifyingNearestChildrenKnnCollector.java |  4 +-
 ...ingNearestChildrenKnnCollectorManager.java | 37 ++++++++++++++-----
 ...versifyingChildrenKnnSiblingExpansion.java |  6 +--
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index f4e98cea8d8a..50ca21074fa7 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -49,7 +49,7 @@ class DiversifyingNearestChildrenKnnCollector extends AbstractKnnCollector
    * @param searchStrategy The search strategy to use
    * @param parentBitSet The leaf parent bitset
    * @param docToOrd precomputed array mapping docId to vector ordinal; {@code -1} entries mean the
-   *     document has no vector. May be {@code null} to disable sibling expansion.
+   *     document has no vector. An empty array disables sibling expansion.
    */
   public DiversifyingNearestChildrenKnnCollector(
       int k,
@@ -148,7 +148,7 @@ public int docIdToOrdinal(int docId) {
     //  - future refactoring that changes the array size
     //  - misuse when constructing the collector manually (as in tests, e.g. the
     // docIdToOrdinal(9999) test case)
-    if (docToOrd == null || docId >= docToOrd.length) {
+    if (docId >= docToOrd.length) {
       return -1;
     }
     return docToOrd[docId];
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
index 9914b285e0ac..a908b203663a 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
@@ -36,10 +36,6 @@
  */
 public class DiversifyingNearestChildrenKnnCollectorManager implements KnnCollectorManager {
 
-  // Sentinel stored in the cache to represent "this segment has no vectors for the field",
-  // because ConcurrentHashMap does not allow null values.
-  private static final int[] NO_VECTORS = new int[0];
-
   // Cache keyed by (segment core cache key → (field name → docToOrd array)).
   // Entries are evicted automatically when the segment is closed via addClosedListener.
   private static final ConcurrentHashMap<IndexReader.CacheKey, ConcurrentHashMap<String, int[]>>
@@ -123,30 +119,51 @@ private int[] getCachedDocToOrd(LeafReaderContext context) throws IOException {
     }
     int[] cached = fieldMap.get(field);
     if (cached != null) {
-      return cached == NO_VECTORS ? null : cached;
+      return cached;
     }
     int[] built = buildDocToOrd(context);
-    int[] stored = built != null ? built : NO_VECTORS;
-    int[] race = fieldMap.putIfAbsent(field, stored);
-    return race != null ? (race == NO_VECTORS ? null : race) : built;
+    int[] race = fieldMap.putIfAbsent(field, built);
+    return race != null ? race : built;
   }
 
   /**
    * Builds a docId-to-ordinal array for the given leaf, mapping each docId to its vector ordinal.
    *
-   * <p>Returns {@code null} if the field has no vector values in this segment at all — sibling
+   * <p>Returns an empty array if the field has no vector values in this segment at all — sibling
    * expansion will be disabled for this leaf.
    *
    * <p>Otherwise returns an array of size {@code maxDoc} where each entry is the vector ordinal for
    * that docId, or {@code -1} if that specific document has no vector (sparse indexing).
    */
+  // Step 1 — Index time: ordinals are assigned by insertion order
+  // In Lucene99FlatVectorsWriter.addValue(), each vector is appended to an ArrayList (vectors.add(copy)) and its docId
+  // is recorded in docsWithField. Documents are always added in ascending docId order (enforced by assert docID >
+  // lastDocID). So ordinal 0 = first doc with a vector, ordinal 1 = second, etc.
+  //
+  // Step 2 — Index time: ordToDoc mapping is written in the same order
+  // In OrdToDocDISIReaderConfiguration.writeStoredMeta(), docsWithField.iterator() is iterated in ascending docId
+  // order, and each docId is written to DirectMonotonicWriter sequentially. The i-th value written becomes ordinal
+  // i — so the ordToDoc array stored on disk is exactly: ordToDoc[0] = first docId, ordToDoc[1] = second docId, ...
+  //
+  // Step 3 — Query time: buildDocToOrd inverts the same ordering
+  // getFloatVectorValues(field).iterator() also yields docIds in ascending order (same set, same order as
+  // docsWithField at index time). The loop:
+  // while (iter.nextDoc() != NO_MORE_DOCS) {
+  //   docToOrd[iter.docID()] = ord++;
+  // }
+  // assigns ord = 0 to the first docId, ord = 1 to the second — exactly inverting the ordToDoc array written at step 2.
+  //
+  // Step 4 — The HNSW graph uses these same ordinals as node IDs
+  // HNSW nodes are identified by their ordinal (the position in the flat vector store). So when the searcher returns
+  // ordinal k as a graph node, docToOrd[docId] = k being correct means docIdToOrdinal will find the right HNSW node
+  // for any sibling docId.
   private int[] buildDocToOrd(LeafReaderContext context) throws IOException {
     FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
     // fi = null if the field doesn't exist in this segment at all.
     // fi.getVectorDimension() = 0 if the field exist in the segment but was not indexed as a vector
     // field.
     if (fi == null || fi.getVectorDimension() == 0) {
-      return null;
+      return new int[0];
     }
     DocIdSetIterator iter =
         switch (fi.getVectorEncoding()) {
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index 0f9c3467d162..9eaf98924191 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -134,11 +134,11 @@ public void testDocIdToOrdinal_correctMapping() throws IOException {
     assertEquals(-1, c.docIdToOrdinal(9999)); // beyond array bounds
   }
 
-  public void testDocIdToOrdinal_nullMapping_alwaysMinusOne() throws IOException {
-    // Collector created without a docToOrd array (sibling expansion disabled)
+  public void testDocIdToOrdinal_emptyMapping_alwaysMinusOne() throws IOException {
+    // Collector created with an empty docToOrd array (sibling expansion disabled)
     BitSet parents = parentBitSet(2, 2);
     DiversifyingNearestChildrenKnnCollector c =
-        new DiversifyingNearestChildrenKnnCollector(5, Integer.MAX_VALUE, null, parents, null);
+        new DiversifyingNearestChildrenKnnCollector(5, Integer.MAX_VALUE, null, parents, new int[0]);
 
     assertEquals(-1, c.docIdToOrdinal(0));
     assertEquals(-1, c.docIdToOrdinal(1));

From 0d6154d1dff6dd1dde18c4d1bff098fcd10a34ad Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 7 May 2026 16:11:37 +0200
Subject: [PATCH 18/23] Why check on field info is needed

---
 .../DiversifyingNearestChildrenKnnCollectorManager.java  | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
index a908b203663a..9b0a19cc13f4 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
@@ -162,6 +162,15 @@ private int[] buildDocToOrd(LeafReaderContext context) throws IOException {
     // fi = null if the field doesn't exist in this segment at all.
     // fi.getVectorDimension() = 0 if the field exist in the segment but was not indexed as a vector
     // field.
+    //
+    // 1. approximateSearch calls newCollector before searchNearestVectors
+    // 2. newCollector calls buildDocToOrd
+    // 3. Only then searchNearestVectors is called
+    // So buildDocToOrd is called for every segment before Lucene gets a chance to short-circuit on no vectors.
+    // The guard in buildDocToOrd is genuinely needed — without it, calling getFloatVectorValues on a segment with
+    // no vectors would return null (as we saw in CodecReader) and .iterator() would NPE.
+    // The alternative would be to guard in newCollector itself before calling getCachedDocToOrd, but the current
+    // placement is fine.
     if (fi == null || fi.getVectorDimension() == 0) {
       return new int[0];
     }

From 97828c6d139b823e5e393a5867f95abf032770ff Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 7 May 2026 16:46:04 +0200
Subject: [PATCH 19/23] Addressed Ben comments about reusing scratch space

---
 .../lucene/util/hnsw/AbstractHnswGraphSearcher.java |  4 ++--
 .../apache/lucene/util/hnsw/HnswGraphSearcher.java  |  4 ++--
 .../util/hnsw/OrdinalTranslatedKnnCollector.java    | 13 +++++++++----
 ...TestDiversifyingChildrenKnnSiblingExpansion.java |  6 +++---
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index e9c2896f916a..0823eb43b12d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -97,6 +97,7 @@ protected static void scoreEntryPoints(
     scorer.bulkScore(eps, scores, eps.length);
     results.incVisitedCount(eps.length);
     float[] siblingScores = null;
+    int[] siblingsOrd = new int[0];
     for (int i = 0; i < eps.length; i++) {
       float score = scores[i];
       int ep = eps[i];
@@ -104,12 +105,11 @@ protected static void scoreEntryPoints(
       candidates.add(ep, score);
       if (acceptOrds == null || acceptOrds.get(ep)) {
         // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
-        int[] siblingsOrd = null;
         int numSiblingsToVisit = 0;
         // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
         if (results instanceof OrdinalTranslatedKnnCollector collector) {
           if (collector.isSiblingExpansionCollector()) {
-            siblingsOrd = collector.getSiblingOrdinals(ep, visited);
+            siblingsOrd = collector.getSiblingOrdinals(ep, visited, siblingsOrd);
             if (siblingsOrd.length > 0) {
               //  how many siblingsOrd are actually scored to avoid exceeding the visit budget.
               //  controls the early termination condition. early terminates the search if we reach
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 57b04e26a2df..40f005fa1956 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -45,6 +45,7 @@ public class HnswGraphSearcher extends AbstractHnswGraphSearcher {
   protected int[] bulkNodes = null;
   protected float[] bulkScores = null;
   protected float[] siblingScores = null;
+  protected int[] siblingsOrd = new int[0];
 
   /**
    * HNSW search is roughly logarithmic. This doesn't take maxConn into account, but it is a pretty
@@ -349,12 +350,11 @@ void searchLevel(
             candidates.add(node, score);
             if (acceptOrds == null || acceptOrds.get(node)) {
               // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
-              int[] siblingsOrd = null;
               int numSiblingsToVisit = 0;
               // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
               if (results instanceof OrdinalTranslatedKnnCollector collector) {
                 if (collector.isSiblingExpansionCollector()) {
-                  siblingsOrd = collector.getSiblingOrdinals(node, visited);
+                  siblingsOrd = collector.getSiblingOrdinals(node, visited, siblingsOrd);
                   if (siblingsOrd.length > 0) {
                     // TO DISCUSS IF NECESSARY
                     numSiblingsToVisit =
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index 9f3324a567f3..a2b70044bc95 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -60,7 +60,7 @@ public boolean isSiblingExpansionCollector() {
     return collector instanceof DocSiblingExpansion;
   }
 
-  public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
+  public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes, int[] siblingOrdinals) {
     DocSiblingExpansion docExpanderCollector = (DocSiblingExpansion) collector;
     int docId = vectorOrdinalToDocId.apply(hnswNode);
     // We do not check if parent is in heap since if we already seed A
@@ -68,7 +68,9 @@ public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
     //   we then found B through graph traversal. We want to visit it even if we already visited A.
     //   We do not visit siblings in score order.
     int[] siblingDocIds = docExpanderCollector.findSiblingDocIds(docId);
-    int[] siblingOrdinals = new int[siblingDocIds.length];
+    if (siblingOrdinals.length < siblingDocIds.length) {
+        siblingOrdinals = new int[siblingDocIds.length];
+    }
     // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0
     // so this variable is necessary.
     // due to visited result we could have a partial array to return
@@ -82,9 +84,12 @@ public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes) {
       //     was never called and the parent is not in the heap. Expansion from a different
       //     child A finds B already visited.
       if (sibOrd >= 0 && !visitedHnswNodes.get(sibOrd)) {
-        siblingOrdinals[count++] = sibOrd;
+          siblingOrdinals[count++] = sibOrd;
       }
     }
-    return count > 0 ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count) : new int[0];
+    if (count == 0) {
+      return new int[0];
+    }
+    return count < siblingOrdinals.length ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count) : siblingOrdinals;
   }
 }
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index 9eaf98924191..012124453052 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -223,7 +223,7 @@ public void testGetSiblingOrdinals_parentAlreadyInHeap_returnsUnvisitedSiblings(
 
     // C1 (ordinal 1) is not yet visited; must still be returned as an expansion candidate
     FixedBitSet visited = new FixedBitSet(3);
-    int[] result = collector.getSiblingOrdinals(0, visited);
+    int[] result = collector.getSiblingOrdinals(0, visited, new int[0]);
     assertNotEquals("unvisited sibling must be returned even when parent is already in heap", 0, result.length);
     assertArrayEquals(new int[] {1}, result);
   }
@@ -247,7 +247,7 @@ public void testPendingSiblingOrdinals_filtersAlreadyVisited() throws IOExceptio
     visited.set(1);
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 and C2
-    int[] result = collector.getSiblingOrdinals(0, visited);
+    int[] result = collector.getSiblingOrdinals(0, visited, new int[0]);
     assertNotEquals(0, result.length);
     assertArrayEquals("C1 must be filtered (visited); only C2 remains", new int[] {2}, result);
   }
@@ -270,7 +270,7 @@ public void testPendingSiblingOrdinals_filtersSparseSiblings() throws IOExceptio
     FixedBitSet visited = new FixedBitSet(4);
 
     // Trigger on C0 (ordinal 0 → docId 0); siblings are C1 (sparse) and C2
-    int[] result = collector.getSiblingOrdinals(0, visited);
+    int[] result = collector.getSiblingOrdinals(0, visited, new int[0]);
     assertNotEquals(0, result.length);
     assertArrayEquals("C1 (no vector) must be filtered; only C2 remains", new int[] {1}, result);
   }

From 51ddd9faeb476fe8d8f3dda093840339046f6503 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Thu, 7 May 2026 16:58:36 +0200
Subject: [PATCH 20/23] Gradlew tidy

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  |  6 ++--
 .../lucene/util/hnsw/DocSiblingExpansion.java | 18 ++++++----
 .../lucene/util/hnsw/HnswGraphSearcher.java   |  6 ++--
 .../hnsw/OrdinalTranslatedKnnCollector.java   | 11 +++---
 ...versifyingNearestChildrenKnnCollector.java |  6 +---
 ...ingNearestChildrenKnnCollectorManager.java | 36 ++++++++++++-------
 ...versifyingChildrenKnnSiblingExpansion.java |  6 ++--
 7 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index 0823eb43b12d..f7ebbf4321b4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -106,7 +106,8 @@ protected static void scoreEntryPoints(
       if (acceptOrds == null || acceptOrds.get(ep)) {
         // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
         int numSiblingsToVisit = 0;
-        // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
+        // The instanceof check is needed: this method is also called with a
+        // GraphBuilderKnnCollector
         if (results instanceof OrdinalTranslatedKnnCollector collector) {
           if (collector.isSiblingExpansionCollector()) {
             siblingsOrd = collector.getSiblingOrdinals(ep, visited, siblingsOrd);
@@ -114,7 +115,8 @@ protected static void scoreEntryPoints(
               //  how many siblingsOrd are actually scored to avoid exceeding the visit budget.
               //  controls the early termination condition. early terminates the search if we reach
               //  visitLimit nodes
-              //  if this visit limit is high we just navigate the graph until we do not have any node
+              //  if this visit limit is high we just navigate the graph until we do not have any
+              // node
               //  with a score higher than the ones already collected
               //  Current values it could assume:
               //  -  No filter → Integer.MAX_VALUE (no constraint tighter than the full segment)
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
index 41112e35ccb1..d0200f58b06c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/DocSiblingExpansion.java
@@ -18,9 +18,9 @@
 package org.apache.lucene.util.hnsw;
 
 /**
- * Implemented by collectors that
- * understand parent-child document relationships and can enumerate sibling document ids for a given
- * child document id, as well as translate document ids back to vector ordinals.
+ * Implemented by collectors that understand parent-child document relationships and can enumerate
+ * sibling document ids for a given child document id, as well as translate document ids back to
+ * vector ordinals.
  *
  * <p>This interface is used internally by {@link OrdinalTranslatedKnnCollector} to bridge between
  * the ordinal space of the HNSW graph and the document-id space of the collector.
@@ -28,12 +28,16 @@
  * @lucene.experimental
  */
 //  The interface cannot be removed. It exists for a module-boundary reason.
-//  DocSiblingExpansion is in lucene/core, while DiversifyingNearestChildrenKnnCollector is in lucene/join.
-//  The dependency is one-way: join depends on core, never the reverse. So OrdinalTranslatedKnnCollector (in core)
+//  DocSiblingExpansion is in lucene/core, while DiversifyingNearestChildrenKnnCollector is in
+// lucene/join.
+//  The dependency is one-way: join depends on core, never the reverse. So
+// OrdinalTranslatedKnnCollector (in core)
 //  has no way to reference DiversifyingNearestChildrenKnnCollector directly.
 //
-//  The interface is the bridge — it lets core call findSiblingDocIds and docIdToOrdinal on the collector without
-//  creating a circular dependency. Removing it would require either moving OrdinalTranslatedKnnCollector into
+//  The interface is the bridge — it lets core call findSiblingDocIds and docIdToOrdinal on the
+// collector without
+//  creating a circular dependency. Removing it would require either moving
+// OrdinalTranslatedKnnCollector into
 //  join (bigger refactor) or adding a core → join dependency (illegal in this architecture).
 public interface DocSiblingExpansion {
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 40f005fa1956..62a518a5cc54 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -351,7 +351,8 @@ void searchLevel(
             if (acceptOrds == null || acceptOrds.get(node)) {
               // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
               int numSiblingsToVisit = 0;
-              // The instanceof check is needed: this method is also called with a GraphBuilderKnnCollector
+              // The instanceof check is needed: this method is also called with a
+              // GraphBuilderKnnCollector
               if (results instanceof OrdinalTranslatedKnnCollector collector) {
                 if (collector.isSiblingExpansionCollector()) {
                   siblingsOrd = collector.getSiblingOrdinals(node, visited, siblingsOrd);
@@ -359,7 +360,8 @@ void searchLevel(
                     // TO DISCUSS IF NECESSARY
                     numSiblingsToVisit =
                         (int)
-                            Math.min(siblingsOrd.length, results.visitLimit() - results.visitedCount());
+                            Math.min(
+                                siblingsOrd.length, results.visitLimit() - results.visitedCount());
                     // Only mark as visited the siblingsOrd we will actually score; the rest remain
                     // reachable via normal graph traversal so a better child can still be found
                     for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblingsOrd[s]);
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
index a2b70044bc95..d0afb7e0da93 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/OrdinalTranslatedKnnCollector.java
@@ -64,12 +64,13 @@ public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes, int[] sib
     DocSiblingExpansion docExpanderCollector = (DocSiblingExpansion) collector;
     int docId = vectorOrdinalToDocId.apply(hnswNode);
     // We do not check if parent is in heap since if we already seed A
-    // - A was found and scored (parent added), but we reach the budget limit and were not able to score B,
+    // - A was found and scored (parent added), but we reach the budget limit and were not able to
+    // score B,
     //   we then found B through graph traversal. We want to visit it even if we already visited A.
     //   We do not visit siblings in score order.
     int[] siblingDocIds = docExpanderCollector.findSiblingDocIds(docId);
     if (siblingOrdinals.length < siblingDocIds.length) {
-        siblingOrdinals = new int[siblingDocIds.length];
+      siblingOrdinals = new int[siblingDocIds.length];
     }
     // siblingOrdinals is pre-allocated to siblingDocIds.length and Java initializes int arrays to 0
     // so this variable is necessary.
@@ -84,12 +85,14 @@ public int[] getSiblingOrdinals(int hnswNode, BitSet visitedHnswNodes, int[] sib
       //     was never called and the parent is not in the heap. Expansion from a different
       //     child A finds B already visited.
       if (sibOrd >= 0 && !visitedHnswNodes.get(sibOrd)) {
-          siblingOrdinals[count++] = sibOrd;
+        siblingOrdinals[count++] = sibOrd;
       }
     }
     if (count == 0) {
       return new int[0];
     }
-    return count < siblingOrdinals.length ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count) : siblingOrdinals;
+    return count < siblingOrdinals.length
+        ? ArrayUtil.copyOfSubArray(siblingOrdinals, 0, count)
+        : siblingOrdinals;
   }
 }
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
index 50ca21074fa7..404f8cd156c9 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollector.java
@@ -136,6 +136,7 @@ public int[] findSiblingDocIds(int childDocId) {
     return idx > 0 ? siblings : new int[0];
   }
 
+  @Override
   public int docIdToOrdinal(int docId) {
     // Conditions explanation:
     //  docToOrd == null — buildDocToOrd returns null when the segment has no vector values at all
@@ -288,11 +289,6 @@ public final int size() {
       return size;
     }
 
-    /** Returns {@code true} if a child of the given parent is already in the heap. */
-    public boolean containsParent(int parentDocId) {
-      return nodeIdHeapIndex.containsKey(parentDocId);
-    }
-
     /**
      * Returns true if the element at heap position {@code k} should be evicted before the element
      * at position {@code j} (i.e. a is "less than" b in the min-heap ordering). Lower score loses
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
index 9b0a19cc13f4..845a0267c5e2 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingNearestChildrenKnnCollectorManager.java
@@ -136,26 +136,35 @@ private int[] getCachedDocToOrd(LeafReaderContext context) throws IOException {
    * that docId, or {@code -1} if that specific document has no vector (sparse indexing).
    */
   // Step 1 — Index time: ordinals are assigned by insertion order
-  // In Lucene99FlatVectorsWriter.addValue(), each vector is appended to an ArrayList (vectors.add(copy)) and its docId
-  // is recorded in docsWithField. Documents are always added in ascending docId order (enforced by assert docID >
+  // In Lucene99FlatVectorsWriter.addValue(), each vector is appended to an ArrayList
+  // (vectors.add(copy)) and its docId
+  // is recorded in docsWithField. Documents are always added in ascending docId order (enforced by
+  // assert docID >
   // lastDocID). So ordinal 0 = first doc with a vector, ordinal 1 = second, etc.
   //
   // Step 2 — Index time: ordToDoc mapping is written in the same order
-  // In OrdToDocDISIReaderConfiguration.writeStoredMeta(), docsWithField.iterator() is iterated in ascending docId
-  // order, and each docId is written to DirectMonotonicWriter sequentially. The i-th value written becomes ordinal
-  // i — so the ordToDoc array stored on disk is exactly: ordToDoc[0] = first docId, ordToDoc[1] = second docId, ...
+  // In OrdToDocDISIReaderConfiguration.writeStoredMeta(), docsWithField.iterator() is iterated in
+  // ascending docId
+  // order, and each docId is written to DirectMonotonicWriter sequentially. The i-th value written
+  // becomes ordinal
+  // i — so the ordToDoc array stored on disk is exactly: ordToDoc[0] = first docId, ordToDoc[1] =
+  // second docId, ...
   //
   // Step 3 — Query time: buildDocToOrd inverts the same ordering
-  // getFloatVectorValues(field).iterator() also yields docIds in ascending order (same set, same order as
+  // getFloatVectorValues(field).iterator() also yields docIds in ascending order (same set, same
+  // order as
   // docsWithField at index time). The loop:
   // while (iter.nextDoc() != NO_MORE_DOCS) {
   //   docToOrd[iter.docID()] = ord++;
   // }
-  // assigns ord = 0 to the first docId, ord = 1 to the second — exactly inverting the ordToDoc array written at step 2.
+  // assigns ord = 0 to the first docId, ord = 1 to the second — exactly inverting the ordToDoc
+  // array written at step 2.
   //
   // Step 4 — The HNSW graph uses these same ordinals as node IDs
-  // HNSW nodes are identified by their ordinal (the position in the flat vector store). So when the searcher returns
-  // ordinal k as a graph node, docToOrd[docId] = k being correct means docIdToOrdinal will find the right HNSW node
+  // HNSW nodes are identified by their ordinal (the position in the flat vector store). So when the
+  // searcher returns
+  // ordinal k as a graph node, docToOrd[docId] = k being correct means docIdToOrdinal will find the
+  // right HNSW node
   // for any sibling docId.
   private int[] buildDocToOrd(LeafReaderContext context) throws IOException {
     FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
@@ -166,10 +175,13 @@ private int[] buildDocToOrd(LeafReaderContext context) throws IOException {
     // 1. approximateSearch calls newCollector before searchNearestVectors
     // 2. newCollector calls buildDocToOrd
     // 3. Only then searchNearestVectors is called
-    // So buildDocToOrd is called for every segment before Lucene gets a chance to short-circuit on no vectors.
-    // The guard in buildDocToOrd is genuinely needed — without it, calling getFloatVectorValues on a segment with
+    // So buildDocToOrd is called for every segment before Lucene gets a chance to short-circuit on
+    // no vectors.
+    // The guard in buildDocToOrd is genuinely needed — without it, calling getFloatVectorValues on
+    // a segment with
     // no vectors would return null (as we saw in CodecReader) and .iterator() would NPE.
-    // The alternative would be to guard in newCollector itself before calling getCachedDocToOrd, but the current
+    // The alternative would be to guard in newCollector itself before calling getCachedDocToOrd,
+    // but the current
     // placement is fine.
     if (fi == null || fi.getVectorDimension() == 0) {
       return new int[0];
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
index 012124453052..2202384bb114 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestDiversifyingChildrenKnnSiblingExpansion.java
@@ -138,7 +138,8 @@ public void testDocIdToOrdinal_emptyMapping_alwaysMinusOne() throws IOException
     // Collector created with an empty docToOrd array (sibling expansion disabled)
     BitSet parents = parentBitSet(2, 2);
     DiversifyingNearestChildrenKnnCollector c =
-        new DiversifyingNearestChildrenKnnCollector(5, Integer.MAX_VALUE, null, parents, new int[0]);
+        new DiversifyingNearestChildrenKnnCollector(
+            5, Integer.MAX_VALUE, null, parents, new int[0]);
 
     assertEquals(-1, c.docIdToOrdinal(0));
     assertEquals(-1, c.docIdToOrdinal(1));
@@ -224,7 +225,8 @@ public void testGetSiblingOrdinals_parentAlreadyInHeap_returnsUnvisitedSiblings(
     // C1 (ordinal 1) is not yet visited; must still be returned as an expansion candidate
     FixedBitSet visited = new FixedBitSet(3);
     int[] result = collector.getSiblingOrdinals(0, visited, new int[0]);
-    assertNotEquals("unvisited sibling must be returned even when parent is already in heap", 0, result.length);
+    assertNotEquals(
+        "unvisited sibling must be returned even when parent is already in heap", 0, result.length);
     assertArrayEquals(new int[] {1}, result);
   }
 

From 9b3aae05bdd1d65e77ffaba49bef08f0fddcb783 Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Fri, 8 May 2026 10:00:42 +0200
Subject: [PATCH 21/23] Changes.txt

---
 lucene/CHANGES.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 331aab15c639..6dd5a7c3e91f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -146,6 +146,8 @@ Optimizations
 
 * GITHUB#15597, GITHUB#15777: Reduce memory usage of NeighborArray (Viliam Durina)
 
+* GITHUB#16034: Sibling expansion as an optimization for KNN vector search over parent-child document relationships. (Anna Ruggero, Alessandro Benedetti)
+
 Bug Fixes
 ---------------------
 * GITHUB#14049: Randomize KNN codec params in RandomCodec. Fixes scalar quantization div-by-zero

From 765cd13842a49383cf5a2a4b392782627cae43cc Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Mon, 11 May 2026 12:31:50 +0200
Subject: [PATCH 22/23] Removed unuseful part related to numSiblingsToVisit

---
 .../util/hnsw/AbstractHnswGraphSearcher.java  | 34 +++++--------------
 .../lucene/util/hnsw/HnswGraphSearcher.java   | 16 ++-------
 2 files changed, 10 insertions(+), 40 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index f7ebbf4321b4..813b5845a122 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -105,34 +105,17 @@ protected static void scoreEntryPoints(
       candidates.add(ep, score);
       if (acceptOrds == null || acceptOrds.get(ep)) {
         // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
-        int numSiblingsToVisit = 0;
         // The instanceof check is needed: this method is also called with a
         // GraphBuilderKnnCollector
         if (results instanceof OrdinalTranslatedKnnCollector collector) {
           if (collector.isSiblingExpansionCollector()) {
             siblingsOrd = collector.getSiblingOrdinals(ep, visited, siblingsOrd);
-            if (siblingsOrd.length > 0) {
-              //  how many siblingsOrd are actually scored to avoid exceeding the visit budget.
-              //  controls the early termination condition. early terminates the search if we reach
-              //  visitLimit nodes
-              //  if this visit limit is high we just navigate the graph until we do not have any
-              // node
-              //  with a score higher than the ones already collected
-              //  Current values it could assume:
-              //  -  No filter → Integer.MAX_VALUE (no constraint tighter than the full segment)
-              //  -  With filter → cardinality (no constraint tighter than the full accepted set)
-              numSiblingsToVisit =
-                  (int) Math.min(siblingsOrd.length, results.visitLimit() - results.visitedCount());
-              // Only mark as visited the siblingsOrd we will actually score; the rest remain
-              // reachable via normal graph traversal so a better child can still be found
-              for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblingsOrd[s]);
-            }
+            for (int ord : siblingsOrd) visited.set(ord);
           }
         }
         // Collect the ep node here so after we have a correctly updated minCompetitiveSimilarity
         results.collect(ep, score);
-        if (numSiblingsToVisit > 0) {
-          // IF NUMSIBLINGSTOVISIT IS NOT LIMITED WE CAN REMOVE THE VARIABLE AND USE SIBLINGS LENGTH
+        if (siblingsOrd.length > 0) {
           siblingScores =
               scoreHnswNodes(
                   results,
@@ -140,7 +123,6 @@ protected static void scoreEntryPoints(
                   candidates,
                   acceptOrds,
                   siblingsOrd,
-                  numSiblingsToVisit,
                   siblingScores);
         }
       }
@@ -157,19 +139,19 @@ protected static float[] scoreHnswNodes(
       NeighborQueue candidates,
       Bits acceptOrds,
       int[] hnswNodesOrd,
-      int numHnswNodes,
       float[] scores)
       throws IOException {
+    int numNodes = hnswNodesOrd.length;
     // If siblingScores not defined yet or too small to collect scores a new one is created
     // Otherwise we reuse the old one that will be overridden in bulkScore with new scores
-    if (scores == null || scores.length < numHnswNodes) {
-      scores = new float[numHnswNodes];
+    if (scores == null || scores.length < numNodes) {
+      scores = new float[numNodes];
     }
-    float maxScore = scorer.bulkScore(hnswNodesOrd, scores, numHnswNodes);
-    results.incVisitedCount(numHnswNodes);
+    float maxScore = scorer.bulkScore(hnswNodesOrd, scores, numNodes);
+    results.incVisitedCount(numNodes);
     if (maxScore > results.minCompetitiveSimilarity()) {
       float minSimilarity = Math.nextUp(results.minCompetitiveSimilarity());
-      for (int j = 0; j < numHnswNodes; j++) {
+      for (int j = 0; j < numNodes; j++) {
         float sibScore = scores[j];
         // We avoid adding to candidates a sibling with a bad score
         if (sibScore >= minSimilarity) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
index 62a518a5cc54..786fc8ee53da 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java
@@ -350,22 +350,12 @@ void searchLevel(
             candidates.add(node, score);
             if (acceptOrds == null || acceptOrds.get(node)) {
               // Fetch siblingsOrd BEFORE collect() so the parent is not yet in the heap
-              int numSiblingsToVisit = 0;
               // The instanceof check is needed: this method is also called with a
               // GraphBuilderKnnCollector
               if (results instanceof OrdinalTranslatedKnnCollector collector) {
                 if (collector.isSiblingExpansionCollector()) {
                   siblingsOrd = collector.getSiblingOrdinals(node, visited, siblingsOrd);
-                  if (siblingsOrd.length > 0) {
-                    // TO DISCUSS IF NECESSARY
-                    numSiblingsToVisit =
-                        (int)
-                            Math.min(
-                                siblingsOrd.length, results.visitLimit() - results.visitedCount());
-                    // Only mark as visited the siblingsOrd we will actually score; the rest remain
-                    // reachable via normal graph traversal so a better child can still be found
-                    for (int s = 0; s < numSiblingsToVisit; s++) visited.set(siblingsOrd[s]);
-                  }
+                  for (int ord : siblingsOrd) visited.set(ord);
                 }
               }
               if (results.collect(node, score)) {
@@ -378,9 +368,8 @@ void searchLevel(
                 }
               }
               // Score and collect all siblingsOrd of the newly-discovered parent
-              if (numSiblingsToVisit > 0) {
+              if (siblingsOrd.length > 0) {
                 float prevMinSim = results.minCompetitiveSimilarity();
-                // IF NUMSIBLING IS NOT LIMITED WE CAN REMOVE THE VARIABLE AND USE SIBLINGS LENGTH
                 siblingScores =
                     scoreHnswNodes(
                         results,
@@ -388,7 +377,6 @@ void searchLevel(
                         candidates,
                         acceptOrds,
                         siblingsOrd,
-                        numSiblingsToVisit,
                         siblingScores);
                 if (results.minCompetitiveSimilarity() > prevMinSim) {
                   minAcceptedSimilarity = Math.nextUp(results.minCompetitiveSimilarity());

From e7604c4fd094027d5e82be8628ebc7b14c0a598a Mon Sep 17 00:00:00 2001
From: Anna Ruggero <a.ruggero@sease.io>
Date: Mon, 11 May 2026 12:34:04 +0200
Subject: [PATCH 23/23] Updated comment

---
 .../org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
index 813b5845a122..db9ef79b2163 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/AbstractHnswGraphSearcher.java
@@ -142,7 +142,7 @@ protected static float[] scoreHnswNodes(
       float[] scores)
       throws IOException {
     int numNodes = hnswNodesOrd.length;
-    // If siblingScores not defined yet or too small to collect scores a new one is created
+    // If scores not defined yet or too small to collect scores a new one is created
     // Otherwise we reuse the old one that will be overridden in bulkScore with new scores
     if (scores == null || scores.length < numNodes) {
       scores = new float[numNodes];