Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/144548.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
area: Vector Search
issues: []
pr: 144548
summary: Nested KNN vs inner hits scoring consistency
type: bug
Original file line number Diff line number Diff line change
Expand Up @@ -587,3 +587,110 @@ setup:

- match: { hits.total.value: 0 }

---
"nested kNN with score_mode max - root score matches inner_hits score (#138496, #138011)":
- requires:
test_runner_features: ["close_to"]
reason: "Assert root and inner hit scores match"

- do:
search:
index: test
body:
_source: false
query:
nested:
path: nested
score_mode: max
query:
knn:
field: nested.vector
query_vector: [-0.5, 90.0, -10, 14.8, -156.0]
num_candidates: 5
inner_hits: { size: 3, "fields": [ "nested.paragraph_id" ], _source: false }

- match: { hits.total.value: 3 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.1._id: "3" }
- match: { hits.hits.2._id: "1" }
# Root score must equal top inner hit score (score_mode max)
- close_to: { hits.hits.0._score: { value: 0.00909090, error: 0.0001 } }
- close_to: { hits.hits.0.inner_hits.nested.hits.hits.0._score: { value: 0.00909090, error: 0.0001 } }
- close_to: { hits.hits.1._score: { value: 0.0021519717, error: 0.0001 } }
- close_to: { hits.hits.1.inner_hits.nested.hits.hits.0._score: { value: 0.0021519717, error: 0.0001 } }
- close_to: { hits.hits.2._score: { value: 0.00001, error: 0.0001 } }
- close_to: { hits.hits.2.inner_hits.nested.hits.hits.0._score: { value: 0.00001, error: 0.0001 } }

---
"nested bool(knn + match) with score_mode max - root score matches inner_hits score (#138011)":
- requires:
test_runner_features: ["close_to"]
reason: "Assert root and inner hit scores match with multi-clause scoring"

- do:
indices.create:
index: nested_score_consistency
body:
mappings:
properties:
nested:
type: nested
properties:
embedding:
type: dense_vector
dims: 5
index: true
similarity: l2_norm
text:
type: text

- do:
index:
index: nested_score_consistency
id: "1"
body:
nested:
- embedding: [0.0, 0.0, 0.0, 0.0, 0.0]
text: "foo bar"

- do:
index:
index: nested_score_consistency
id: "2"
body:
nested:
- embedding: [1.0, 1.0, 1.0, 1.0, 1.0]
text: "foo bar"

- do:
indices.refresh: {}

- do:
search:
index: nested_score_consistency
body:
_source: false
query:
nested:
path: nested
score_mode: max
query:
bool:
must:
- knn:
field: nested.embedding
query_vector: [0.0, 0.0, 0.0, 0.0, 0.0]
num_candidates: 5
- match:
nested.text: "foo"
inner_hits: { size: 3, _source: false }

- match: { hits.total.value: 2 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
# Root score must equal top inner hit score (bool KNN + match combines clause scores; compare via set/match)
- set: { hits.hits.0.inner_hits.nested.hits.hits.0._score: inner_score_doc0 }
- match: { hits.hits.0._score: $inner_score_doc0 }
- set: { hits.hits.1.inner_hits.nested.hits.hits.0._score: inner_score_doc1 }
- match: { hits.hits.1._score: $inner_score_doc1 }

Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.INDEX_FAST_REFRESH_SETTING,
IndexSettings.MAX_RESULT_WINDOW_SETTING,
IndexSettings.MAX_INNER_RESULT_WINDOW_SETTING,
IndexSettings.MAX_NESTED_KNN_CANDIDATES_SETTING,
IndexSettings.MAX_TOKEN_COUNT_SETTING,
IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING,
IndexSettings.MAX_SCRIPT_FIELDS_SETTING,
Expand Down
29 changes: 29 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,20 @@ public final class IndexSettings {
Property.IndexScope
);

/**
* Index setting describing the maximum value of children per parent to be considered by the
* diversifying KNN when a nested query's inner query is a bool with both a KNN clause and other scoring clauses.
* The root score should be equal to the max combined score, hence consistent with inner_hits.
*/
public static final Setting<Integer> MAX_NESTED_KNN_CANDIDATES_SETTING = Setting.intSetting(
"index.max_nested_knn_candidates",
100,
1,
10000,
Property.Dynamic,
Property.IndexScope
);

/**
* Index setting describing the maximum value of allowed `script_fields`that can be retrieved
* per search request. The default maximum of 32 is defensive for the reason that retrieving
Expand Down Expand Up @@ -1197,6 +1211,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private volatile boolean warmerEnabled;
private volatile int maxResultWindow;
private volatile int maxInnerResultWindow;
private volatile int maxNestedKnnCandidates;
private volatile int maxRescoreWindow;
private volatile int maxDocvalueFields;
private volatile int maxScriptFields;
Expand Down Expand Up @@ -1398,6 +1413,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
warmerEnabled = scopedSettings.get(INDEX_WARMER_ENABLED_SETTING);
maxResultWindow = scopedSettings.get(MAX_RESULT_WINDOW_SETTING);
maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING);
maxNestedKnnCandidates = scopedSettings.get(MAX_NESTED_KNN_CANDIDATES_SETTING);
maxRescoreWindow = scopedSettings.get(MAX_RESCORE_WINDOW_SETTING);
maxDocvalueFields = scopedSettings.get(MAX_DOCVALUE_FIELDS_SEARCH_SETTING);
maxScriptFields = scopedSettings.get(MAX_SCRIPT_FIELDS_SETTING);
Expand Down Expand Up @@ -1529,6 +1545,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
scopedSettings.addSettingsUpdateConsumer(INDEX_TRANSLOG_SYNC_INTERVAL_SETTING, this::setTranslogSyncInterval);
scopedSettings.addSettingsUpdateConsumer(MAX_RESULT_WINDOW_SETTING, this::setMaxResultWindow);
scopedSettings.addSettingsUpdateConsumer(MAX_INNER_RESULT_WINDOW_SETTING, this::setMaxInnerResultWindow);
scopedSettings.addSettingsUpdateConsumer(MAX_NESTED_KNN_CANDIDATES_SETTING, this::setMaxNestedKnnCandidates);
scopedSettings.addSettingsUpdateConsumer(MAX_RESCORE_WINDOW_SETTING, this::setMaxRescoreWindow);
scopedSettings.addSettingsUpdateConsumer(MAX_DOCVALUE_FIELDS_SEARCH_SETTING, this::setMaxDocvalueFields);
scopedSettings.addSettingsUpdateConsumer(MAX_SCRIPT_FIELDS_SETTING, this::setMaxScriptFields);
Expand Down Expand Up @@ -1880,6 +1897,18 @@ private void setMaxInnerResultWindow(int maxInnerResultWindow) {
this.maxInnerResultWindow = maxInnerResultWindow;
}

/**
* Returns the maximum number of nested children per parent to consider when the nested inner
* query has both KNN and other scoring clauses (so root score matches inner_hits).
*/
public int getMaxNestedKnnCandidates() {
return maxNestedKnnCandidates;
}

private void setMaxNestedKnnCandidates(int maxNestedKnnCandidates) {
this.maxNestedKnnCandidates = maxNestedKnnCandidates;
}

/**
* Returns the maximum rescore window for search requests.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,15 @@
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -268,7 +271,71 @@ protected int doHashCode() {

@Override
protected Query doToQuery(SearchExecutionContext context) throws IOException {
return toQuery((this.query::toQuery), path, scoreMode, ignoreUnmapped, context);
QueryBuilder innerToBuild = hasNestedKnnWithOtherScoringClauses(query) ? rewriteInnerForNestedKnnMultiClause(query) : query;
return toQuery(innerToBuild::toQuery, path, scoreMode, ignoreUnmapped, context);
}

/**
* Rewrites the inner query so that each KnnVectorQueryBuilder in must/should is replaced by a
* copy with useMaxNestedKnnCandidatesInNested set, so the built query considers more children
* per parent and the root score matches inner_hits. Only rewrites top-level bool.
*/
private static QueryBuilder rewriteInnerForNestedKnnMultiClause(QueryBuilder inner) {
if ((inner instanceof BoolQueryBuilder) == false) {
return inner;
}
BoolQueryBuilder bool = (BoolQueryBuilder) inner;
BoolQueryBuilder rewritten = new BoolQueryBuilder();
rewritten.boost(bool.boost());
rewritten.queryName(bool.queryName());
if (bool.minimumShouldMatch() != null) {
rewritten.minimumShouldMatch(bool.minimumShouldMatch());
}
for (QueryBuilder clause : bool.must()) {
rewritten.must(clause instanceof KnnVectorQueryBuilder knn ? knn.copyWithUseMaxNestedKnnCandidatesInNested(true) : clause);
}
for (QueryBuilder clause : bool.should()) {
rewritten.should(clause instanceof KnnVectorQueryBuilder knn ? knn.copyWithUseMaxNestedKnnCandidatesInNested(true) : clause);
}
for (QueryBuilder clause : bool.filter()) {
rewritten.filter(clause);
}
for (QueryBuilder clause : bool.mustNot()) {
rewritten.mustNot(clause);
}
return rewritten;
}

/**
* Returns true when the inner query is a bool that contains both a KNN clause and at least one
* other scoring clause (must or should). In that case the diversifying KNN should consider
* more children per parent so the root score equals the max combined score (inner_hits).
*/
static boolean hasNestedKnnWithOtherScoringClauses(QueryBuilder query) {
if ((query instanceof BoolQueryBuilder) == false) {
return false;
}
BoolQueryBuilder bool = (BoolQueryBuilder) query;
boolean hasKnn = false;
boolean hasOtherScoring = false;
for (QueryBuilder clause : concat(bool.must(), bool.should())) {
if (clause instanceof KnnVectorQueryBuilder) {
hasKnn = true;
} else {
hasOtherScoring = true;
}
if (hasKnn && hasOtherScoring) {
return true;
}
}
return false;
}

private static List<QueryBuilder> concat(List<QueryBuilder> a, List<QueryBuilder> b) {
List<QueryBuilder> out = new ArrayList<>(a.size() + b.size());
out.addAll(a);
out.addAll(b);
return out;
}

/**
Expand Down
Loading
Loading