Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ New Features
or mixed) via log-odds fusion with softplus gating and sqrt(n) confidence scaling.
(Jaepil Jeong)

* GITHUB#16029: Scalar quantization option to disable centering and writing of float vectors. This
reduces vector storage costs by 4x or more but also reduces quantization accuracy.
(Trevor McCulloch)

Improvements
---------------------
* GITHUB#15823: Implement method to add all stream elements into a PriorityQueue.
Expand Down
5 changes: 4 additions & 1 deletion lucene/core/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
exports org.apache.lucene.codecs.lucene99;
exports org.apache.lucene.codecs.lucene103.blocktree;
exports org.apache.lucene.codecs.lucene104;
exports org.apache.lucene.codecs.lucene105;
exports org.apache.lucene.codecs.perfield;
exports org.apache.lucene.codecs;
exports org.apache.lucene.document;
Expand Down Expand Up @@ -86,7 +87,9 @@
provides org.apache.lucene.codecs.KnnVectorsFormat with
org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat,
org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat,
org.apache.lucene.codecs.lucene104.Lucene104HnswScalarQuantizedVectorsFormat;
org.apache.lucene.codecs.lucene104.Lucene104HnswScalarQuantizedVectorsFormat,
org.apache.lucene.codecs.lucene105.Lucene105ScalarQuantizedVectorsFormat,
org.apache.lucene.codecs.lucene105.Lucene105HnswScalarQuantizedVectorsFormat;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.codecs.lucene104.Lucene104PostingsFormat;
provides org.apache.lucene.index.SortFieldProvider with
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene105;

import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.HNSW_GRAPH_THRESHOLD;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_BEAM_WIDTH;
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_MAX_CONN;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues.ScalarEncoding;

/**
 * A vectors format that uses an HNSW graph to store and search for vectors. Vectors are scalar
 * quantized using {@link Lucene105ScalarQuantizedVectorsFormat} before being stored in the graph.
 *
 * <p>Writing and reading of the graph are delegated to {@link Lucene99HnswVectorsWriter} and {@link
 * Lucene99HnswVectorsReader}; the quantized flat vector storage is delegated to {@link
 * Lucene105ScalarQuantizedVectorsFormat}.
 *
 * @lucene.experimental
 */
public class Lucene105HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat {

  /**
   * The name this format passes to the {@link KnnVectorsFormat} super constructor.
   *
   * <p>NOTE(review): the value says "Binary" while the class name and {@link #toString()} say
   * "Scalar". It is left untouched here because renaming it changes the identifier handed to the
   * super constructor; confirm whether the mismatch is intentional before changing it.
   */
  public static final String NAME = "Lucene105HnswBinaryQuantizedVectorsFormat";

  /**
   * Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
   * {@link Lucene99HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
   */
  private final int maxConn;

  /**
   * The number of candidate neighbors to track while searching the graph for each newly inserted
   * node. Defaults to {@link Lucene99HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph}
   * for details.
   */
  private final int beamWidth;

  /** The format for storing, reading, merging vectors on disk */
  private final Lucene105ScalarQuantizedVectorsFormat flatVectorsFormat;

  /**
   * The threshold to use to bypass HNSW graph building for tiny segments in terms of k for a graph
   * i.e. number of docs to match the query (default is {@link
   * Lucene99HnswVectorsFormat#HNSW_GRAPH_THRESHOLD}).
   *
   * <ul>
   *   <li>0 indicates that the graph is always built.
   *   <li>greater than 0 indicates that the graph needs a certain number of nodes before it starts
   *       building. See {@link Lucene99HnswVectorsFormat#HNSW_GRAPH_THRESHOLD} for details.
   *   <li>Negative values aren't allowed.
   * </ul>
   */
  private final int tinySegmentsThreshold;

  /** Number of merge workers handed to the HNSW writer. */
  private final int numMergeWorkers;

  /** Wraps the caller-supplied {@link ExecutorService}; {@code null} when none was supplied. */
  private final TaskExecutor mergeExec;

  /**
   * Constructs a format using default graph construction parameters and the {@link
   * ScalarEncoding#UNSIGNED_BYTE} encoding.
   */
  public Lucene105HnswScalarQuantizedVectorsFormat() {
    this(
        ScalarEncoding.UNSIGNED_BYTE,
        DEFAULT_MAX_CONN,
        DEFAULT_BEAM_WIDTH,
        DEFAULT_NUM_MERGE_WORKER,
        null,
        HNSW_GRAPH_THRESHOLD);
  }

  /**
   * Constructs a format using the given graph construction parameters and the {@link
   * ScalarEncoding#UNSIGNED_BYTE} encoding.
   *
   * @param maxConn the maximum number of connections to a node in the HNSW graph
   * @param beamWidth the size of the queue maintained during graph construction.
   */
  public Lucene105HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth) {
    this(
        ScalarEncoding.UNSIGNED_BYTE,
        maxConn,
        beamWidth,
        DEFAULT_NUM_MERGE_WORKER,
        null,
        HNSW_GRAPH_THRESHOLD);
  }

  /**
   * Constructs a format using the given graph construction parameters.
   *
   * @param encoding the quantization encoding used to encode the vectors
   * @param maxConn the maximum number of connections to a node in the HNSW graph
   * @param beamWidth the size of the queue maintained during graph construction.
   */
  public Lucene105HnswScalarQuantizedVectorsFormat(
      ScalarEncoding encoding, int maxConn, int beamWidth) {
    this(encoding, maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null, HNSW_GRAPH_THRESHOLD);
  }

  /**
   * Constructs a format using the given graph construction parameters and scalar quantization.
   *
   * @param encoding the quantization encoding used to encode the vectors
   * @param maxConn the maximum number of connections to a node in the HNSW graph
   * @param beamWidth the size of the queue maintained during graph construction.
   * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
   *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
   * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
   *     generated by this format to do the merge
   */
  public Lucene105HnswScalarQuantizedVectorsFormat(
      ScalarEncoding encoding,
      int maxConn,
      int beamWidth,
      int numMergeWorkers,
      ExecutorService mergeExec) {
    this(encoding, maxConn, beamWidth, numMergeWorkers, mergeExec, HNSW_GRAPH_THRESHOLD);
  }

  /**
   * Constructs a format using the given graph construction parameters and scalar quantization,
   * with centering enabled.
   *
   * @param encoding the quantization encoding used to encode the vectors
   * @param maxConn the maximum number of connections to a node in the HNSW graph
   * @param beamWidth the size of the queue maintained during graph construction.
   * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
   *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
   * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
   *     generated by this format to do the merge
   * @param tinySegmentsThreshold the threshold below which HNSW graph building is skipped; must
   *     not be negative
   */
  public Lucene105HnswScalarQuantizedVectorsFormat(
      ScalarEncoding encoding,
      int maxConn,
      int beamWidth,
      int numMergeWorkers,
      ExecutorService mergeExec,
      int tinySegmentsThreshold) {
    this(encoding, true, maxConn, beamWidth, numMergeWorkers, mergeExec, tinySegmentsThreshold);
  }

  /**
   * Constructs a format using the given graph construction parameters and scalar quantization.
   *
   * @param encoding the quantization encoding used to encode the vectors
   * @param enableCentering if {@code false}, no centroid is computed and raw float vectors are not
   *     written to disk (data-blind mode)
   * @param maxConn the maximum number of connections to a node in the HNSW graph
   * @param beamWidth the size of the queue maintained during graph construction.
   * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
   *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
   * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
   *     generated by this format to do the merge
   * @param tinySegmentsThreshold the threshold below which HNSW graph building is skipped; must
   *     not be negative
   * @throws IllegalArgumentException if {@code maxConn} or {@code beamWidth} is not positive or
   *     exceeds its maximum, if {@code tinySegmentsThreshold} is negative, or if an executor is
   *     supplied while {@code numMergeWorkers} is 1
   */
  public Lucene105HnswScalarQuantizedVectorsFormat(
      ScalarEncoding encoding,
      boolean enableCentering,
      int maxConn,
      int beamWidth,
      int numMergeWorkers,
      ExecutorService mergeExec,
      int tinySegmentsThreshold) {
    super(NAME);
    flatVectorsFormat = new Lucene105ScalarQuantizedVectorsFormat(encoding, enableCentering);
    if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
      throw new IllegalArgumentException(
          "maxConn must be positive and less than or equal to "
              + MAXIMUM_MAX_CONN
              + "; maxConn="
              + maxConn);
    }
    if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) {
      throw new IllegalArgumentException(
          "beamWidth must be positive and less than or equal to "
              + MAXIMUM_BEAM_WIDTH
              + "; beamWidth="
              + beamWidth);
    }
    // The field documentation states that negative thresholds are not allowed; enforce it here so
    // an invalid value fails at construction time instead of being silently passed to the writer.
    if (tinySegmentsThreshold < 0) {
      throw new IllegalArgumentException(
          "tinySegmentsThreshold must be non-negative; tinySegmentsThreshold="
              + tinySegmentsThreshold);
    }
    this.maxConn = maxConn;
    this.beamWidth = beamWidth;
    this.tinySegmentsThreshold = tinySegmentsThreshold;
    if (numMergeWorkers == 1 && mergeExec != null) {
      throw new IllegalArgumentException(
          "No executor service is needed as we'll use single thread to merge");
    }
    this.numMergeWorkers = numMergeWorkers;
    // Only wrap the executor when one was supplied; a null executor is kept as null.
    this.mergeExec = mergeExec != null ? new TaskExecutor(mergeExec) : null;
  }

  /**
   * Creates a {@link Lucene99HnswVectorsWriter} configured with this format's graph parameters and
   * a flat vectors writer obtained from the scalar quantized flat format.
   */
  @Override
  public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
    return new Lucene99HnswVectorsWriter(
        state,
        maxConn,
        beamWidth,
        flatVectorsFormat,
        flatVectorsFormat.fieldsWriter(state),
        numMergeWorkers,
        mergeExec,
        tinySegmentsThreshold);
  }

  /**
   * Creates a {@link Lucene99HnswVectorsReader} backed by a flat vectors reader obtained from the
   * scalar quantized flat format.
   */
  @Override
  public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
    return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
  }

  /** Returns 1024 for every field. */
  @Override
  public int getMaxDimensions(String fieldName) {
    return 1024;
  }

  @Override
  public String toString() {
    return "Lucene105HnswScalarQuantizedVectorsFormat(name=Lucene105HnswScalarQuantizedVectorsFormat, maxConn="
        + maxConn
        + ", beamWidth="
        + beamWidth
        + ", tinySegmentsThreshold="
        + tinySegmentsThreshold
        + ", flatVectorFormat="
        + flatVectorsFormat
        + ")";
  }
}
Loading
Loading