Skip to content

Add filtering for kNN vector indexer test scenarios #130751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ record CmdLineArgs(
int indexThreads,
boolean reindex,
boolean forceMerge,
float filterSelectivity,
long seed,
VectorSimilarityFunction vectorSpace,
int quantizeBits,
VectorEncoding vectorEncoding,
Expand Down Expand Up @@ -75,6 +77,8 @@ record CmdLineArgs(
// Field names of the serialized arguments: each constant is registered with PARSER for reading
// and its getPreferredName() is used as the key when writing in toXContent.
static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
static final ParseField EARLY_TERMINATION_FIELD = new ParseField("early_termination");
// Read as a float and handed to Builder#setFilterSelectivity.
static final ParseField FILTER_SELECTIVITY_FIELD = new ParseField("filter_selectivity");
// Read as a long and handed to Builder#setSeed.
static final ParseField SEED_FIELD = new ParseField("seed");

static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
Builder builder = PARSER.apply(parser, null);
Expand Down Expand Up @@ -106,6 +110,8 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
PARSER.declareBoolean(Builder::setEarlyTermination, EARLY_TERMINATION_FIELD);
PARSER.declareFloat(Builder::setFilterSelectivity, FILTER_SELECTIVITY_FIELD);
PARSER.declareLong(Builder::setSeed, SEED_FIELD);
}

@Override
Expand Down Expand Up @@ -136,6 +142,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
builder.field(EARLY_TERMINATION_FIELD.getPreferredName(), earlyTermination);
builder.field(FILTER_SELECTIVITY_FIELD.getPreferredName(), filterSelectivity);
builder.field(SEED_FIELD.getPreferredName(), seed);
return builder.endObject();
}

Expand Down Expand Up @@ -167,6 +176,8 @@ static class Builder {
// Initial values; each stays in effect unless the matching setter is called.
private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
private int dimensions;
private boolean earlyTermination;
// Starts at 1. NOTE(review): presumably 1 means "no filtering" (every document passes) — confirm against the searcher.
private float filterSelectivity = 1f;
// Hard-coded initial seed. NOTE(review): the literal looks like an epoch-millis timestamp, presumably chosen
// only to keep runs repeatable when no seed is supplied — confirm it carries no other meaning.
private long seed = 1751900822751L;

public Builder setDocVectors(String docVectors) {
this.docVectors = PathUtils.get(docVectors);
Expand Down Expand Up @@ -278,6 +289,16 @@ public Builder setEarlyTermination(Boolean patience) {
return this;
}

/**
 * Sets the filter selectivity that {@link #build()} passes on to the {@code CmdLineArgs} record.
 * The value is a fraction, so it must lie in the closed range {@code [0, 1]}.
 *
 * @param filterSelectivity fraction in {@code [0, 1]}; the builder's initial value is {@code 1}
 * @return this builder, for call chaining
 * @throws IllegalArgumentException if the value is NaN or outside {@code [0, 1]}
 */
public Builder setFilterSelectivity(float filterSelectivity) {
    // Written as a positive range test so that NaN (for which every comparison is false) is rejected too.
    if ((filterSelectivity >= 0f && filterSelectivity <= 1f) == false) {
        throw new IllegalArgumentException("filter_selectivity must be in the range [0, 1], got: " + filterSelectivity);
    }
    this.filterSelectivity = filterSelectivity;
    return this;
}

/**
 * Stores the seed that {@link #build()} passes on to the {@code CmdLineArgs} record.
 * NOTE(review): presumably this seeds the random filter generation — confirm against the code reading {@code seed()}.
 *
 * @param seed the seed value; if this setter is never called the field keeps its hard-coded initial value
 * @return this builder, for call chaining
 */
public Builder setSeed(long seed) {
this.seed = seed;
return this;
}

public CmdLineArgs build() {
if (docVectors == null) {
throw new IllegalArgumentException("Document vectors path must be provided");
Expand Down Expand Up @@ -305,6 +326,8 @@ public CmdLineArgs build() {
indexThreads,
reindex,
forceMerge,
filterSelectivity,
seed,
vectorSpace,
quantizeBits,
vectorEncoding,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,20 @@ public static void main(String[] args) throws Exception {
? cmdLineArgs.nProbes()
: new int[] { 0 };
String indexType = cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT);
Results indexResults = new Results(cmdLineArgs.docVectors().getFileName().toString(), indexType, cmdLineArgs.numDocs());
Results indexResults = new Results(
cmdLineArgs.docVectors().getFileName().toString(),
indexType,
cmdLineArgs.numDocs(),
cmdLineArgs.filterSelectivity()
);
Results[] results = new Results[nProbes.length];
for (int i = 0; i < nProbes.length; i++) {
results[i] = new Results(cmdLineArgs.docVectors().getFileName().toString(), indexType, cmdLineArgs.numDocs());
results[i] = new Results(
cmdLineArgs.docVectors().getFileName().toString(),
indexType,
cmdLineArgs.numDocs(),
cmdLineArgs.filterSelectivity()
);
}
logger.info("Running KNN index tester with arguments: " + cmdLineArgs);
Codec codec = createCodec(cmdLineArgs);
Expand Down Expand Up @@ -244,7 +254,8 @@ public String toString() {
"avg_cpu_count",
"QPS",
"recall",
"visited" };
"visited",
"filter_selectivity" };

// Calculate appropriate column widths based on headers and data

Expand Down Expand Up @@ -274,7 +285,8 @@ public String toString() {
String.format(Locale.ROOT, "%.2f", queryResult.avgCpuCount),
String.format(Locale.ROOT, "%.2f", queryResult.qps),
String.format(Locale.ROOT, "%.2f", queryResult.avgRecall),
String.format(Locale.ROOT, "%.2f", queryResult.averageVisited) };
String.format(Locale.ROOT, "%.2f", queryResult.averageVisited),
String.format(Locale.ROOT, "%.2f", queryResult.filterSelectivity), };
}

printBlock(sb, searchHeaders, queryResultsArray);
Expand Down Expand Up @@ -339,6 +351,7 @@ private int[] calculateColumnWidths(String[] headers, String[]... data) {
static class Results {
final String indexType, indexName;
final int numDocs;
final float filterSelectivity;
long indexTimeMS;
long forceMergeTimeMS;
int numSegments;
Expand All @@ -350,10 +363,11 @@ static class Results {
double netCpuTimeMS;
double avgCpuCount;

Results(String indexName, String indexType, int numDocs) {
Results(String indexName, String indexType, int numDocs, float filterSelectivity) {
this.indexName = indexName;
this.indexType = indexType;
this.numDocs = numDocs;
this.filterSelectivity = filterSelectivity;
}
}

Expand Down
Loading