Skip to content

Commit 950fc83

Browse files
committed
Changes on rapidsai#37
1 parent acf44fd commit 950fc83

14 files changed

+425
-49
lines changed

.github/workflows/build.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,6 @@ jobs:
5050
date: ${{ inputs.date }}
5151
container_image: "rapidsai/ci-conda:25.10-cuda${{ matrix.cuda_version }}-ubuntu24.04-py3.13"
5252
script: "ci/build_java.sh"
53-
file_to_upload: "java/cuvs-java/target/"
53+
file_to_upload: "target/"
5454
artifact-name: "cuvs-lucene-cuda${{ matrix.cuda_version }}"
5555
sha: ${{ inputs.sha }}

.github/workflows/pr.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ jobs:
7575
arch: "amd64"
7676
container_image: "rapidsai/ci-conda:25.10-cuda${{ matrix.cuda_version }}-ubuntu24.04-py3.13"
7777
script: "ci/test_java.sh"
78-
file_to_upload: "java/cuvs-java/target/"
78+
file_to_upload: "target/"
7979
artifact-name: "cuvs-lucene-cuda${{ matrix.cuda_version }}"
8080
telemetry-summarize:
8181
# This job must use a self-hosted runner to record telemetry traces.

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
*.jar
22
target
33
**/.DS_Store
4+
bin
5+
.project

pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848
<version>10.2.0</version>
4949
<scope>test</scope>
5050
</dependency>
51+
<dependency>
52+
<groupId>org.apache.lucene</groupId>
53+
<artifactId>lucene-backward-codecs</artifactId>
54+
<version>10.2.0</version>
55+
</dependency>
5156
<dependency>
5257
<groupId>org.apache.lucene</groupId>
5358
<artifactId>lucene-test-framework</artifactId>

src/main/java/com/nvidia/cuvs/lucene/CuVS2510GPUVectorsFormat.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.apache.lucene.codecs.KnnVectorsReader;
2727
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
2828
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
29-
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
3029
import org.apache.lucene.index.SegmentReadState;
3130
import org.apache.lucene.index.SegmentWriteState;
3231

@@ -54,17 +53,25 @@ public class CuVS2510GPUVectorsFormat extends KnnVectorsFormat {
5453
static final IndexType DEFAULT_INDEX_TYPE = IndexType.CAGRA;
5554

5655
static CuVSResources resources = cuVSResourcesOrNull();
57-
58-
/** The format for storing, reading, and merging raw vectors on disk. */
59-
private static final FlatVectorsFormat flatVectorsFormat =
60-
new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
56+
static final LuceneProvider LUCENE_PROVIDER;
57+
static final FlatVectorsFormat FLAT_VECTORS_FORMAT;
6158

6259
final int maxDimensions = 4096;
6360
final int cuvsWriterThreads;
6461
final int intGraphDegree;
6562
final int graphDegree;
6663
final CuVS2510GPUVectorsWriter.IndexType indexType; // the index type to build, when writing
6764

65+
// One-time class initialization: obtains the LuceneProvider for the "99" key and uses it to
// create the flat vectors format shared by every instance of this format.
static {
  try {
    LUCENE_PROVIDER = LuceneProvider.getInstance("99");
    FLAT_VECTORS_FORMAT =
        LUCENE_PROVIDER.getLuceneFlatVectorsFormatInstance(DefaultFlatVectorScorer.INSTANCE);
  } catch (Exception e) {
    // Wrap the exception itself rather than only its message: the message can be null and,
    // without the cause, the stack trace of the real failure is lost.
    throw new ExceptionInInitializerError(e);
  }
}
74+
6875
/**
6976
* Initializes the {@link CuVS2510GPUVectorsFormat} with default parameter values.
7077
*
@@ -103,7 +110,7 @@ public CuVS2510GPUVectorsFormat(
103110
@Override
104111
public CuVS2510GPUVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
105112
checkSupported();
106-
var flatWriter = flatVectorsFormat.fieldsWriter(state);
113+
var flatWriter = FLAT_VECTORS_FORMAT.fieldsWriter(state);
107114
return new CuVS2510GPUVectorsWriter(
108115
state, cuvsWriterThreads, intGraphDegree, graphDegree, indexType, resources, flatWriter);
109116
}
@@ -114,7 +121,7 @@ public CuVS2510GPUVectorsWriter fieldsWriter(SegmentWriteState state) throws IOE
114121
@Override
115122
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
116123
checkSupported();
117-
return new CuVS2510GPUVectorsReader(state, resources, flatVectorsFormat.fieldsReader(state));
124+
return new CuVS2510GPUVectorsReader(state, resources, FLAT_VECTORS_FORMAT.fieldsReader(state));
118125
}
119126

120127
/**

src/main/java/com/nvidia/cuvs/lucene/CuVS2510GPUVectorsWriter.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import static com.nvidia.cuvs.lucene.CuVS2510GPUVectorsFormat.CUVS_META_CODEC_EXT;
2121
import static com.nvidia.cuvs.lucene.CuVS2510GPUVectorsFormat.CUVS_META_CODEC_NAME;
2222
import static com.nvidia.cuvs.lucene.CuVS2510GPUVectorsFormat.VERSION_CURRENT;
23-
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
2423
import static org.apache.lucene.index.VectorEncoding.FLOAT32;
2524
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2625
import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance;
@@ -79,6 +78,9 @@ public class CuVS2510GPUVectorsWriter extends KnnVectorsWriter {
7978
/** The name of the CUVS component for the info-stream * */
8079
private static final String CUVS_COMPONENT = "CUVS";
8180

81+
private static final LuceneProvider LUCENE_PROVIDER;
82+
private static final List<VectorSimilarityFunction> VECTOR_SIMILARITY_FUNCTIONS;
83+
8284
// The minimum number of vectors in the dataset required before
8385
// we attempt to build a Cagra index
8486
static final int MIN_CAGRA_INDEX_SIZE = 2;
@@ -96,6 +98,15 @@ public class CuVS2510GPUVectorsWriter extends KnnVectorsWriter {
9698
private final InfoStream infoStream;
9799
private boolean finished;
98100

101+
// One-time class initialization: obtains the LuceneProvider for the "99" key and caches the
// similarity-function list that distFuncToOrd searches.
static {
  try {
    LUCENE_PROVIDER = LuceneProvider.getInstance("99");
    VECTOR_SIMILARITY_FUNCTIONS = LUCENE_PROVIDER.getSimilarityFunctions();
  } catch (Exception e) {
    // Wrap the exception itself rather than only its message: the message can be null and,
    // without the cause, the stack trace of the real failure is lost.
    throw new ExceptionInInitializerError(e);
  }
}
109+
99110
/**
100111
* The cuVS index Types.
101112
*/
@@ -453,8 +464,8 @@ private void writeMeta(
453464
}
454465

455466
static int distFuncToOrd(VectorSimilarityFunction func) {
456-
for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
457-
if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
467+
for (int i = 0; i < VECTOR_SIMILARITY_FUNCTIONS.size(); i++) {
468+
if (VECTOR_SIMILARITY_FUNCTIONS.get(i).equals(func)) {
458469
return (byte) i;
459470
}
460471
}

src/main/java/com/nvidia/cuvs/lucene/Lucene99AcceleratedHNSWVectorsFormat.java

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@
1616
package com.nvidia.cuvs.lucene;
1717

1818
import static com.nvidia.cuvs.lucene.Utils.cuVSResourcesOrNull;
19-
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
20-
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
21-
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER;
2219

2320
import com.nvidia.cuvs.CuVSResources;
2421
import com.nvidia.cuvs.LibraryException;
@@ -29,9 +26,6 @@
2926
import org.apache.lucene.codecs.KnnVectorsWriter;
3027
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
3128
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
32-
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
33-
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
34-
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
3529
import org.apache.lucene.index.SegmentReadState;
3630
import org.apache.lucene.index.SegmentWriteState;
3731

@@ -54,22 +48,36 @@ public class Lucene99AcceleratedHNSWVectorsFormat extends KnnVectorsFormat {
5448
static final String HNSW_META_CODEC_EXT = "vem";
5549
static final String HNSW_INDEX_CODEC_NAME = "Lucene99HnswVectorsFormatIndex";
5650
static final String HNSW_INDEX_EXT = "vex";
51+
static final LuceneProvider LUCENE_PROVIDER;
5752

5853
private static CuVSResources resources = cuVSResourcesOrNull();
5954

60-
/** The format for storing, reading, and merging raw vectors on disk. */
61-
private static final FlatVectorsFormat flatVectorsFormat =
62-
new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE);
55+
private static final FlatVectorsFormat FLAT_VECTORS_FORMAT;
56+
private static final Integer MAX_CONN;
57+
private static final Integer BEAM_WIDTH;
58+
private static final Integer NUM_MERGE_WORKERS;
6359

6460
private final int maxDimensions = 4096;
6561
private final int cuvsWriterThreads;
6662
private final int intGraphDegree;
6763
private final int graphDegree;
6864
private final int hnswLayers;
69-
7065
private final int maxConn;
7166
private final int beamWidth;
7267

68+
// One-time class initialization: obtains the LuceneProvider for the "99" key, reads the default
// HNSW parameters through it, and creates the shared flat vectors format.
static {
  try {
    LUCENE_PROVIDER = LuceneProvider.getInstance("99");
    MAX_CONN = LUCENE_PROVIDER.getStaticIntParam("DEFAULT_MAX_CONN");
    BEAM_WIDTH = LUCENE_PROVIDER.getStaticIntParam("DEFAULT_BEAM_WIDTH");
    // Must read DEFAULT_NUM_MERGE_WORKER (the constant the pre-refactoring code passed to the
    // fallback writer), not DEFAULT_BEAM_WIDTH, which would make the worker count equal to the
    // beam width.
    NUM_MERGE_WORKERS = LUCENE_PROVIDER.getStaticIntParam("DEFAULT_NUM_MERGE_WORKER");
    FLAT_VECTORS_FORMAT =
        LUCENE_PROVIDER.getLuceneFlatVectorsFormatInstance(DefaultFlatVectorScorer.INSTANCE);
  } catch (Exception e) {
    // Wrap the exception itself rather than only its message: the message can be null and,
    // without the cause, the stack trace of the real failure is lost.
    throw new ExceptionInInitializerError(e);
  }
}
80+
7381
/**
7482
* Initializes {@link Lucene99AcceleratedHNSWVectorsFormat} with default values.
7583
*
@@ -81,8 +89,8 @@ public Lucene99AcceleratedHNSWVectorsFormat() {
8189
DEFAULT_INTERMEDIATE_GRAPH_DEGREE,
8290
DEFAULT_GRAPH_DEGREE,
8391
DEFAULT_HNSW_GRAPH_LAYERS,
84-
DEFAULT_MAX_CONN,
85-
DEFAULT_BEAM_WIDTH);
92+
MAX_CONN,
93+
BEAM_WIDTH);
8694
}
8795

8896
/**
@@ -116,7 +124,7 @@ public Lucene99AcceleratedHNSWVectorsFormat(
116124
*/
117125
@Override
118126
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
119-
var flatWriter = flatVectorsFormat.fieldsWriter(state);
127+
var flatWriter = FLAT_VECTORS_FORMAT.fieldsWriter(state);
120128
if (supported()) {
121129
log.info("cuVS is supported so using the Lucene99AcceleratedHNSWVectorsWriter");
122130
return new Lucene99AcceleratedHNSWVectorsWriter(
@@ -125,8 +133,13 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
125133
log.warning(
126134
"GPU based indexing not supported, falling back to using the Lucene99HnswVectorsWriter");
127135
// TODO: Make num merge workers configurable.
128-
return new Lucene99HnswVectorsWriter(
129-
state, maxConn, beamWidth, flatWriter, DEFAULT_NUM_MERGE_WORKER, null);
136+
try {
137+
return LUCENE_PROVIDER.getLuceneHnswVectorsWriterInstance(
138+
state, maxConn, beamWidth, flatWriter, NUM_MERGE_WORKERS, null);
139+
} catch (Exception e) {
140+
// maybe there is a better suited option to throwing RuntimeException? Need to explore.
141+
throw new RuntimeException(e.getMessage());
142+
}
130143
}
131144
}
132145

@@ -135,7 +148,13 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
135148
*/
136149
@Override
137150
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
138-
return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
151+
try {
152+
return LUCENE_PROVIDER.getLuceneHnswVectorsReaderInstance(
153+
state, FLAT_VECTORS_FORMAT.fieldsReader(state));
154+
} catch (Exception e) {
155+
// maybe there is a better suited option to throwing RuntimeException? Need to explore.
156+
throw new RuntimeException(e.getMessage());
157+
}
139158
}
140159

141160
/**

src/main/java/com/nvidia/cuvs/lucene/Lucene99AcceleratedHNSWVectorsWriter.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import static com.nvidia.cuvs.lucene.Lucene99AcceleratedHNSWVectorsFormat.HNSW_INDEX_EXT;
2020
import static com.nvidia.cuvs.lucene.Lucene99AcceleratedHNSWVectorsFormat.HNSW_META_CODEC_EXT;
2121
import static com.nvidia.cuvs.lucene.Lucene99AcceleratedHNSWVectorsFormat.HNSW_META_CODEC_NAME;
22-
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
2322
import static org.apache.lucene.index.VectorEncoding.FLOAT32;
2423
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2524
import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance;
@@ -48,7 +47,6 @@
4847
import org.apache.lucene.codecs.KnnVectorsWriter;
4948
import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter;
5049
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
51-
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
5250
import org.apache.lucene.index.DocsWithFieldSet;
5351
import org.apache.lucene.index.FieldInfo;
5452
import org.apache.lucene.index.FieldInfos;
@@ -87,6 +85,10 @@ public class Lucene99AcceleratedHNSWVectorsWriter extends KnnVectorsWriter {
8785
/** The name of the CUVS component for the info-stream * */
8886
private static final String CUVS_COMPONENT = "CUVS";
8987

88+
private static final LuceneProvider LUCENE_PROVIDER;
89+
private static final Integer VERSION_CURRENT;
90+
private static final List<VectorSimilarityFunction> VECTOR_SIMILARITY_FUNCTIONS;
91+
9092
private final int cuvsWriterThreads;
9193
private final int intGraphDegree;
9294
private final int graphDegree;
@@ -101,6 +103,16 @@ public class Lucene99AcceleratedHNSWVectorsWriter extends KnnVectorsWriter {
101103
private String vemFileName;
102104
private String vexFileName;
103105

106+
// One-time class initialization: obtains the LuceneProvider for the "99" key and caches the
// format version written into the index headers plus the similarity-function list that
// distFuncToOrd searches.
static {
  try {
    LUCENE_PROVIDER = LuceneProvider.getInstance("99");
    VERSION_CURRENT = LUCENE_PROVIDER.getStaticIntParam("VERSION_CURRENT");
    VECTOR_SIMILARITY_FUNCTIONS = LUCENE_PROVIDER.getSimilarityFunctions();
  } catch (Exception e) {
    // Wrap the exception itself rather than only its message: the message can be null and,
    // without the cause, the stack trace of the real failure is lost.
    throw new ExceptionInInitializerError(e);
  }
}
115+
104116
/**
105117
* Initializes {@link Lucene99AcceleratedHNSWVectorsWriter}
106118
*
@@ -147,13 +159,13 @@ public Lucene99AcceleratedHNSWVectorsWriter(
147159
CodecUtil.writeIndexHeader(
148160
hnswMeta,
149161
HNSW_META_CODEC_NAME,
150-
Lucene99HnswVectorsFormat.VERSION_CURRENT,
162+
VERSION_CURRENT,
151163
state.segmentInfo.getId(),
152164
state.segmentSuffix);
153165
CodecUtil.writeIndexHeader(
154166
hnswVectorIndex,
155167
HNSW_INDEX_CODEC_NAME,
156-
Lucene99HnswVectorsFormat.VERSION_CURRENT,
168+
VERSION_CURRENT,
157169
state.segmentInfo.getId(),
158170
state.segmentSuffix);
159171

@@ -673,8 +685,8 @@ private void writeEmpty(FieldInfo fieldInfo) throws IOException {
673685
}
674686

675687
static int distFuncToOrd(VectorSimilarityFunction func) {
676-
for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) {
677-
if (SIMILARITY_FUNCTIONS.get(i).equals(func)) {
688+
for (int i = 0; i < VECTOR_SIMILARITY_FUNCTIONS.size(); i++) {
689+
if (VECTOR_SIMILARITY_FUNCTIONS.get(i).equals(func)) {
678690
return (byte) i;
679691
}
680692
}

0 commit comments

Comments
 (0)