Skip to content

Commit 8a28b91

Browse files
removing check in TextToVectorUpdateProcessorFactory which brakes update for new dynamic fields (#3426)
* distinguish between dynamic field and field, dynamic fields do not have to exist --------- Co-authored-by: Alessandro Benedetti <[email protected]>
1 parent c3b5f57 commit 8a28b91

File tree

6 files changed

+110
-45
lines changed

6 files changed

+110
-45
lines changed

solr/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ Bug Fixes
374374

375375
* SOLR-17792: Fix deadlocks in ParallelHttpShardHandler, re-implement synchronization in HttpShardHandler (Houston Putman)
376376

377+
* GITHUB#3426: Improving check in TextToVectorUpdateProcessorFactory, which breaks update for new dynamic fields. (Renato Haeberli via Alessandro Benedetti)
378+
377379
Dependency Upgrades
378380
---------------------
379381
* SOLR-17795: Upgrade Lucene to 9.12.2. (Pierre Salagnac, Christine Poerschke, Houston Putman)

solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,28 @@
1818

1919
import dev.langchain4j.data.embedding.Embedding;
2020
import dev.langchain4j.model.embedding.EmbeddingModel;
21+
import java.lang.invoke.MethodHandles;
2122
import java.lang.reflect.Method;
2223
import java.time.Duration;
2324
import java.util.ArrayList;
2425
import java.util.Map;
2526
import java.util.Objects;
2627
import org.apache.lucene.util.Accountable;
2728
import org.apache.lucene.util.RamUsageEstimator;
29+
import org.apache.solr.common.SolrException;
2830
import org.apache.solr.core.SolrResourceLoader;
2931
import org.apache.solr.llm.textvectorisation.store.TextToVectorModelException;
3032
import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore;
33+
import org.slf4j.Logger;
34+
import org.slf4j.LoggerFactory;
3135

3236
/**
3337
* This object wraps a {@link dev.langchain4j.model.embedding.EmbeddingModel} to encode text to
3438
* vector. It's meant to be used as a managed resource with the {@link
3539
* ManagedTextToVectorModelStore}
3640
*/
3741
public class SolrTextToVectorModel implements Accountable {
42+
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
3843
private static final long BASE_RAM_BYTES =
3944
RamUsageEstimator.shallowSizeOfInstance(SolrTextToVectorModel.class);
4045
private static final String TIMEOUT_PARAM = "timeout";
@@ -80,41 +85,40 @@ public static SolrTextToVectorModel getInstance(
8085
* support, some of them may require to be handled in here as separate switch cases
8186
*/
8287
switch (paramName) {
83-
case TIMEOUT_PARAM:
88+
case TIMEOUT_PARAM -> {
8489
Duration timeOut = Duration.ofSeconds((Long) params.get(paramName));
8590
builder.getClass().getMethod(paramName, Duration.class).invoke(builder, timeOut);
86-
break;
87-
case MAX_SEGMENTS_PER_BATCH_PARAM:
88-
builder
89-
.getClass()
90-
.getMethod(paramName, Integer.class)
91-
.invoke(builder, ((Long) params.get(paramName)).intValue());
92-
break;
93-
case MAX_RETRIES_PARAM:
94-
builder
95-
.getClass()
96-
.getMethod(paramName, Integer.class)
97-
.invoke(builder, ((Long) params.get(paramName)).intValue());
98-
break;
91+
}
92+
case MAX_SEGMENTS_PER_BATCH_PARAM, MAX_RETRIES_PARAM -> builder
93+
.getClass()
94+
.getMethod(paramName, Integer.class)
95+
.invoke(builder, ((Long) params.get(paramName)).intValue());
96+
9997
/*
10098
* For primitive params if there's only one setter available, we call it.
10199
* If there's choice we default to the string one
102100
*/
103-
default:
101+
default -> {
104102
ArrayList<Method> paramNameMatches = new ArrayList<>();
105103
for (var method : builder.getClass().getMethods()) {
106104
if (paramName.equals(method.getName()) && method.getParameterCount() == 1) {
107105
paramNameMatches.add(method);
108106
}
109107
}
110108
if (paramNameMatches.size() == 1) {
111-
paramNameMatches.get(0).invoke(builder, params.get(paramName));
109+
paramNameMatches.getFirst().invoke(builder, params.get(paramName));
112110
} else {
113-
builder
114-
.getClass()
115-
.getMethod(paramName, String.class)
116-
.invoke(builder, params.get(paramName).toString());
111+
try {
112+
builder
113+
.getClass()
114+
.getMethod(paramName, String.class)
115+
.invoke(builder, params.get(paramName).toString());
116+
} catch (NoSuchMethodException e) {
117+
log.error("Parameter {} not supported by model {}", paramName, className);
118+
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
119+
}
117120
}
121+
}
118122
}
119123
}
120124
}

solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,16 @@ public void init(final NamedList<?> args) {
7070
public UpdateRequestProcessor getInstance(
7171
SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
7272
IndexSchema latestSchema = req.getCore().getLatestSchema();
73-
if (!latestSchema.hasExplicitField(inputField)) {
73+
74+
if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) {
7475
throw new SolrException(
7576
SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\"");
7677
}
77-
if (!latestSchema.hasExplicitField(outputField)) {
78+
79+
if (!latestSchema.isDynamicField(outputField) && !latestSchema.hasExplicitField(outputField)) {
7880
throw new SolrException(
7981
SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + outputField + "\"");
8082
}
81-
8283
final SchemaField outputFieldSchema = latestSchema.getField(outputField);
8384
assertIsDenseVectorField(outputFieldSchema);
8485

solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
<fieldType name="high_dimensional_float_knn_vector" class="solr.DenseVectorField" vectorDimension="2048" similarityFunction="cosine" vectorEncoding="FLOAT32"/>
2626
<fieldType name="high_dimensional_byte_knn_vector" class="solr.DenseVectorField" vectorDimension="2048" similarityFunction="cosine" vectorEncoding="BYTE"/>
2727
<fieldType name="plong" class="solr.LongPointField" useDocValuesAsStored="false"/>
28+
<dynamicField name="*_s" type="string" indexed="true" stored="true" />
29+
<dynamicField name="*_vector1024" type="knn_vector" indexed="true" stored="true" />
2830

2931
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
3032
<field name="vector" type="knn_vector" indexed="true" stored="true"/>

solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,13 @@
2121
import org.apache.solr.common.util.NamedList;
2222
import org.apache.solr.core.SolrCore;
2323
import org.apache.solr.llm.TestLlmBase;
24+
import org.apache.solr.llm.textvectorisation.model.SolrTextToVectorModel;
25+
import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore;
2426
import org.apache.solr.request.SolrQueryRequestBase;
27+
import org.apache.solr.update.processor.UpdateRequestProcessor;
28+
import org.junit.After;
2529
import org.junit.AfterClass;
30+
import org.junit.Before;
2631
import org.junit.BeforeClass;
2732
import org.junit.Test;
2833

@@ -38,6 +43,18 @@ public static void cleanup() throws Exception {
3843
afterTest();
3944
}
4045

46+
SolrCore collection1;
47+
48+
@Before
49+
public void setup() {
50+
collection1 = solrClientTestRule.getCoreContainer().getCore("collection1");
51+
}
52+
53+
@After
54+
public void after() {
55+
collection1.close();
56+
}
57+
4158
@Test
4259
public void init_fullArgs_shouldInitAllParams() {
4360
NamedList<String> args = new NamedList<>();
@@ -100,13 +117,11 @@ public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage(
100117
TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory();
101118

102119
ModifiableSolrParams params = new ModifiableSolrParams();
103-
SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1");
104120
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
105121
factoryToTest.init(args);
106122
SolrException e =
107123
assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null));
108124
assertEquals("undefined field: \"notExistentOutput\"", e.getMessage());
109-
collection1.close();
110125
}
111126

112127
/* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */
@@ -120,14 +135,12 @@ public void init_notDenseVectorOutputField_shouldThrowExceptionWithDetailedMessa
120135
TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory();
121136

122137
ModifiableSolrParams params = new ModifiableSolrParams();
123-
SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1");
124138
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
125139
factoryToTest.init(args);
126140
SolrException e =
127141
assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null));
128142
assertEquals(
129143
"only DenseVectorField is compatible with Vector Query Parsers: _text_", e.getMessage());
130-
collection1.close();
131144
}
132145

133146
/* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */
@@ -141,12 +154,51 @@ public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage()
141154
TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory();
142155

143156
ModifiableSolrParams params = new ModifiableSolrParams();
144-
SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1");
145157
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
146158
factoryToTest.init(args);
147159
SolrException e =
148160
assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null));
149161
assertEquals("undefined field: \"notExistentInput\"", e.getMessage());
150-
collection1.close();
162+
}
163+
164+
/* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */
165+
@Test
166+
public void init_notExistentDynamicInputField_shouldNotThrowException() {
167+
String inputFieldName = "text_s";
168+
String outputFieldName = "vector";
169+
170+
UpdateRequestProcessor instance =
171+
createUpdateProcessor(inputFieldName, outputFieldName, collection1, "model1");
172+
assertNotNull(instance);
173+
}
174+
175+
/* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */
176+
@Test
177+
public void init_notExistingDynamicOutputField_shouldNotThrowException() {
178+
String inputFieldName = "_text_";
179+
String outputFieldName = "vec_vector1024";
180+
181+
UpdateRequestProcessor instance =
182+
createUpdateProcessor(inputFieldName, outputFieldName, collection1, "model2");
183+
assertNotNull(instance);
184+
}
185+
186+
private UpdateRequestProcessor createUpdateProcessor(
187+
String inputFieldName, String outputFieldName, SolrCore collection1, String modelName) {
188+
NamedList<String> args = new NamedList<>();
189+
190+
ManagedTextToVectorModelStore.getManagedModelStore(collection1)
191+
.addModel(new SolrTextToVectorModel(modelName, null, null));
192+
args.add("inputField", inputFieldName);
193+
args.add("outputField", outputFieldName);
194+
args.add("model", modelName);
195+
196+
TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory();
197+
ModifiableSolrParams params = new ModifiableSolrParams();
198+
factoryToTest.init(args);
199+
200+
SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {};
201+
202+
return factoryToTest.getInstance(req, null, null);
151203
}
152204
}

solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.solr.common.SolrInputDocument;
2525
import org.apache.solr.llm.TestLlmBase;
2626
import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore;
27+
import org.junit.After;
2728
import org.junit.AfterClass;
2829
import org.junit.BeforeClass;
2930
import org.junit.Test;
@@ -40,6 +41,13 @@ public static void cleanup() throws Exception {
4041
afterTest();
4142
}
4243

44+
@After
45+
public void afterEachTest() throws Exception {
46+
restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1");
47+
restTestHarness.delete(
48+
ManagedTextToVectorModelStore.REST_END_POINT + "/exception-throwing-model"); // clean
49+
}
50+
4351
@Test
4452
public void processAdd_inputField_shouldVectoriseInputField() throws Exception {
4553
loadModel("dummy-model.json"); // preparation
@@ -50,10 +58,7 @@ public void processAdd_inputField_shouldVectoriseInputField() throws Exception {
5058
"textToVector");
5159
assertU(commit());
5260

53-
final String solrQuery = "*:*";
54-
final SolrQuery query = new SolrQuery();
55-
query.setQuery(solrQuery);
56-
query.add("fl", "id,vector");
61+
final SolrQuery query = getSolrQuery();
5762

5863
assertJQ(
5964
"/query" + query.toQueryString(),
@@ -66,6 +71,14 @@ public void processAdd_inputField_shouldVectoriseInputField() throws Exception {
6671
restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up
6772
}
6873

74+
private SolrQuery getSolrQuery() {
75+
final String solrQuery = "*:*";
76+
final SolrQuery query = new SolrQuery();
77+
query.setQuery(solrQuery);
78+
query.add("fl", "id,vector");
79+
return query;
80+
}
81+
6982
/*
7083
This test looks for the 'dummy-1' model, but such model is not loaded, the model store is empty, so the update fails
7184
*/
@@ -93,10 +106,7 @@ public void processAdd_emptyInputField_shouldLogAndIndexWithNoVector() throws Ex
93106
addWithChain(sdoc("id", "98", "_text_", "Vegeta is the saiyan prince."), "textToVector");
94107
assertU(commit());
95108

96-
final String solrQuery = "*:*";
97-
final SolrQuery query = new SolrQuery();
98-
query.setQuery(solrQuery);
99-
query.add("fl", "id,vector");
109+
final SolrQuery query = getSolrQuery();
100110

101111
assertJQ(
102112
"/query" + query.toQueryString(),
@@ -116,10 +126,7 @@ public void processAdd_nullInputField_shouldLogAndIndexWithNoVector() throws Exc
116126
assertU(adoc("id", "98"));
117127
assertU(commit());
118128

119-
final String solrQuery = "*:*";
120-
final SolrQuery query = new SolrQuery();
121-
query.setQuery(solrQuery);
122-
query.add("fl", "id,vector");
129+
final SolrQuery query = getSolrQuery();
123130

124131
assertJQ(
125132
"/query" + query.toQueryString(),
@@ -141,10 +148,7 @@ public void processAdd_failingVectorisation_shouldLogAndIndexWithNoVector() thro
141148
"failingTextToVector");
142149
assertU(commit());
143150

144-
final String solrQuery = "*:*";
145-
final SolrQuery query = new SolrQuery();
146-
query.setQuery(solrQuery);
147-
query.add("fl", "id,vector");
151+
final SolrQuery query = getSolrQuery();
148152

149153
assertJQ(
150154
"/query" + query.toQueryString(),

0 commit comments

Comments
 (0)