From f91f287c4862a2cd1416b202703a8fa1a87cbd22 Mon Sep 17 00:00:00 2001 From: Renato Haeberli Date: Wed, 9 Jul 2025 20:59:38 +0200 Subject: [PATCH 1/7] removing check which brakes update with new dynamic field --- .../update/processor/TextToVectorUpdateProcessorFactory.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java index 559ea9a4309..540b6d0f695 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java @@ -70,10 +70,6 @@ public void init(final NamedList args) { public UpdateRequestProcessor getInstance( SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { IndexSchema latestSchema = req.getCore().getLatestSchema(); - if (!latestSchema.hasExplicitField(inputField)) { - throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\""); - } if (!latestSchema.hasExplicitField(outputField)) { throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + outputField + "\""); From 285a6ef5279ff4046a07db79a459d1fef1437532 Mon Sep 17 00:00:00 2001 From: Renato Haeberli Date: Thu, 10 Jul 2025 21:16:00 +0200 Subject: [PATCH 2/7] removing check which brakes update with new dynamic field, adjust tests for it --- ...extToVectorUpdateProcessorFactoryTest.java | 5 +-- .../TextToVectorUpdateProcessorTest.java | 36 ++++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java index f01aa187537..ad873ecd538 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java @@ -132,7 +132,7 @@ public void init_notDenseVectorOutputField_shouldThrowExceptionWithDetailedMessa /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ @Test - public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() { + public void init_notExistentInputField_shouldNotThrowException() { NamedList args = new NamedList<>(); args.add("inputField", "notExistentInput"); args.add("outputField", "vector"); @@ -144,9 +144,6 @@ public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); - SolrException e = - assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); - assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); collection1.close(); } } diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java index 8614d637c5f..55cf735f9d4 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorTest.java @@ -24,6 +24,7 @@ import org.apache.solr.common.SolrInputDocument; import org.apache.solr.llm.TestLlmBase; import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -40,6 +41,13 @@ public static void cleanup() throws Exception { afterTest(); } + @After + public void afterEachTest() throws Exception { + restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); + restTestHarness.delete( + ManagedTextToVectorModelStore.REST_END_POINT + "/exception-throwing-model"); // clean + } + @Test public void processAdd_inputField_shouldVectoriseInputField() throws Exception { loadModel("dummy-model.json"); // preparation @@ -50,10 +58,7 @@ public void processAdd_inputField_shouldVectoriseInputField() throws Exception { "textToVector"); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), @@ -66,6 +71,14 @@ public void processAdd_inputField_shouldVectoriseInputField() throws Exception { restTestHarness.delete(ManagedTextToVectorModelStore.REST_END_POINT + "/dummy-1"); // clean up } + private SolrQuery getSolrQuery() { + final String solrQuery = "*:*"; + final SolrQuery query = new SolrQuery(); + query.setQuery(solrQuery); + query.add("fl", "id,vector"); + return query; + } + /* This test looks for the 'dummy-1' model, but such model is not loaded, the model store is empty, so the update fails */ @@ -93,10 +106,7 @@ public void processAdd_emptyInputField_shouldLogAndIndexWithNoVector() throws Ex addWithChain(sdoc("id", "98", "_text_", "Vegeta is the saiyan prince."), "textToVector"); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), @@ -116,10 +126,7 @@ public void processAdd_nullInputField_shouldLogAndIndexWithNoVector() throws Exc assertU(adoc("id", "98")); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), @@ -141,10 +148,7 @@ public void processAdd_failingVectorisation_shouldLogAndIndexWithNoVector() thro "failingTextToVector"); assertU(commit()); - final String solrQuery = "*:*"; - final SolrQuery query = new SolrQuery(); - query.setQuery(solrQuery); - query.add("fl", "id,vector"); + final SolrQuery query = getSolrQuery(); assertJQ( "/query" + query.toQueryString(), From 5cec14e23b8de2d55c7c5d83cce18256f604b543 Mon Sep 17 00:00:00 2001 From: Renato Haeberli Date: Thu, 10 Jul 2025 21:16:23 +0200 Subject: [PATCH 3/7] clean up duplicate code segment; add log statement --- .../model/SolrTextToVectorModel.java | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java index e8c5bcf014a..786055941d5 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java @@ -18,6 +18,7 @@ import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.model.embedding.EmbeddingModel; +import java.lang.invoke.MethodHandles; import java.lang.reflect.Method; import java.time.Duration; import java.util.ArrayList; @@ -28,6 +29,8 @@ import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.llm.textvectorisation.store.TextToVectorModelException; import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This object wraps a {@link dev.langchain4j.model.embedding.EmbeddingModel} to encode text to @@ -35,6 +38,7 @@ * ManagedTextToVectorModelStore} */ public class SolrTextToVectorModel implements Accountable { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(SolrTextToVectorModel.class); private static final String TIMEOUT_PARAM = "timeout"; @@ -80,27 +84,20 @@ public static SolrTextToVectorModel getInstance( * support, some of them may require to be handled in here as separate switch cases */ switch (paramName) { - case TIMEOUT_PARAM: + case TIMEOUT_PARAM -> { Duration timeOut = Duration.ofSeconds((Long) params.get(paramName)); builder.getClass().getMethod(paramName, Duration.class).invoke(builder, timeOut); - break; - case MAX_SEGMENTS_PER_BATCH_PARAM: - builder - .getClass() - .getMethod(paramName, Integer.class) - .invoke(builder, ((Long) params.get(paramName)).intValue()); - break; - case MAX_RETRIES_PARAM: - builder - .getClass() - .getMethod(paramName, Integer.class) - .invoke(builder, ((Long) params.get(paramName)).intValue()); - break; + } + case MAX_SEGMENTS_PER_BATCH_PARAM, MAX_RETRIES_PARAM -> builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + /* * For primitive params if there's only one setter available, we call it. * If there's choice we default to the string one */ - default: + default -> { ArrayList paramNameMatches = new ArrayList<>(); for (var method : builder.getClass().getMethods()) { if (paramName.equals(method.getName()) && method.getParameterCount() == 1) { @@ -108,13 +105,19 @@ public static SolrTextToVectorModel getInstance( } } if (paramNameMatches.size() == 1) { - paramNameMatches.get(0).invoke(builder, params.get(paramName)); + paramNameMatches.getFirst().invoke(builder, params.get(paramName)); } else { - builder - .getClass() - .getMethod(paramName, String.class) - .invoke(builder, params.get(paramName).toString()); + try { + builder + .getClass() + .getMethod(paramName, String.class) + .invoke(builder, params.get(paramName).toString()); + } catch (NoSuchMethodException e) { + log.error("Parameter {} not supported by model {}", paramName, className); + throw e; + } } + } } } } From 20b11744f0e9c35f52d8797bd75155fffb274271 Mon Sep 17 00:00:00 2001 From: Renato Haeberli Date: Fri, 11 Jul 2025 18:57:54 +0200 Subject: [PATCH 4/7] distinguish between dynamic field and field, dynamic fields do not have to exist --- .../TextToVectorUpdateProcessorFactory.java | 9 ++- .../solr/collection1/conf/schema.xml | 2 + ...extToVectorUpdateProcessorFactoryTest.java | 69 +++++++++++++++++-- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java index 540b6d0f695..07653df62dd 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java @@ -70,11 +70,16 @@ public void init(final NamedList args) { public UpdateRequestProcessor getInstance( SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { IndexSchema latestSchema = req.getCore().getLatestSchema(); - if (!latestSchema.hasExplicitField(outputField)) { + + if (!latestSchema.isDynamicField(inputField) && !latestSchema.hasExplicitField(inputField)) { throw new SolrException( - SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + outputField + "\""); + SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + inputField + "\""); } + if (!latestSchema.isDynamicField(outputField) && !latestSchema.hasExplicitField(outputField)) { + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "undefined field: \"" + outputField + "\""); + } final SchemaField outputFieldSchema = latestSchema.getField(outputField); assertIsDenseVectorField(outputFieldSchema); diff --git a/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml b/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml index 92aa0e76591..a574b93294a 100644 --- a/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/modules/llm/src/test-files/solr/collection1/conf/schema.xml @@ -25,6 +25,8 @@ + + diff --git a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java index ad873ecd538..c5b7991b3c5 100644 --- a/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java +++ b/solr/modules/llm/src/test/org/apache/solr/llm/textvectorisation/update/processor/TextToVectorUpdateProcessorFactoryTest.java @@ -21,8 +21,13 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.llm.TestLlmBase; +import org.apache.solr.llm.textvectorisation.model.SolrTextToVectorModel; +import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.junit.After; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -38,6 +43,18 @@ public static void cleanup() throws Exception { afterTest(); } + SolrCore collection1; + + @Before + public void setup() { + collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); + } + + @After + public void after() { + collection1.close(); + } + @Test public void init_fullArgs_shouldInitAllParams() { NamedList args = new NamedList<>(); @@ -100,13 +117,11 @@ public void init_notExistentOutputField_shouldThrowExceptionWithDetailedMessage( TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); ModifiableSolrParams params = new ModifiableSolrParams(); - SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); SolrException e = assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); assertEquals("undefined field: \"notExistentOutput\"", e.getMessage()); - collection1.close(); } /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ @@ -120,19 +135,17 @@ public void init_notDenseVectorOutputField_shouldThrowExceptionWithDetailedMessa TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); ModifiableSolrParams params = new ModifiableSolrParams(); - SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); SolrException e = assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); assertEquals( "only DenseVectorField is compatible with Vector Query Parsers: _text_", e.getMessage()); - collection1.close(); } /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ @Test - public void init_notExistentInputField_shouldNotThrowException() { + public void init_notExistentInputField_shouldThrowExceptionWithDetailedMessage() { NamedList args = new NamedList<>(); args.add("inputField", "notExistentInput"); args.add("outputField", "vector"); @@ -141,9 +154,51 @@ public void init_notExistentInputField_shouldNotThrowException() { TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); ModifiableSolrParams params = new ModifiableSolrParams(); - SolrCore collection1 = solrClientTestRule.getCoreContainer().getCore("collection1"); SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; factoryToTest.init(args); - collection1.close(); + SolrException e = + assertThrows(SolrException.class, () -> factoryToTest.getInstance(req, null, null)); + assertEquals("undefined field: \"notExistentInput\"", e.getMessage()); + } + + /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ + @Test + public void init_notExistentDynamicInputField_shouldNotThrowException() { + String inputFieldName = "text_s"; + String outputFieldName = "vector"; + + UpdateRequestProcessor instance = + createUpdateProcessor(inputFieldName, outputFieldName, collection1, "model1"); + assertNotNull(instance); + } + + /* Following test depends on a real solr schema and depends on BeforeClass-AfterClass methods */ + @Test + public void init_notExistingDynamicOutputField_shouldNotThrowException() { + String inputFieldName = "_text_"; + String outputFieldName = "vec_vector1024"; + + UpdateRequestProcessor instance = + createUpdateProcessor(inputFieldName, outputFieldName, collection1, "model2"); + assertNotNull(instance); + } + + private UpdateRequestProcessor createUpdateProcessor( + String inputFieldName, String outputFieldName, SolrCore collection1, String modelName) { + NamedList args = new NamedList<>(); + + ManagedTextToVectorModelStore.getManagedModelStore(collection1) + .addModel(new SolrTextToVectorModel(modelName, null, null)); + args.add("inputField", inputFieldName); + args.add("outputField", outputFieldName); + args.add("model", modelName); + + TextToVectorUpdateProcessorFactory factoryToTest = new TextToVectorUpdateProcessorFactory(); + ModifiableSolrParams params = new ModifiableSolrParams(); + factoryToTest.init(args); + + SolrQueryRequestBase req = new SolrQueryRequestBase(collection1, params) {}; + + return factoryToTest.getInstance(req, null, null); } } From 94d60a55fa4ebe73d45a4c21dfdaad8d47de7580 Mon Sep 17 00:00:00 2001 From: Renato Haeberli Date: Fri, 11 Jul 2025 19:03:01 +0200 Subject: [PATCH 5/7] wrapping exception into SolrException --- .../llm/textvectorisation/model/SolrTextToVectorModel.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java index 786055941d5..75e6e5905d9 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java @@ -26,6 +26,8 @@ import java.util.Objects; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.llm.textvectorisation.store.TextToVectorModelException; import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; @@ -114,7 +116,7 @@ public static SolrTextToVectorModel getInstance( .invoke(builder, params.get(paramName).toString()); } catch (NoSuchMethodException e) { log.error("Parameter {} not supported by model {}", paramName, className); - throw e; + throw new SolrException(ErrorCode.BAD_REQUEST, ,e); } } } From 997376c2ab13e0fdc64d8623107b5b1aecf6d713 Mon Sep 17 00:00:00 2001 From: Renato Haeberli Date: Sat, 12 Jul 2025 14:10:15 +0200 Subject: [PATCH 6/7] wrapping exception into SolrException --- .../llm/textvectorisation/model/SolrTextToVectorModel.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java index 75e6e5905d9..9a22087b990 100644 --- a/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java +++ b/solr/modules/llm/src/java/org/apache/solr/llm/textvectorisation/model/SolrTextToVectorModel.java @@ -27,7 +27,6 @@ import org.apache.lucene.util.Accountable; import org.apache.lucene.util.RamUsageEstimator; import org.apache.solr.common.SolrException; -import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.llm.textvectorisation.store.TextToVectorModelException; import org.apache.solr.llm.textvectorisation.store.rest.ManagedTextToVectorModelStore; @@ -116,7 +115,7 @@ public static SolrTextToVectorModel getInstance( .invoke(builder, params.get(paramName).toString()); } catch (NoSuchMethodException e) { log.error("Parameter {} not supported by model {}", paramName, className); - throw new SolrException(ErrorCode.BAD_REQUEST, ,e); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e); } } } From a6a45e40bdc66d8d041f4d38d687253a40b95950 Mon Sep 17 00:00:00 2001 From: Alessandro Benedetti Date: Mon, 14 Jul 2025 11:55:09 +0100 Subject: [PATCH 7/7] Update CHANGES.txt --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 85b4be69d12..2ca073e8ef8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -374,6 +374,8 @@ Bug Fixes * SOLR-17792: Fix deadlocks in ParallelHttpShardHandler, re-implement synchronization in HttpShardHandler (Houston Putman) +* GITHUB#3426: Improving check in TextToVectorUpdateProcessorFactory, which breaks update for new dynamic fields. (Renato Haeberli via Alessandro Benedetti) + Dependency Upgrades --------------------- * SOLR-17795: Upgrade Lucene to 9.12.2. (Pierre Salagnac, Christine Poerschke, Houston Putman)