Skip to content

Commit c3b5f57

Browse files
risdenk, hossman, dsmiley, rjernst
authored
SOLR-5707: Lucene Expressions via ExpressionValueSourceParser (#1244)
New ExpressionValueSourceParser that allows custom function queries / VSPs to be defined in a subset of JavaScript, pre-compiled, and with access to the score and fields. It's powered by the Lucene Expressions module. ValueSourceAugmenter: score propagation --------- Co-authored-by: Chris Hostetter <[email protected]> Co-authored-by: David Smiley <[email protected]> Co-authored-by: Ryan Ernst <[email protected]>
1 parent 2a5cc9d commit c3b5f57

File tree

13 files changed

+729
-17
lines changed

13 files changed

+729
-17
lines changed

gradle/documentation/pull-lucene-javadocs.gradle

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,12 @@ configure(project(":solr:documentation")) {
4141
// - For now this list is focused solely on the javadocs needed for ref-guide link validation.
4242
// - If/when additional links are added from the ref-guide to additional lucene modules not listed here,
4343
// they can be added.
44-
// - If/when we need the lucene javadocs for "all" lucene depdencies in Solr (ie: to do link checking
45-
// from all Solr javadocs?) then perhaps we can find a way to build this list programatically?
46-
// - If these javadocs are (only every) consumed by the ref guide only, then these deps & associated tasks
44+
// - If/when we need the lucene javadocs for "all" lucene dependencies in Solr (ie: to do link checking
45+
// from all Solr javadocs?) then perhaps we can find a way to build this list programmatically?
46+
// - If these javadocs are only consumed by the ref guide, then these deps & associated tasks
4747
// should just be moved to the ref-guide build.gradle
4848
javadocs variantOf(libs.apache.lucene.core) { classifier 'javadoc' }
49+
javadocs variantOf(libs.apache.lucene.expressions) { classifier 'javadoc' }
4950
javadocs variantOf(libs.apache.lucene.analysis.common) { classifier 'javadoc' }
5051
javadocs variantOf(libs.apache.lucene.analysis.stempel) { classifier 'javadoc' }
5152
javadocs variantOf(libs.apache.lucene.queryparser) { classifier 'javadoc' }
@@ -65,7 +66,7 @@ configure(project(":solr:documentation")) {
6566
def resolved = configurations.javadocs.resolvedConfiguration
6667
resolved.resolvedArtifacts.each { artifact ->
6768
def id = artifact.moduleVersion.id
68-
// This mimics the directory stucture used on lucene.apache.org for the javadocs of all modules.
69+
// This mimics the directory structure used on lucene.apache.org for the javadocs of all modules.
6970
//
7071
// HACK: the lucene.apache.org javadocs are organized to match the module directory structure in the repo,
7172
// not the "flat" artifact names -- so there is no one size fits all way to determine the directory name.

solr/CHANGES.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,6 @@ Other Changes
223223
================== 9.9.0 ==================
224224
New Features
225225
---------------------
226-
* SOLR-17582: The CLUSTERSTATUS API will now stream each collection's status to the response,
227-
fetching and computing it on the fly. To avoid a backwards compatibility concern, this won't work
228-
for wt=javabin. (Matthew Biscocho, David Smiley)
229-
230226
* SOLR-17626: Add RawTFSimilarityFactory class. (Christine Poerschke)
231227

232228
* SOLR-17656: New 'skipLeaderRecovery' replica property allows PULL replicas with existing indexes to immediately become ACTIVE (hossman)
@@ -243,6 +239,10 @@ New Features
243239

244240
* SOLR-17749: Added linear function support for RankField via RankQParserPlugin. (Christine Poerschke)
245241

242+
* SOLR-5707: New ExpressionValueSourceParser that allows custom function queries / VSPs to be defined in a
243+
subset of JavaScript, pre-compiled, and with access to the score and fields. It's powered by
244+
the Lucene Expressions module. (hossman, David Smiley, Ryan Ernst, Kevin Risden)
245+
246246
Improvements
247247
---------------------
248248
* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments" APIs, which can be used to fetch detailed information about
@@ -291,6 +291,10 @@ Improvements
291291

292292
Optimizations
293293
---------------------
294+
* SOLR-17582: The CLUSTERSTATUS API will now stream each collection's status to the response,
295+
fetching and computing it on the fly. To avoid a backwards compatibility concern, this won't work
296+
for wt=javabin. (Matthew Biscocho, David Smiley)
297+
294298
* SOLR-17578: Remove ZkController internal core supplier, for slightly faster reconnection after Zookeeper session loss. (Pierre Salagnac)
295299

296300
* SOLR-17669: Reduced memory usage in SolrJ getBeans() method when handling dynamic fields with wildcards. (Martin Anzinger)

solr/core/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
import java.util.Map;
2323
import org.apache.lucene.index.LeafReaderContext;
2424
import org.apache.lucene.index.ReaderUtil;
25+
import org.apache.lucene.internal.hppc.IntFloatHashMap;
2526
import org.apache.lucene.internal.hppc.IntObjectHashMap;
2627
import org.apache.lucene.queries.function.FunctionValues;
2728
import org.apache.lucene.queries.function.ValueSource;
29+
import org.apache.lucene.search.Scorable;
2830
import org.apache.solr.common.SolrDocument;
2931
import org.apache.solr.common.SolrException;
3032
import org.apache.solr.response.ResultContext;
@@ -68,20 +70,44 @@ public void setContext(ResultContext context) {
6870
fcontext = ValueSource.newContext(searcher);
6971
this.valueSource.createWeight(fcontext, searcher);
7072
final var docList = context.getDocList();
71-
if (docList == null) {
73+
final int prefetchSize = docList == null ? 0 : Math.min(docList.size(), maxPrefetchSize);
74+
if (prefetchSize == 0) {
7275
return;
7376
}
7477

75-
final int prefetchSize = Math.min(docList.size(), maxPrefetchSize);
78+
// Check if scores are wanted and initialize the Scorable if so
79+
final MutableScorable scorable; // stored in fcontext (when not null)
80+
final IntFloatHashMap docToScoreMap;
81+
if (context.wantsScores()) { // TODO switch to ValueSource.needsScores once it exists
82+
docToScoreMap = new IntFloatHashMap(prefetchSize);
83+
scorable =
84+
new MutableScorable() {
85+
@Override
86+
public float score() throws IOException {
87+
return docToScoreMap.get(docBase + localDocId);
88+
}
89+
};
90+
fcontext.put("scorer", scorable);
91+
} else {
92+
scorable = null;
93+
docToScoreMap = null;
94+
}
95+
96+
// Get the IDs and scores
7697
final int[] ids = new int[prefetchSize];
7798
int i = 0;
7899
var iter = docList.iterator();
79100
while (iter.hasNext() && i < prefetchSize) {
80-
ids[i++] = iter.nextDoc();
101+
ids[i] = iter.nextDoc();
102+
if (docToScoreMap != null) {
103+
docToScoreMap.put(ids[i], iter.score());
104+
}
105+
i++;
81106
}
82107
Arrays.sort(ids);
83-
cachedValuesById = new IntObjectHashMap<>(ids.length);
84108

109+
// Get the values in docId order. Store in cachedValuesById
110+
cachedValuesById = new IntObjectHashMap<>(ids.length);
85111
FunctionValues values = null;
86112
int docBase = -1;
87113
int nextDocBase = 0; // i.e. this segment's maxDoc
@@ -95,9 +121,16 @@ public void setContext(ResultContext context) {
95121
}
96122

97123
int localId = docid - docBase;
98-
var value = values.objectVal(localId);
124+
125+
if (scorable != null) {
126+
scorable.docBase = docBase;
127+
scorable.localDocId = localId;
128+
}
129+
var value = values.objectVal(localId); // note: might use the Scorable
130+
99131
cachedValuesById.put(docid, value != null ? value : NULL_SENTINEL);
100132
}
133+
fcontext.remove("scorer"); // remove ours; it was there only for prefetching
101134
} catch (IOException e) {
102135
throw new SolrException(
103136
SolrException.ErrorCode.SERVER_ERROR, "exception for valuesource " + valueSource, e);
@@ -119,8 +152,13 @@ public void transform(SolrDocument doc, int docid, DocIterationInfo docIteration
119152
try {
120153
int idx = ReaderUtil.subIndex(docid, readerContexts);
121154
LeafReaderContext rcontext = readerContexts.get(idx);
122-
FunctionValues values = valueSource.getValues(fcontext, rcontext);
123155
int localId = docid - rcontext.docBase;
156+
157+
if (context.wantsScores()) {
158+
fcontext.put("scorer", new ScoreAndDoc(localId, docIterationInfo.score()));
159+
}
160+
161+
FunctionValues values = valueSource.getValues(fcontext, rcontext);
124162
setValue(doc, values.objectVal(localId));
125163
} catch (IOException e) {
126164
throw new SolrException(
@@ -131,6 +169,17 @@ public void transform(SolrDocument doc, int docid, DocIterationInfo docIteration
131169
}
132170
}
133171

172+
private abstract static class MutableScorable extends Scorable {
173+
174+
int docBase;
175+
int localDocId;
176+
177+
@Override
178+
public int docID() {
179+
return localDocId;
180+
}
181+
}
182+
134183
/** Always returns true */
135184
@Override
136185
public boolean needsSolrIndexSearcher() {
@@ -142,4 +191,25 @@ protected void setValue(SolrDocument doc, Object val) {
142191
doc.setField(name, val);
143192
}
144193
}
194+
195+
/** Fake scorer for a single document */
196+
protected static class ScoreAndDoc extends Scorable {
197+
final int docid;
198+
final float score;
199+
200+
ScoreAndDoc(int docid, float score) {
201+
this.docid = docid;
202+
this.score = score;
203+
}
204+
205+
@Override
206+
public int docID() {
207+
return docid;
208+
}
209+
210+
@Override
211+
public float score() throws IOException {
212+
return score;
213+
}
214+
}
145215
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.search;
18+
19+
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
20+
21+
import java.text.ParseException;
22+
import java.util.ArrayList;
23+
import java.util.List;
24+
import java.util.Objects;
25+
import java.util.Optional;
26+
import java.util.regex.Matcher;
27+
import java.util.regex.Pattern;
28+
import org.apache.lucene.expressions.Bindings;
29+
import org.apache.lucene.expressions.Expression;
30+
import org.apache.lucene.expressions.js.JavascriptCompiler;
31+
import org.apache.lucene.queries.function.ValueSource;
32+
import org.apache.lucene.search.DoubleValuesSource;
33+
import org.apache.solr.common.SolrException;
34+
import org.apache.solr.common.util.NamedList;
35+
import org.apache.solr.schema.IndexSchema;
36+
import org.apache.solr.schema.SchemaField;
37+
38+
/**
39+
* A ValueSource parser configured with a pre-compiled expression that can then be evaluated at
40+
* request time. It's powered by the Lucene Expressions module, which is a subset of JavaScript.
41+
*/
42+
public class ExpressionValueSourceParser extends ValueSourceParser {
43+
44+
public static final String SCORE_KEY = "score-name"; // TODO get rid of this? Why have it?
45+
public static final String EXPRESSION_KEY = "expression";
46+
47+
private Expression expression;
48+
private String scoreKey;
49+
private int numPositionalArgs = 0; // Number of positional arguments in the expression
50+
51+
@Override
52+
public void init(NamedList<?> args) {
53+
initConfiguredExpression(args);
54+
initScoreKey(args);
55+
super.init(args);
56+
}
57+
58+
/** Checks for optional scoreKey override */
59+
private void initScoreKey(NamedList<?> args) {
60+
scoreKey = Optional.ofNullable((String) args.remove(SCORE_KEY)).orElse(SolrReturnFields.SCORE);
61+
}
62+
63+
/** Parses the pre-configured expression */
64+
private void initConfiguredExpression(NamedList<?> args) {
65+
String expressionStr =
66+
Optional.ofNullable((String) args.remove(EXPRESSION_KEY))
67+
.orElseThrow(
68+
() ->
69+
new SolrException(
70+
SERVER_ERROR, EXPRESSION_KEY + " must be configured with an expression"));
71+
72+
// Find the highest positional argument in the expression
73+
Pattern pattern = Pattern.compile("\\$(\\d+)");
74+
Matcher matcher = pattern.matcher(expressionStr);
75+
while (matcher.find()) {
76+
int argNum = Integer.parseInt(matcher.group(1));
77+
numPositionalArgs = Math.max(numPositionalArgs, argNum);
78+
}
79+
80+
// TODO add way to register additional functions
81+
try {
82+
this.expression = JavascriptCompiler.compile(expressionStr);
83+
} catch (ParseException e) {
84+
throw new SolrException(
85+
SERVER_ERROR, "Unable to parse javascript expression: " + expressionStr, e);
86+
}
87+
}
88+
89+
// TODO: support dynamic expressions: expr("foo * bar / 32") ??
90+
91+
@Override
92+
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
93+
assert null != fp;
94+
95+
// Parse positional arguments if any
96+
List<DoubleValuesSource> positionalArgs = new ArrayList<>();
97+
for (int i = 0; i < numPositionalArgs; i++) {
98+
ValueSource vs = fp.parseValueSource();
99+
positionalArgs.add(vs.asDoubleValuesSource());
100+
}
101+
102+
IndexSchema schema = fp.getReq().getSchema();
103+
SolrBindings b = new SolrBindings(scoreKey, schema, positionalArgs);
104+
return ValueSource.fromDoubleValuesSource(expression.getDoubleValuesSource(b));
105+
}
106+
107+
/**
108+
* A bindings class that uses schema fields to resolve variables.
109+
*
110+
* @lucene.internal
111+
*/
112+
public static class SolrBindings extends Bindings {
113+
private final String scoreKey;
114+
private final IndexSchema schema;
115+
private final List<DoubleValuesSource> positionalArgs;
116+
117+
/**
118+
* @param scoreKey The binding name that should be used to represent the score, may be null
119+
* @param schema IndexSchema for field bindings
120+
* @param positionalArgs List of positional arguments
121+
*/
122+
public SolrBindings(
123+
String scoreKey, IndexSchema schema, List<DoubleValuesSource> positionalArgs) {
124+
this.scoreKey = scoreKey;
125+
this.schema = schema;
126+
this.positionalArgs = positionalArgs != null ? positionalArgs : new ArrayList<>();
127+
}
128+
129+
@Override
130+
public DoubleValuesSource getDoubleValuesSource(String key) {
131+
assert null != key;
132+
133+
if (Objects.equals(scoreKey, key)) {
134+
return DoubleValuesSource.SCORES;
135+
}
136+
137+
// Check for positional arguments like $1, $2, etc.
138+
if (key.startsWith("$")) {
139+
try {
140+
int position = Integer.parseInt(key.substring(1));
141+
return positionalArgs.get(position - 1); // Convert to 0-based index
142+
} catch (RuntimeException e) {
143+
throw new IllegalArgumentException("Not a valid positional argument: " + key, e);
144+
}
145+
}
146+
147+
SchemaField field = schema.getFieldOrNull(key);
148+
if (null != field) {
149+
return field.getType().getValueSource(field, null).asDoubleValuesSource();
150+
}
151+
152+
throw new IllegalArgumentException("No binding or schema field for key: " + key);
153+
}
154+
}
155+
}

0 commit comments

Comments
 (0)