Skip to content

Commit 3b9c781

Browse files
risdenk authored and dsmiley committed
SOLR-5707: Lucene Expressions via ExpressionValueSourceParser (#1244)
New ExpressionValueSourceParser that allows custom function queries / VSPs to be defined in a subset of JavaScript, pre-compiled, and that can access the score and fields. It's powered by the Lucene Expressions module. ValueSourceAugmenter: score propagation --------- Co-authored-by: Chris Hostetter <[email protected]> Co-authored-by: David Smiley <[email protected]> Co-authored-by: Ryan Ernst <[email protected]> (cherry picked from commit c3b5f57)
1 parent d5ec5f4 commit 3b9c781

File tree

9 files changed

+725
-14
lines changed

9 files changed

+725
-14
lines changed

gradle/documentation/pull-lucene-javadocs.gradle

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,12 @@ configure(project(":solr:documentation")) {
4141
// - For now this list is focused solely on the javadocs needed for ref-guide link validation.
4242
// - If/when additional links are added from the ref-guide to additional lucene modules not listed here,
4343
// they can be added.
44-
// - If/when we need the lucene javadocs for "all" lucene depdencies in Solr (ie: to do link checking
45-
// from all Solr javadocs?) then perhaps we can find a way to build this list programatically?
46-
// - If these javadocs are (only every) consumed by the ref guide only, then these deps & associated tasks
44+
// - If/when we need the lucene javadocs for "all" lucene dependencies in Solr (ie: to do link checking
45+
// from all Solr javadocs?) then perhaps we can find a way to build this list programmatically?
46+
// - If these javadocs are only consumed by the ref guide, then these deps & associated tasks
4747
// should just be moved to the ref-guide build.gradle
4848
javadocs group: 'org.apache.lucene', name: 'lucene-core', classifier: 'javadoc'
49+
javadocs group: 'org.apache.lucene', name: 'lucene-expressions', classifier: 'javadoc'
4950
javadocs group: 'org.apache.lucene', name: 'lucene-analysis-common', classifier: 'javadoc'
5051
javadocs group: 'org.apache.lucene', name: 'lucene-analysis-stempel', classifier: 'javadoc'
5152
javadocs group: 'org.apache.lucene', name: 'lucene-queryparser', classifier: 'javadoc'
@@ -65,7 +66,7 @@ configure(project(":solr:documentation")) {
6566
def resolved = configurations.javadocs.resolvedConfiguration
6667
resolved.resolvedArtifacts.each { artifact ->
6768
def id = artifact.moduleVersion.id
68-
// This mimics the directory stucture used on lucene.apache.org for the javadocs of all modules.
69+
// This mimics the directory structure used on lucene.apache.org for the javadocs of all modules.
6970
//
7071
// HACK: the lucene.apache.org javadocs are organized to match the module directory structure in the repo,
7172
// not the "flat" artifact names -- so there is no one size fits all way to determine the directory name.

solr/CHANGES.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@ Other Changes
3232
================== 9.9.0 ==================
3333
New Features
3434
---------------------
35-
* SOLR-17582: The CLUSTERSTATUS API will now stream each collection's status to the response,
36-
fetching and computing it on the fly. To avoid a backwards compatibility concern, this won't work
37-
for wt=javabin. (Matthew Biscocho, David Smiley)
38-
3935
* SOLR-17626: Add RawTFSimilarityFactory class. (Christine Poerschke)
4036

4137
* SOLR-17656: New 'skipLeaderRecovery' replica property allows PULL replicas with existing indexes to immediately become ACTIVE (hossman)
@@ -52,6 +48,10 @@ New Features
5248

5349
* SOLR-17749: Added linear function support for RankField via RankQParserPlugin. (Christine Poerschke)
5450

51+
* SOLR-5707: New ExpressionValueSourceParser that allows custom function queries / VSPs to be defined in a
52+
subset of JavaScript, pre-compiled, and that can access the score and fields. It's powered by
53+
the Lucene Expressions module. (hossman, David Smiley, Ryan Ernst, Kevin Risden)
54+
5555
Improvements
5656
---------------------
5757
* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments" APIs, which can be used to fetch detailed information about
@@ -100,6 +100,10 @@ Improvements
100100

101101
Optimizations
102102
---------------------
103+
* SOLR-17582: The CLUSTERSTATUS API will now stream each collection's status to the response,
104+
fetching and computing it on the fly. To avoid a backwards compatibility concern, this won't work
105+
for wt=javabin. (Matthew Biscocho, David Smiley)
106+
103107
* SOLR-17578: Remove ZkController internal core supplier, for slightly faster reconnection after Zookeeper session loss. (Pierre Salagnac)
104108

105109
* SOLR-17669: Reduced memory usage in SolrJ getBeans() method when handling dynamic fields with wildcards. (Martin Anzinger)

solr/core/src/java/org/apache/solr/response/transform/ValueSourceAugmenter.java

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
import java.util.Map;
2323
import org.apache.lucene.index.LeafReaderContext;
2424
import org.apache.lucene.index.ReaderUtil;
25+
import org.apache.lucene.internal.hppc.IntFloatHashMap;
2526
import org.apache.lucene.internal.hppc.IntObjectHashMap;
2627
import org.apache.lucene.queries.function.FunctionValues;
2728
import org.apache.lucene.queries.function.ValueSource;
29+
import org.apache.lucene.search.Scorable;
2830
import org.apache.solr.common.SolrDocument;
2931
import org.apache.solr.common.SolrException;
3032
import org.apache.solr.response.ResultContext;
@@ -67,20 +69,44 @@ public void setContext(ResultContext context) {
6769
fcontext = ValueSource.newContext(searcher);
6870
this.valueSource.createWeight(fcontext, searcher);
6971
final var docList = context.getDocList();
70-
if (docList == null) {
72+
final int prefetchSize = docList == null ? 0 : Math.min(docList.size(), maxPrefetchSize);
73+
if (prefetchSize == 0) {
7174
return;
7275
}
7376

74-
final int prefetchSize = Math.min(docList.size(), maxPrefetchSize);
77+
// Check if scores are wanted and initialize the Scorable if so
78+
final MutableScorable scorable; // stored in fcontext (when not null)
79+
final IntFloatHashMap docToScoreMap;
80+
if (context.wantsScores()) { // TODO switch to ValueSource.needsScores once it exists
81+
docToScoreMap = new IntFloatHashMap(prefetchSize);
82+
scorable =
83+
new MutableScorable() {
84+
@Override
85+
public float score() throws IOException {
86+
return docToScoreMap.get(docBase + localDocId);
87+
}
88+
};
89+
fcontext.put("scorer", scorable);
90+
} else {
91+
scorable = null;
92+
docToScoreMap = null;
93+
}
94+
95+
// Get the IDs and scores
7596
final int[] ids = new int[prefetchSize];
7697
int i = 0;
7798
var iter = docList.iterator();
7899
while (iter.hasNext() && i < prefetchSize) {
79-
ids[i++] = iter.nextDoc();
100+
ids[i] = iter.nextDoc();
101+
if (docToScoreMap != null) {
102+
docToScoreMap.put(ids[i], iter.score());
103+
}
104+
i++;
80105
}
81106
Arrays.sort(ids);
82-
cachedValuesById = new IntObjectHashMap<>(ids.length);
83107

108+
// Get the values in docId order. Store in cachedValuesById
109+
cachedValuesById = new IntObjectHashMap<>(ids.length);
84110
FunctionValues values = null;
85111
int docBase = -1;
86112
int nextDocBase = 0; // i.e. this segment's maxDoc
@@ -94,9 +120,16 @@ public void setContext(ResultContext context) {
94120
}
95121

96122
int localId = docid - docBase;
97-
var value = values.objectVal(localId);
123+
124+
if (scorable != null) {
125+
scorable.docBase = docBase;
126+
scorable.localDocId = localId;
127+
}
128+
var value = values.objectVal(localId); // note: might use the Scorable
129+
98130
cachedValuesById.put(docid, value != null ? value : NULL_SENTINEL);
99131
}
132+
fcontext.remove("scorer"); // remove ours; it was there only for prefetching
100133
} catch (IOException e) {
101134
throw new SolrException(
102135
SolrException.ErrorCode.SERVER_ERROR, "exception for valuesource " + valueSource, e);
@@ -118,8 +151,13 @@ public void transform(SolrDocument doc, int docid) {
118151
try {
119152
int idx = ReaderUtil.subIndex(docid, readerContexts);
120153
LeafReaderContext rcontext = readerContexts.get(idx);
121-
FunctionValues values = valueSource.getValues(fcontext, rcontext);
122154
int localId = docid - rcontext.docBase;
155+
156+
if (context.wantsScores()) {
157+
fcontext.put("scorer", new ScoreAndDoc(localId, (float) doc.get("score")));
158+
}
159+
160+
FunctionValues values = valueSource.getValues(fcontext, rcontext);
123161
setValue(doc, values.objectVal(localId));
124162
} catch (IOException e) {
125163
throw new SolrException(
@@ -130,6 +168,17 @@ public void transform(SolrDocument doc, int docid) {
130168
}
131169
}
132170

171+
private abstract static class MutableScorable extends Scorable {
172+
173+
int docBase;
174+
int localDocId;
175+
176+
@Override
177+
public int docID() {
178+
return localDocId;
179+
}
180+
}
181+
133182
/** Always returns true */
134183
@Override
135184
public boolean needsSolrIndexSearcher() {
@@ -141,4 +190,25 @@ protected void setValue(SolrDocument doc, Object val) {
141190
doc.setField(name, val);
142191
}
143192
}
193+
194+
/** Fake scorer for a single document */
195+
protected static class ScoreAndDoc extends Scorable {
196+
final int docid;
197+
final float score;
198+
199+
ScoreAndDoc(int docid, float score) {
200+
this.docid = docid;
201+
this.score = score;
202+
}
203+
204+
@Override
205+
public int docID() {
206+
return docid;
207+
}
208+
209+
@Override
210+
public float score() throws IOException {
211+
return score;
212+
}
213+
}
144214
}
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.search;
18+
19+
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
20+
21+
import java.text.ParseException;
22+
import java.util.ArrayList;
23+
import java.util.List;
24+
import java.util.Objects;
25+
import java.util.Optional;
26+
import java.util.regex.Matcher;
27+
import java.util.regex.Pattern;
28+
import org.apache.lucene.expressions.Bindings;
29+
import org.apache.lucene.expressions.Expression;
30+
import org.apache.lucene.expressions.js.JavascriptCompiler;
31+
import org.apache.lucene.queries.function.ValueSource;
32+
import org.apache.lucene.search.DoubleValuesSource;
33+
import org.apache.solr.common.SolrException;
34+
import org.apache.solr.common.util.NamedList;
35+
import org.apache.solr.schema.IndexSchema;
36+
import org.apache.solr.schema.SchemaField;
37+
38+
/**
39+
* A ValueSource parser configured with a pre-compiled expression that can then be evaluated at
40+
* request time. It's powered by the Lucene Expressions module, which is a subset of JavaScript.
41+
*/
42+
public class ExpressionValueSourceParser extends ValueSourceParser {
43+
44+
public static final String SCORE_KEY = "score-name"; // TODO get rid of this? Why have it?
45+
public static final String EXPRESSION_KEY = "expression";
46+
47+
private Expression expression;
48+
private String scoreKey;
49+
private int numPositionalArgs = 0; // Number of positional arguments in the expression
50+
51+
@Override
52+
public void init(NamedList<?> args) {
53+
initConfiguredExpression(args);
54+
initScoreKey(args);
55+
super.init(args);
56+
}
57+
58+
/** Checks for optional scoreKey override */
59+
private void initScoreKey(NamedList<?> args) {
60+
scoreKey = Optional.ofNullable((String) args.remove(SCORE_KEY)).orElse(SolrReturnFields.SCORE);
61+
}
62+
63+
/** Parses the pre-configured expression */
64+
private void initConfiguredExpression(NamedList<?> args) {
65+
String expressionStr =
66+
Optional.ofNullable((String) args.remove(EXPRESSION_KEY))
67+
.orElseThrow(
68+
() ->
69+
new SolrException(
70+
SERVER_ERROR, EXPRESSION_KEY + " must be configured with an expression"));
71+
72+
// Find the highest positional argument in the expression
73+
Pattern pattern = Pattern.compile("\\$(\\d+)");
74+
Matcher matcher = pattern.matcher(expressionStr);
75+
while (matcher.find()) {
76+
int argNum = Integer.parseInt(matcher.group(1));
77+
numPositionalArgs = Math.max(numPositionalArgs, argNum);
78+
}
79+
80+
// TODO add way to register additional functions
81+
try {
82+
this.expression = JavascriptCompiler.compile(expressionStr);
83+
} catch (ParseException e) {
84+
throw new SolrException(
85+
SERVER_ERROR, "Unable to parse javascript expression: " + expressionStr, e);
86+
}
87+
}
88+
89+
// TODO: support dynamic expressions: expr("foo * bar / 32") ??
90+
91+
@Override
92+
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
93+
assert null != fp;
94+
95+
// Parse positional arguments if any
96+
List<DoubleValuesSource> positionalArgs = new ArrayList<>();
97+
for (int i = 0; i < numPositionalArgs; i++) {
98+
ValueSource vs = fp.parseValueSource();
99+
positionalArgs.add(vs.asDoubleValuesSource());
100+
}
101+
102+
IndexSchema schema = fp.getReq().getSchema();
103+
SolrBindings b = new SolrBindings(scoreKey, schema, positionalArgs);
104+
return ValueSource.fromDoubleValuesSource(expression.getDoubleValuesSource(b));
105+
}
106+
107+
/**
108+
* A bindings class that uses schema fields to resolve variables.
109+
*
110+
* @lucene.internal
111+
*/
112+
public static class SolrBindings extends Bindings {
113+
private final String scoreKey;
114+
private final IndexSchema schema;
115+
private final List<DoubleValuesSource> positionalArgs;
116+
117+
/**
118+
* @param scoreKey The binding name that should be used to represent the score, may be null
119+
* @param schema IndexSchema for field bindings
120+
* @param positionalArgs List of positional arguments
121+
*/
122+
public SolrBindings(
123+
String scoreKey, IndexSchema schema, List<DoubleValuesSource> positionalArgs) {
124+
this.scoreKey = scoreKey;
125+
this.schema = schema;
126+
this.positionalArgs = positionalArgs != null ? positionalArgs : new ArrayList<>();
127+
}
128+
129+
@Override
130+
public DoubleValuesSource getDoubleValuesSource(String key) {
131+
assert null != key;
132+
133+
if (Objects.equals(scoreKey, key)) {
134+
return DoubleValuesSource.SCORES;
135+
}
136+
137+
// Check for positional arguments like $1, $2, etc.
138+
if (key.startsWith("$")) {
139+
try {
140+
int position = Integer.parseInt(key.substring(1));
141+
return positionalArgs.get(position - 1); // Convert to 0-based index
142+
} catch (RuntimeException e) {
143+
throw new IllegalArgumentException("Not a valid positional argument: " + key, e);
144+
}
145+
}
146+
147+
SchemaField field = schema.getFieldOrNull(key);
148+
if (null != field) {
149+
return field.getType().getValueSource(field, null).asDoubleValuesSource();
150+
}
151+
152+
throw new IllegalArgumentException("No binding or schema field for key: " + key);
153+
}
154+
}
155+
}

0 commit comments

Comments
 (0)