Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
bf3cd5d
Combined Query Feature for Multi Query Execution
Jun 15, 2025
182bec9
Tests: Combined Query Feature for Multi Query Execution
Jun 17, 2025
b884f0e
Tests: Combined Query Feature for Multi Query Execution
Jun 24, 2025
29e8aea
Tests: Combined Query Feature for Multi Query Execution
Jun 25, 2025
c113799
Improve: Fix typo
ercsonusharma Jul 4, 2025
3600ed3
Tests: Fix errors
ercsonusharma Jul 4, 2025
9b0c76e
Review comments: implementation
ercsonusharma Jul 5, 2025
a841bc7
Code review changes
ercsonusharma Jul 12, 2025
91f8e09
Code review changes
ercsonusharma Jul 12, 2025
cace1f7
Code review changes
ercsonusharma Jul 12, 2025
299db43
Code review changes
ercsonusharma Jul 13, 2025
840070e
Code review changes
ercsonusharma Jul 13, 2025
d2feefc
Improvement and fixes
ercsonusharma Jul 16, 2025
89f63a9
Review comments impl
ercsonusharma Jul 26, 2025
d821abb
Build fix
ercsonusharma Jul 28, 2025
8041d66
Added documentation
ercsonusharma Aug 5, 2025
397dbb3
Fix for lucene upgrade
ercsonusharma Aug 8, 2025
d8b5588
Doc improv for cursors
ercsonusharma Aug 14, 2025
ec0b9cb
review comment implementation
ercsonusharma Aug 18, 2025
d6fd190
review comment implementation
ercsonusharma Aug 19, 2025
86933bc
review comment implementation
ercsonusharma Aug 20, 2025
b164979
doc update
ercsonusharma Aug 27, 2025
85f2cf9
added more test
ercsonusharma Aug 29, 2025
a4a26aa
abstract QueryComponent.mergeIds' ShardDoc processing
cpoerschke Aug 29, 2025
7fe997c
add missing @Override annotations
cpoerschke Aug 29, 2025
bcd1c3b
make DefaultShardDocQueue an anonymous class
cpoerschke Sep 1, 2025
787a016
Merge branch 'apache:main' into QueryComponent-mergeIds
cpoerschke Sep 1, 2025
7e0727c
Merge remote-tracking branch 'github_cpoerschke/QueryComponent-mergeI…
cpoerschke Sep 1, 2025
4dcbb57
dev increment: add uniqueDoc map-and-logic to ShardDocQueue
cpoerschke Sep 1, 2025
8a65023
review comment fix
ercsonusharma Sep 2, 2025
006b8c2
micro dev increment: replace unnecessary local resultSize use in Quer…
cpoerschke Sep 2, 2025
771089b
dev increment: factor out ShardDocQueue.resultIds method
cpoerschke Sep 2, 2025
460e8cd
dev increment: remove no-longer-used ShardDocQueue.(pop,size) methods
cpoerschke Sep 2, 2025
ac85d2f
review comment fix
ercsonusharma Sep 3, 2025
7b0593c
review comment fix
ercsonusharma Sep 3, 2025
c03c0f7
review comment enhancement
ercsonusharma Sep 3, 2025
a52dd22
simplification/consolidation: protected QueryComponent.newShardDocQue…
cpoerschke Sep 3, 2025
195f3f1
factor out protected QueryComponent.setResultIdsAndResponseDocs method
cpoerschke Sep 3, 2025
c1f5501
review comment enhancement
ercsonusharma Sep 3, 2025
3649d3e
Merge branch 'feat_combined_query' of https://github.com/ercsonusharm…
ercsonusharma Sep 3, 2025
4eedbed
refactor to reduce cyclomatic complexity
ercsonusharma Sep 3, 2025
0990e7f
review comment fixes
ercsonusharma Sep 4, 2025
14ff5e1
debug params fix and rrf shard sort order
ercsonusharma Sep 4, 2025
bd637b7
test cases fix and rrf shard sort order
ercsonusharma Sep 5, 2025
2958599
introducing combiner methods as pre and post
ercsonusharma Sep 7, 2025
c3e44c3
distrib forced and doc update
ercsonusharma Sep 10, 2025
e2dfcef
distrib forced fix
ercsonusharma Sep 11, 2025
d4b34fc
distrib forced fix
ercsonusharma Sep 12, 2025
3fe93b8
test fix
ercsonusharma Sep 12, 2025
f23cceb
removing combiner.method and test fix
ercsonusharma Sep 17, 2025
6419a07
test fix
ercsonusharma Sep 19, 2025
a560899
test fix
ercsonusharma Sep 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ public void prepare(ResponseBuilder rb) throws IOException {

/**
* Overrides the process method to handle CombinedQueryResponseBuilder instances. This method
* processes the responses from multiple queries in the SearchIndexer, combines them using the
* specified QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in
* the CombinedQueryResponseBuilder.
* processes the responses from multiple queries, combines them using the specified
* QueryAndResponseCombiner strategy, and sets the appropriate results and metadata in the
* CombinedQueryResponseBuilder.
*
* @param rb the ResponseBuilder object to process
* @throws IOException if an I/O error occurs during processing
Expand All @@ -167,6 +167,7 @@ public void process(ResponseBuilder rb) throws IOException {
boolean segmentTerminatedEarly = false;
boolean setMaxHitsTerminatedEarly = false;
List<QueryResult> queryResults = new ArrayList<>();
// TODO: to be parallelized
for (ResponseBuilder thisRb : crb.responseBuilders) {
// Just a placeholder for future implementation for Cursors
thisRb.setCursorMark(crb.getCursorMark());
Expand Down Expand Up @@ -213,7 +214,7 @@ private void prepareCombinedResponseBuilder(
combinedQueryResult.setSegmentTerminatedEarly(segmentTerminatedEarly);
combinedQueryResult.setMaxHitsTerminatedEarly(setMaxHitsTerminatedEarly);
crb.setResult(combinedQueryResult);
if (rb.isDebug()) {
if (rb.isDebugQuery()) {
String[] queryKeys = rb.req.getParams().getParams(CombinerParams.COMBINER_QUERY);
List<Query> queries = crb.responseBuilders.stream().map(ResponseBuilder::getQuery).toList();
NamedList<Explanation> explanations =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,11 @@ private void processComponents(
}
}

/**
* Operations to be performed post prepare for all components.
*
* @param rb the ResponseBuilder containing the request and context, such as sort specifications.
*/
protected void postPrepareComponents(ResponseBuilder rb) {
// Once all of our components have been prepared, check if this request involves a SortSpec.
// If it does, and if our request includes a cursorMark param, then parse & init the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QueryResult;
Expand Down Expand Up @@ -69,21 +68,24 @@ public int getK() {
@Override
public QueryResult combine(List<QueryResult> rankedLists, SolrParams solrParams) {
int kVal = solrParams.getInt(CombinerParams.COMBINER_RRF_K, this.k);
List<DocListAndSet> docListAndSet = getDocListsAndSetFromQueryResults(rankedLists);
QueryResult combinedResult = new QueryResult();
combineResults(combinedResult, docListAndSet, false, kVal);
combineResults(combinedResult, rankedLists, false, kVal);
return combinedResult;
}

private static List<DocListAndSet> getDocListsAndSetFromQueryResults(
List<QueryResult> rankedLists) {
List<DocListAndSet> docLists = new ArrayList<>(rankedLists.size());
for (QueryResult rankedList : rankedLists) {
docLists.add(rankedList.getDocListAndSet());
}
return docLists;
}

/**
* Merges per-shard ranked results using Reciprocal Rank Fusion (RRF).
*
* <p>Each shard list is assumed to be ordered by descending relevance. For a document at rank r
* in a shard, the contribution is {@code 1 / (k + r)} where {@code k} is read from {@link
* CombinerParams#COMBINER_RRF_K} or falls back to {@code this.k}. Contributions for the same
* document ID across shards (if found) are summed, and documents are returned sorted by the fused
* score (descending).
*
* @param shardDocMap per-shard ranked results;
* @param solrParams parameters; optional {@link CombinerParams#COMBINER_RRF_K} overrides k.
* @return one {@link ShardDoc} per unique document ID, ordered by fused score.
*/
@Override
public List<ShardDoc> combine(Map<String, List<ShardDoc>> shardDocMap, SolrParams solrParams) {
int kVal = solrParams.getInt(CombinerParams.COMBINER_RRF_K, this.k);
Expand Down Expand Up @@ -117,29 +119,29 @@ public List<ShardDoc> combine(Map<String, List<ShardDoc>> shardDocMap, SolrParam

private Map<Integer, Integer[]> combineResults(
QueryResult combinedRankedList,
List<DocListAndSet> rankedListAndSets,
List<QueryResult> queryResults,
boolean saveRankPositionsForExplain,
int kVal) {
Map<Integer, Integer[]> docIdToRanks = null;
DocSet combinedDocSet = null;
HashMap<Integer, Float> docIdToScore = new HashMap<>();
List<DocList> docLists = new ArrayList<>();
long totalMatches = 0;
for (DocListAndSet rankedListAndSet : rankedListAndSets) {
DocIterator docs = rankedListAndSet.docList.iterator();
totalMatches = Math.max(totalMatches, rankedListAndSet.docList.matches());
for (QueryResult queryResult : queryResults) {
DocIterator docs = queryResult.getDocList().iterator();
totalMatches = Math.max(totalMatches, queryResult.getDocList().matches());
int ranking = 1;
while (docs.hasNext()) {
int docId = docs.nextDoc();
float rrfScore = 1f / (kVal + ranking);
docIdToScore.compute(docId, (id, score) -> (score == null) ? rrfScore : score + rrfScore);
ranking++;
}
docLists.add(rankedListAndSet.docList);
docLists.add(queryResult.getDocList());
if (combinedDocSet == null) {
combinedDocSet = rankedListAndSet.docSet;
} else if (rankedListAndSet.docSet != null) {
combinedDocSet = combinedDocSet.union(rankedListAndSet.docSet);
combinedDocSet = queryResult.getDocSet();
} else if (queryResult.getDocSet() != null) {
combinedDocSet = combinedDocSet.union(queryResult.getDocSet());
}
}
List<Map.Entry<Integer, Float>> sortedByScoreDescending =
Expand Down Expand Up @@ -207,8 +209,7 @@ public NamedList<Explanation> getExplanations(
NamedList<Explanation> docIdsExplanations = new SimpleOrderedMap<>();
QueryResult combinedRankedList = new QueryResult();
Map<Integer, Integer[]> docIdToRanks =
combineResults(
combinedRankedList, getDocListsAndSetFromQueryResults(queryResult), true, kVal);
combineResults(combinedRankedList, queryResult, true, kVal);
DocList combinedDocList = combinedRankedList.getDocList();

DocIterator iterator = combinedDocList.iterator();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand All @@ -30,11 +29,10 @@
import org.junit.Test;

/**
* The CombinedQueryComponentTest class is a unit test suite for the CombinedQueryComponent in Solr.
* It verifies the functionality of the component by performing various queries and validating the
* responses.
* The CombinedQueryComponentTest class is an integration test suite for the CombinedQueryComponent
* in Solr. It verifies the functionality of the component by performing various queries and
* validating the responses.
*/
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class CombinedQueryComponentTest extends SolrTestCaseJ4 {

private static final int NUM_DOCS = 10;
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update this test to not call queryServer since it doesn't compare against the control client. I wish that method came with a big fat disclaimer and was named differently that didn't look so normal. There are probably more callers of it than there should be. I did #3639 just now.

In order for this test to compare both single shard & multi-shard with score relevancy, you could do a couple different things. One is to use the ExactStatsCache for distributed-IDF. Or, write queries that set a constant/fixed score. That would actually be most clear.

With such a test, you don't need to test non-distributed since the test infrastructure here allows you to do both as one.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, but I also wanted to show the results are not the same as what was expected in a single shard, as I described in the jira issue. Asserting only ideal queries (like constant/fixed score) or a type of queries (lexical with ExactStatsCache) may not give a clear picture of the algorithm IMHO.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I concur that in a realistic setting, sharding will yield score variances or ties that could change the final ordering. But why test under that circumstance? It only makes testing harder and makes it harder for a reader of the tests to confirm that the assertions are valid. We won't have less test coverage by fixing the scores somehow. I think the vast majority of tests here should operate in this circumstance, which makes them clear to understand / validate. This should save you time (fewer tests to maintain).

If the way RRF tie-breaks matters (I doubt it), I could understand adding such a test to ensure we don't introduce a regression.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, I have removed the Non-distributed test case separately using queryServer and now, using query directly with constant score queries.

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
*/
package org.apache.solr.handler.component;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
Expand All @@ -35,7 +34,6 @@
* functionality of the CombinedQueryComponent in a Solr distributed search environment. It focuses
* on testing the integration of lexical and vector queries using the combiner component.
*/
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class DistributedCombinedQueryComponentTest extends BaseDistributedSearchTestCase {

private static final int NUM_DOCS = 10;
Expand Down Expand Up @@ -205,7 +203,7 @@ public void testMultipleQueryWithSort() throws Exception {
}

/**
* Tests the hybrid query functionality of the system.
* Tests the hybrid query functionality of the system with various setting of pagination.
*
* @throws Exception if any unexpected error occurs during the test execution.
*/
Expand All @@ -222,15 +220,44 @@ public void testHybridQueryWithPagination() throws Exception {
"{\"queries\":"
+ "{\"lexical\":{\"lucene\":{\"query\":\"id:(2^=2 OR 3^=1)\"}},"
+ "\"vector\":{\"knn\":{ \"f\": \"vector\", \"topK\": 5, \"query\": \"[1.0, 2.0, 3.0, 4.0]\"}}},"
+ "\"limit\":4,\"offset\":1"
+ "\"fields\":[\"id\",\"score\",\"title\"],"
+ "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical\",\"vector\"]}}",
"shards",
getShardsString(),
CommonParams.QT,
"/search"));
assertFieldValues(rsp.getResults(), id, "2", "3", "4", "1", "6", "10", "8", "7", "5");
rsp =
queryServer(
createParams(
CommonParams.JSON,
"{\"queries\":"
+ "{\"lexical\":{\"lucene\":{\"query\":\"id:(2^=2 OR 3^=1)\"}},"
+ "\"vector\":{\"knn\":{ \"f\": \"vector\", \"topK\": 5, \"query\": \"[1.0, 2.0, 3.0, 4.0]\"}}},"
+ "\"limit\":4,"
+ "\"fields\":[\"id\",\"score\",\"title\"],"
+ "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical\",\"vector\"]}}",
"shards",
getShardsString(),
CommonParams.QT,
"/search"));
assertFieldValues(rsp.getResults(), id, "2", "3", "4", "1");
rsp =
queryServer(
createParams(
CommonParams.JSON,
"{\"queries\":"
+ "{\"lexical\":{\"lucene\":{\"query\":\"id:(2^=2 OR 3^=1)\"}},"
+ "\"vector\":{\"knn\":{ \"f\": \"vector\", \"topK\": 5, \"query\": \"[1.0, 2.0, 3.0, 4.0]\"}}},"
+ "\"limit\":4,\"offset\":3,"
+ "\"fields\":[\"id\",\"score\",\"title\"],"
+ "\"params\":{\"combiner\":true,\"combiner.query\":[\"lexical\",\"vector\"]}}",
"shards",
getShardsString(),
CommonParams.QT,
"/search"));
assertEquals(4, rsp.getResults().size());
assertFieldValues(rsp.getResults(), id, "3", "4", "1", "6");
assertFieldValues(rsp.getResults(), id, "1", "6", "10", "8");
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import static org.apache.solr.common.params.CombinerParams.COMBINER_RRF_K;
import static org.apache.solr.common.params.CombinerParams.RECIPROCAL_RANK_FUSION;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand All @@ -41,7 +40,6 @@
* The ReciprocalRankFusionTest class is a unit test suite for the {@link ReciprocalRankFusion}
* class. It verifies the correctness of the fusion algorithm and its supporting methods.
*/
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class ReciprocalRankFusionTest extends SolrTestCaseJ4 {

public static ReciprocalRankFusion reciprocalRankFusion;
Expand Down