Skip to content

Commit 66a55de

Browse files
authored
Support RunAnalyzer() (#367)
Signed-off-by: yhmo <[email protected]>
1 parent 461d115 commit 66a55de

File tree

14 files changed

+737
-0
lines changed

14 files changed

+737
-0
lines changed

DEVELOPMENT.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ Once the `make test` is done, you will see some executable examples under the pa
157157
- `./cmake_build/examples/sdk_nullalbe_field`: example to show the usage of nullable field.
158158
- `./cmake_build/examples/sdk_partition_key`: example to show the usage of partition key.
159159
- `./cmake_build/examples/sdk_rbac`: example to show the usage of RBAC.
160+
- `./cmake_build/examples/sdk_run_analyzer`: example to show the usage of RunAnalyzer().
160161
- `./cmake_build/examples/sdk_text_match`: example to show the usage of TEXT_MATCH.
161162
- `./cmake_build/examples/sdk_vector_binary`: example to show the usage of BinaryVector field.
162163
- `./cmake_build/examples/sdk_vector_fp16`: example to show the usage of Float16Vector/BFloat16Vector field.

examples/example.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Once the `make test` is done, you will see some executable examples under the pa
2020
- `./cmake_build/examples/sdk_nullalbe_field`: example to show the usage of nullable field.
2121
- `./cmake_build/examples/sdk_partition_key`: example to show the usage of partition key.
2222
- `./cmake_build/examples/sdk_rbac`: example to show the usage of RBAC.
23+
- `./cmake_build/examples/sdk_run_analyzer`: example to show the usage of RunAnalyzer().
2324
- `./cmake_build/examples/sdk_text_match`: example to show the usage of TEXT_MATCH.
2425
- `./cmake_build/examples/sdk_vector_binary`: example to show the usage of BinaryVector field.
2526
- `./cmake_build/examples/sdk_vector_fp16`: example to show the usage of Float16Vector/BFloat16Vector field.

examples/src/run_analyzer.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Licensed to the LF AI & Data foundation under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
#include <iostream>
18+
#include <string>
19+
#include <thread>
20+
21+
#include "ExampleUtils.h"
22+
#include "milvus/MilvusClient.h"
23+
24+
namespace {
25+
void
26+
printAnalyzerResults(const milvus::AnalyzerResults& results) {
27+
for (const auto& result : results.Results()) {
28+
std::cout << "\t------------------------------" << std::endl;
29+
for (const auto& token : result.Tokens()) {
30+
std::cout << "\t{token: " << token.token_ << ", start: " << token.start_offset_
31+
<< ", end: " << token.end_offset_ << ", position: " << token.position_
32+
<< ", position_len: " << token.position_length_ << ", hash: " << token.hash_ << "}" << std::endl;
33+
}
34+
std::cout << "\t------------------------------" << std::endl;
35+
}
36+
}
37+
38+
void
39+
runAnalyzer(milvus::MilvusClientPtr& client, const nlohmann::json& analyzer_params) {
40+
std::cout << "Run analyzer params: " << analyzer_params.dump() << std::endl;
41+
42+
const std::vector<std::string> text_content = {
43+
"Milvus is an open-source vector database",
44+
"AI applications help people better life",
45+
"Will the electric car replace gas-powered car?",
46+
"LangChain is a composable framework to build with LLMs. Milvus is integrated into LangChain.",
47+
"RAG is the process of optimizing the output of a large language model",
48+
"Newton is one of the greatest scientist of human history",
49+
"Metric type L2 is Euclidean distance",
50+
"Embeddings represent real-world objects, like words, images, or videos, in a form that computers can process.",
51+
"The moon is 384,400 km distance away from earth",
52+
"Milvus supports L2 distance and IP similarity for float vector.",
53+
};
54+
55+
milvus::RunAnalyzerArguments args;
56+
args.SetTexts(text_content);
57+
args.SetAnalyzerParams(analyzer_params);
58+
args.WithDetail(true);
59+
args.WithHash(true);
60+
61+
milvus::AnalyzerResults results;
62+
auto status = client->RunAnalyzer(args, results);
63+
util::CheckStatus("run analyzer", status);
64+
printAnalyzerResults(results);
65+
}
66+
67+
} // namespace
68+
69+
int
70+
main(int argc, char* argv[]) {
71+
printf("Example start...\n");
72+
73+
auto client = milvus::MilvusClient::Create();
74+
75+
milvus::ConnectParam connect_param{"localhost", 19530, "root", "Milvus"};
76+
auto status = client->Connect(connect_param);
77+
util::CheckStatus("connect milvus server", status);
78+
79+
nlohmann::json params_1 = {
80+
{"tokenizer", "standard"},
81+
{"filter", {{{"type", "stop"}, {"stop_words", {"of"}}}}},
82+
};
83+
runAnalyzer(client, params_1);
84+
85+
nlohmann::json params_2 = {
86+
{"tokenizer", "standard"},
87+
{"filter", {{{"type", "stop"}, {"stop_words", {"is", "of", "for"}}}}},
88+
};
89+
runAnalyzer(client, params_2);
90+
91+
client->Disconnect();
92+
return 0;
93+
}

src/impl/MilvusClientImpl.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,54 @@ MilvusClientImpl::QueryIterator(QueryIteratorArguments& arguments, QueryIterator
13711371
return Status::OK();
13721372
}
13731373

1374+
// Sends a RunAnalyzer request built from `arguments` and converts the response
// into `results`.
//
// The request carries the database/collection/field names, the serialized
// analyzer params, the input texts, the analyzer names and the detail/hash
// flags. In the response handler, `results` is replaced with one AnalyzerResult
// per entry of the response's `results` list, each holding that entry's tokens
// in response order.
//
// Returns whatever Status apiHandler produces for the call.
Status
MilvusClientImpl::RunAnalyzer(const RunAnalyzerArguments& arguments, AnalyzerResults& results) {
    auto pre = [&arguments](proto::milvus::RunAnalyzerRequest& rpc_request) {
        rpc_request.set_collection_name(arguments.CollectionName());
        rpc_request.set_db_name(arguments.DatabaseName());
        rpc_request.set_field_name(arguments.FieldName());

        // A null json (presumably the state when the caller never set params)
        // would be dumped as the literal text "null". Leave the field at its
        // empty default instead of sending that string.
        const auto& analyzer_params = arguments.AnalyzerParams();
        if (!analyzer_params.is_null()) {
            rpc_request.set_analyzer_params(analyzer_params.dump());
        }

        auto placeholder = rpc_request.mutable_placeholder();
        // The loop variable is a deliberate copy that is then moved into the request.
        for (std::string text : arguments.Texts()) {
            placeholder->Add(std::move(text));
        }
        for (const auto& name : arguments.AnalyzerNames()) {
            rpc_request.add_analyzer_names(name);
        }
        rpc_request.set_with_detail(arguments.IsWithDetail());
        rpc_request.set_with_hash(arguments.IsWithHash());
        return Status::OK();
    };

    auto post = [&results](const proto::milvus::RunAnalyzerResponse& response) {
        std::vector<AnalyzerResult> results_list;
        results_list.reserve(static_cast<size_t>(response.results_size()));

        for (const auto& rpc_result : response.results()) {
            std::vector<AnalyzerToken> tokens;
            tokens.reserve(static_cast<size_t>(rpc_result.tokens_size()));

            for (const auto& rpc_token : rpc_result.tokens()) {
                AnalyzerToken token;
                token.token_ = rpc_token.token();
                token.start_offset_ = rpc_token.start_offset();
                token.end_offset_ = rpc_token.end_offset();
                token.position_ = rpc_token.position();
                token.position_length_ = rpc_token.position_length();
                token.hash_ = rpc_token.hash();
                tokens.emplace_back(std::move(token));
            }

            // Build the AnalyzerResult in place from the collected tokens.
            results_list.emplace_back(std::move(tokens));
        }

        results = AnalyzerResults(std::move(results_list));
        return Status::OK();
    };

    return apiHandler<proto::milvus::RunAnalyzerRequest, proto::milvus::RunAnalyzerResponse>(
        pre, &MilvusConnection::RunAnalyzer, post);
}
1421+
13741422
Status
13751423
MilvusClientImpl::Flush(const std::vector<std::string>& collection_names, const ProgressMonitor& progress_monitor) {
13761424
auto pre = [&collection_names](proto::milvus::FlushRequest& rpc_request) {

src/impl/MilvusClientImpl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ class MilvusClientImpl : public MilvusClient {
233233
Status
234234
QueryIterator(QueryIteratorArguments& arguments, QueryIteratorPtr& iterator) final;
235235

236+
Status
237+
RunAnalyzer(const RunAnalyzerArguments& arguments, AnalyzerResults& results) final;
238+
236239
Status
237240
Flush(const std::vector<std::string>& collection_names, const ProgressMonitor& progress_monitor) final;
238241

src/impl/MilvusConnection.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,12 @@ MilvusConnection::Query(const proto::milvus::QueryRequest& request, proto::milvu
447447
return grpcCall("Query", &Stub::Query, request, response, options);
448448
}
449449

450+
// Issues the "RunAnalyzer" RPC through grpcCall, forwarding `request`,
// `response` and `options` unchanged, and returns the resulting Status.
Status
MilvusConnection::RunAnalyzer(const proto::milvus::RunAnalyzerRequest& request,
                              proto::milvus::RunAnalyzerResponse& response, const GrpcContextOptions& options) {
    Status rpc_status = grpcCall("RunAnalyzer", &Stub::RunAnalyzer, request, response, options);
    return rpc_status;
}
455+
450456
Status
451457
MilvusConnection::GetFlushState(const proto::milvus::GetFlushStateRequest& request,
452458
proto::milvus::GetFlushStateResponse& response, const GrpcContextOptions& options) {

src/impl/MilvusConnection.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,10 @@ class MilvusConnection {
242242
Query(const proto::milvus::QueryRequest& request, proto::milvus::QueryResults& response,
243243
const GrpcContextOptions& options);
244244

245+
Status
246+
RunAnalyzer(const proto::milvus::RunAnalyzerRequest& request, proto::milvus::RunAnalyzerResponse& response,
247+
const GrpcContextOptions& options);
248+
245249
Status
246250
GetFlushState(const proto::milvus::GetFlushStateRequest& request, proto::milvus::GetFlushStateResponse& response,
247251
const GrpcContextOptions& options);

src/impl/types/AnalyzerResults.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// Licensed to the LF AI & Data foundation under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
#include "milvus/types/AnalyzerResults.h"
18+
19+
#include <memory>
20+
21+
namespace milvus {
22+
23+
AnalyzerResult::AnalyzerResult(std::vector<AnalyzerToken>&& tokens) : tokens_(std::move(tokens)) {
24+
}
25+
26+
const std::vector<AnalyzerToken>&
27+
AnalyzerResult::Tokens() const {
28+
return tokens_;
29+
}
30+
31+
Status
32+
AnalyzerResult::AddToken(AnalyzerToken&& token) {
33+
tokens_.emplace_back(token);
34+
return Status::OK();
35+
}
36+
37+
AnalyzerResults::AnalyzerResults(std::vector<AnalyzerResult>&& results) : results_(std::move(results)) {
38+
}
39+
40+
const std::vector<AnalyzerResult>&
41+
AnalyzerResults::Results() const {
42+
return results_;
43+
}
44+
45+
Status
46+
AnalyzerResults::AddResult(AnalyzerResult&& result) {
47+
results_.emplace_back(result);
48+
return Status::OK();
49+
}
50+
51+
} // namespace milvus
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// Licensed to the LF AI & Data foundation under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
#include "milvus/types/RunAnalyzerArguments.h"

#include <utility>
18+
19+
namespace milvus {
20+
const std::string&
21+
RunAnalyzerArguments::DatabaseName() const {
22+
return db_name_;
23+
}
24+
25+
Status
26+
RunAnalyzerArguments::SetDatabaseName(const std::string& db_name) {
27+
db_name_ = db_name;
28+
return Status::OK();
29+
}
30+
31+
RunAnalyzerArguments&
32+
RunAnalyzerArguments::WithDatabaseName(const std::string& db_name) {
33+
db_name_ = db_name;
34+
return *this;
35+
}
36+
37+
const std::string&
38+
RunAnalyzerArguments::CollectionName() const {
39+
return collection_name_;
40+
}
41+
42+
Status
43+
RunAnalyzerArguments::SetCollectionName(std::string collection_name) {
44+
collection_name_ = collection_name;
45+
return Status::OK();
46+
}
47+
48+
RunAnalyzerArguments&
49+
RunAnalyzerArguments::WithCollectionName(std::string collection_name) {
50+
collection_name_ = collection_name;
51+
return *this;
52+
}
53+
54+
const std::string&
55+
RunAnalyzerArguments::FieldName() const {
56+
return field_name_;
57+
}
58+
59+
Status
60+
RunAnalyzerArguments::SetFieldName(std::string field_name) {
61+
field_name_ = field_name;
62+
return Status::OK();
63+
}
64+
65+
RunAnalyzerArguments&
66+
RunAnalyzerArguments::WithFieldName(std::string field_name) {
67+
field_name_ = field_name;
68+
return *this;
69+
}
70+
71+
const std::vector<std::string>&
72+
RunAnalyzerArguments::Texts() const {
73+
return texts_;
74+
}
75+
76+
Status
77+
RunAnalyzerArguments::SetTexts(const std::vector<std::string>& texts) {
78+
texts_ = texts;
79+
return Status::OK();
80+
}
81+
82+
RunAnalyzerArguments&
83+
RunAnalyzerArguments::AddText(std::string text) {
84+
texts_.emplace_back(text);
85+
return *this;
86+
}
87+
88+
const std::vector<std::string>&
89+
RunAnalyzerArguments::AnalyzerNames() const {
90+
return analyzer_names_;
91+
}
92+
93+
Status
94+
RunAnalyzerArguments::SetAnalyzerNames(const std::vector<std::string>& names) {
95+
analyzer_names_ = names;
96+
return Status::OK();
97+
}
98+
99+
RunAnalyzerArguments&
100+
RunAnalyzerArguments::AddAnalyzerName(std::string name) {
101+
analyzer_names_.emplace_back(name);
102+
return *this;
103+
}
104+
105+
const nlohmann::json&
106+
RunAnalyzerArguments::AnalyzerParams() const {
107+
return analyzer_params_;
108+
}
109+
110+
Status
111+
RunAnalyzerArguments::SetAnalyzerParams(const nlohmann::json& params) {
112+
analyzer_params_ = params;
113+
return Status::OK();
114+
}
115+
116+
RunAnalyzerArguments&
117+
RunAnalyzerArguments::WithAnalyzerParams(const nlohmann::json& params) {
118+
analyzer_params_ = params;
119+
return *this;
120+
}
121+
122+
bool
123+
RunAnalyzerArguments::IsWithDetail() const {
124+
return with_detail_;
125+
}
126+
127+
RunAnalyzerArguments&
128+
RunAnalyzerArguments::WithDetail(bool with_detail) {
129+
with_detail_ = with_detail;
130+
return *this;
131+
}
132+
133+
RunAnalyzerArguments&
134+
RunAnalyzerArguments::WithHash(bool with_hash) {
135+
with_hash_ = with_hash;
136+
return *this;
137+
}
138+
139+
bool
140+
RunAnalyzerArguments::IsWithHash() const {
141+
return with_hash_;
142+
}
143+
144+
} // namespace milvus

0 commit comments

Comments
 (0)