|
| 1 | +// Licensed to the LF AI & Data foundation under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, software |
| 12 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +// See the License for the specific language governing permissions and |
| 15 | +// limitations under the License. |
| 16 | + |
| 17 | +#include <iostream> |
| 18 | +#include <string> |
| 19 | +#include <thread> |
| 20 | + |
| 21 | +#include "ExampleUtils.h" |
| 22 | +#include "milvus/MilvusClient.h" |
| 23 | + |
| 24 | +namespace { |
| 25 | +void |
| 26 | +printAnalyzerResults(const milvus::AnalyzerResults& results) { |
| 27 | + for (const auto& result : results.Results()) { |
| 28 | + std::cout << "\t------------------------------" << std::endl; |
| 29 | + for (const auto& token : result.Tokens()) { |
| 30 | + std::cout << "\t{token: " << token.token_ << ", start: " << token.start_offset_ |
| 31 | + << ", end: " << token.end_offset_ << ", position: " << token.position_ |
| 32 | + << ", position_len: " << token.position_length_ << ", hash: " << token.hash_ << "}" << std::endl; |
| 33 | + } |
| 34 | + std::cout << "\t------------------------------" << std::endl; |
| 35 | + } |
| 36 | +} |
| 37 | + |
| 38 | +void |
| 39 | +runAnalyzer(milvus::MilvusClientPtr& client, const nlohmann::json& analyzer_params) { |
| 40 | + std::cout << "Run analyzer params: " << analyzer_params.dump() << std::endl; |
| 41 | + |
| 42 | + const std::vector<std::string> text_content = { |
| 43 | + "Milvus is an open-source vector database", |
| 44 | + "AI applications help people better life", |
| 45 | + "Will the electric car replace gas-powered car?", |
| 46 | + "LangChain is a composable framework to build with LLMs. Milvus is integrated into LangChain.", |
| 47 | + "RAG is the process of optimizing the output of a large language model", |
| 48 | + "Newton is one of the greatest scientist of human history", |
| 49 | + "Metric type L2 is Euclidean distance", |
| 50 | + "Embeddings represent real-world objects, like words, images, or videos, in a form that computers can process.", |
| 51 | + "The moon is 384,400 km distance away from earth", |
| 52 | + "Milvus supports L2 distance and IP similarity for float vector.", |
| 53 | + }; |
| 54 | + |
| 55 | + milvus::RunAnalyzerArguments args; |
| 56 | + args.SetTexts(text_content); |
| 57 | + args.SetAnalyzerParams(analyzer_params); |
| 58 | + args.WithDetail(true); |
| 59 | + args.WithHash(true); |
| 60 | + |
| 61 | + milvus::AnalyzerResults results; |
| 62 | + auto status = client->RunAnalyzer(args, results); |
| 63 | + util::CheckStatus("run analyzer", status); |
| 64 | + printAnalyzerResults(results); |
| 65 | +} |
| 66 | + |
| 67 | +} // namespace |
| 68 | + |
| 69 | +int |
| 70 | +main(int argc, char* argv[]) { |
| 71 | + printf("Example start...\n"); |
| 72 | + |
| 73 | + auto client = milvus::MilvusClient::Create(); |
| 74 | + |
| 75 | + milvus::ConnectParam connect_param{"localhost", 19530, "root", "Milvus"}; |
| 76 | + auto status = client->Connect(connect_param); |
| 77 | + util::CheckStatus("connect milvus server", status); |
| 78 | + |
| 79 | + nlohmann::json params_1 = { |
| 80 | + {"tokenizer", "standard"}, |
| 81 | + {"filter", {{{"type", "stop"}, {"stop_words", {"of"}}}}}, |
| 82 | + }; |
| 83 | + runAnalyzer(client, params_1); |
| 84 | + |
| 85 | + nlohmann::json params_2 = { |
| 86 | + {"tokenizer", "standard"}, |
| 87 | + {"filter", {{{"type", "stop"}, {"stop_words", {"is", "of", "for"}}}}}, |
| 88 | + }; |
| 89 | + runAnalyzer(client, params_2); |
| 90 | + |
| 91 | + client->Disconnect(); |
| 92 | + return 0; |
| 93 | +} |
0 commit comments