
Commit 0095bd7

XuanYang-cn authored and alwayslove2013 committed
enhance: Unify optimize and remove ready_to_load
PyMilvus used to be the only client that used `ready_to_load`. Now it loads the collection when creating it, so this PR removes `ready_to_load` from the client API. It also enhances `optimize` and removes `optimize_with_size`.

Signed-off-by: yangxuan <[email protected]>
1 parent dd5b162 commit 0095bd7
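
In short, every client now exposes a single optimize(data_size) hook instead of the former ready_to_load / optimize / optimize_with_size trio. A minimal sketch of the change follows; the class names OldClient and NewClient are illustrative only, not part of the codebase:

# Before this commit: three hooks on every VectorDB client.
class OldClient:
    def ready_to_load(self):
        """Called before load cases; PyMilvus was the only client that used it."""

    def optimize(self):
        """Called between insertion and search in performance cases."""

    def optimize_with_size(self, data_size: int):
        # The default implementation simply delegated to optimize().
        self.optimize()

# After this commit: one unified hook; the inserted row count is optional.
class NewClient:
    def optimize(self, data_size: int | None = None):
        """Called between insertion and search; clients that need the row
        count (e.g. to poll until all rows are visible) can read data_size."""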

File tree

20 files changed: +33, -109 lines changed


vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py

Lines changed: 1 addition & 7 deletions
@@ -325,10 +325,7 @@ def need_normalize_cosine(self) -> bool:
 
         return False
 
-    def optimize(self):
-        pass
-
-    def optimize_with_size(self, data_size: int):
+    def optimize(self, data_size: int):
         log.info(f"optimize count: {data_size}")
         retry_times = 0
         while True:
@@ -340,6 +337,3 @@ def optimize_with_size(self, data_size: int):
             if total_count == data_size:
                 log.info("optimize table finish.")
                 return
-
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases."""

vectordb_bench/backend/clients/alloydb/alloydb.py

Lines changed: 1 addition & 4 deletions
@@ -149,10 +149,7 @@ def _drop_table(self):
         )
         self.conn.commit()
 
-    def ready_to_load(self):
-        pass
-
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         self._post_insert()
 
     def _post_insert(self):

vectordb_bench/backend/clients/api.py

Lines changed: 8 additions & 15 deletions
@@ -137,6 +137,13 @@ def __init__(
     @contextmanager
     def init(self) -> None:
         """create and destory connections to database.
+        Why contextmanager:
+
+            In multiprocessing search tasks, vectordbbench might init
+            totally hundreds of thousands of connections with DB server.
+
+            Too many connections may drain local FDs or server connection resources.
+            If the DB client doesn't have `close()` method, just set the object to None.
 
         Examples:
             >>> with self.init():
@@ -187,9 +194,8 @@ def search_embedding(
         """
         raise NotImplementedError
 
-    # TODO: remove
     @abstractmethod
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         """optimize will be called between insertion and search in performance cases.
 
         Should be blocked until the vectorDB is ready to be tested on
@@ -199,16 +205,3 @@ def optimize(self):
         Optimize's execution time is limited, the limited time is based on cases.
         """
         raise NotImplementedError
-
-    def optimize_with_size(self, data_size: int):
-        self.optimize()
-
-    # TODO: remove
-    @abstractmethod
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases.
-
-        Should be blocked until the vectorDB is ready to be tested on
-        heavy load cases.
-        """
-        raise NotImplementedError
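
Taken together with the docstrings above, here is a hedged sketch of how a benchmark worker would now drive a client. The function name run_one_case and the variables client, embeddings, and metadata are placeholders, and the insert_embeddings call shape is assumed from the client interface, not code from this PR:

# Hypothetical driver, assuming `client` is any concrete VectorDB subclass
# and `embeddings` / `metadata` are the prepared dataset.
def run_one_case(client, embeddings, metadata):
    # init() is a contextmanager so each multiprocessing worker opens and
    # closes its own connection instead of leaking FDs or server connections.
    with client.init():
        client.insert_embeddings(embeddings, metadata)

    with client.init():
        # Single unified call: clients that ignore the count accept
        # data_size=None by default; others (e.g. aliyun_opensearch above)
        # poll until data_size rows are visible before returning.
        client.optimize(data_size=len(embeddings))

As the optimize docstring notes, its execution time is bounded per case, so implementations should block only until the database is ready to be searched.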

vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py

Lines changed: 4 additions & 7 deletions
@@ -145,15 +145,15 @@ def search_embedding(
                 docvalue_fields=[self.id_col_name],
                 stored_fields="_none_",
             )
-            log.info(f'Search took: {resp["took"]}')
-            log.info(f'Search shards: {resp["_shards"]}')
-            log.info(f'Search hits total: {resp["hits"]["total"]}')
+            log.info(f"Search took: {resp['took']}")
+            log.info(f"Search shards: {resp['_shards']}")
+            log.info(f"Search hits total: {resp['hits']['total']}")
             return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
         except Exception as e:
             log.warning(f"Failed to search: {self.index_name} error: {e!s}")
             raise e from None
 
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         """optimize will be called between insertion and search in performance cases."""
         # Call refresh first to ensure that all segments are created
         self._refresh_index()
@@ -194,6 +194,3 @@ def _load_graphs_to_memory(self):
         log.info("Calling warmup API to load graphs into memory")
         warmup_endpoint = f"/_plugins/_knn/warmup/{self.index_name}"
         self.client.transport.perform_request("GET", warmup_endpoint)
-
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases."""

vectordb_bench/backend/clients/chroma/chroma.py

Lines changed: 1 addition & 4 deletions
@@ -57,10 +57,7 @@ def init(self) -> None:
     def ready_to_search(self) -> bool:
         pass
 
-    def ready_to_load(self) -> bool:
-        pass
-
-    def optimize(self) -> None:
+    def optimize(self, data_size: int | None = None):
         pass
 
     def insert_embeddings(

vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py

Lines changed: 1 addition & 4 deletions
@@ -143,7 +143,7 @@ def search_embedding(
             log.warning(f"Failed to search: {self.indice} error: {e!s}")
             raise e from None
 
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         """optimize will be called between insertion and search in performance cases."""
         assert self.client is not None, "should self.init() first"
         self.client.indices.refresh(index=self.indice)
@@ -158,6 +158,3 @@ def optimize(self):
             task_status = self.client.tasks.get(task_id=force_merge_task_id)
             if task_status["completed"]:
                 return
-
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases."""

vectordb_bench/backend/clients/memorydb/memorydb.py

Lines changed: 2 additions & 5 deletions
@@ -157,17 +157,14 @@ def init(self) -> Generator[None, None, None]:
         self.conn = self.get_client()
         search_param = self.case_config.search_param()
         if search_param["ef_runtime"]:
-            self.ef_runtime_str = f'EF_RUNTIME {search_param["ef_runtime"]}'
+            self.ef_runtime_str = f"EF_RUNTIME {search_param['ef_runtime']}"
         else:
             self.ef_runtime_str = ""
         yield
         self.conn.close()
         self.conn = None
 
-    def ready_to_load(self) -> bool:
-        pass
-
-    def optimize(self) -> None:
+    def optimize(self, data_size: int | None = None):
         self._post_insert()
 
     def insert_embeddings(

vectordb_bench/backend/clients/milvus/milvus.py

Lines changed: 1 addition & 20 deletions
@@ -138,26 +138,7 @@ def wait_index():
             log.warning(f"{self.name} optimize error: {e}")
             raise e from None
 
-    def ready_to_load(self):
-        assert self.col, "Please call self.init() before"
-        self._pre_load(self.col)
-
-    def _pre_load(self, coll: Collection):
-        try:
-            if not coll.has_index(index_name=self._index_name):
-                log.info(f"{self.name} create index")
-                coll.create_index(
-                    self._vector_field,
-                    self.case_config.index_param(),
-                    index_name=self._index_name,
-                )
-            coll.load()
-            log.info(f"{self.name} load")
-        except Exception as e:
-            log.warning(f"{self.name} pre load error: {e}")
-            raise e from None
-
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         assert self.col, "Please call self.init() before"
         self._optimize()
 
vectordb_bench/backend/clients/pgdiskann/pgdiskann.py

Lines changed: 1 addition & 4 deletions
@@ -143,10 +143,7 @@ def _drop_table(self):
         )
         self.conn.commit()
 
-    def ready_to_load(self):
-        pass
-
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         self._post_insert()
 
     def _post_insert(self):

vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py

Lines changed: 1 addition & 4 deletions
@@ -153,10 +153,7 @@ def _drop_table(self):
         )
         self.conn.commit()
 
-    def ready_to_load(self):
-        pass
-
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         self._post_insert()
 
     def _post_insert(self):
