Skip to content

Commit dd5b162

Browse files
XuanYang-cn and alwayslove2013
authored and committed
fix: Unable to run vdbbench and cli
fix: remove comma of logging str; fix cli unable to run #444 Signed-off-by: yangxuan <[email protected]>
1 parent 6ee3dbe commit dd5b162

File tree

15 files changed

+88
-151
lines changed

15 files changed

+88
-151
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ lint.ignore = [
133133
"RUF017",
134134
"C416",
135135
"PLW0603",
136+
"COM812",
136137
]
137138

138139
# Allow autofix for all enabled rules (when `--fix`) is provided.

vectordb_bench/backend/clients/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class DB(Enum):
4242
AliyunOpenSearch = "AliyunOpenSearch"
4343

4444
@property
45-
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912
45+
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
4646
"""Import while in use"""
4747
if self == DB.Milvus:
4848
from .milvus.milvus import Milvus
@@ -129,11 +129,16 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912
129129

130130
return AliyunOpenSearch
131131

132+
if self == DB.Test:
133+
from .test.test import Test
134+
135+
return Test
136+
132137
msg = f"Unknown DB: {self.name}"
133138
raise ValueError(msg)
134139

135140
@property
136-
def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912
141+
def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901
137142
"""Import while in use"""
138143
if self == DB.Milvus:
139144
from .milvus.config import MilvusConfig
@@ -220,6 +225,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912
220225

221226
return AliyunOpenSearchConfig
222227

228+
if self == DB.Test:
229+
from .test.config import TestConfig
230+
231+
return TestConfig
232+
223233
msg = f"Unknown DB: {self.name}"
224234
raise ValueError(msg)
225235

vectordb_bench/backend/clients/memorydb/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ class MemoryDBTypedDict(TypedDict):
4343
show_default=True,
4444
default=False,
4545
help=(
46-
"Cluster Mode Disabled (CMD), use this flag when testing locally on a single node instance.",
47-
" In production, MemoryDB only supports cluster mode (CME)",
46+
"Cluster Mode Disabled (CMD), use this flag when testing locally on a single node instance."
47+
" In production, MemoryDB only supports cluster mode (CME)"
4848
),
4949
),
5050
]

vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -200,10 +200,7 @@ def _create_index(self):
200200
self.cursor.execute(index_create_sql)
201201
self.conn.commit()
202202
except Exception as e:
203-
log.warning(
204-
f"Failed to create pgvecto.rs index {self._index_name} \
205-
at table {self.table_name} error: {e}",
206-
)
203+
log.warning(f"Failed to create pgvecto.rs index {self._index_name} at table {self.table_name} error: {e}")
207204
raise e from None
208205

209206
def _create_table(self, dim: int):
@@ -258,9 +255,7 @@ def insert_embeddings(
258255

259256
return len(metadata), None
260257
except Exception as e:
261-
log.warning(
262-
f"Failed to insert data into pgvecto.rs table ({self.table_name}), error: {e}",
263-
)
258+
log.warning(f"Failed to insert data into pgvecto.rs table ({self.table_name}), error: {e}")
264259
return 0, e
265260

266261
def search_embedding(

vectordb_bench/backend/clients/pgvector/pgvector.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,7 @@ def insert_embeddings(
415415

416416
return len(metadata), None
417417
except Exception as e:
418-
log.warning(
419-
f"Failed to insert data into pgvector table ({self.table_name}), error: {e}",
420-
)
418+
log.warning(f"Failed to insert data into pgvector table ({self.table_name}), error: {e}")
421419
return 0, e
422420

423421
def search_embedding(

vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,7 @@ def insert_embeddings(
255255

256256
return len(metadata), None
257257
except Exception as e:
258-
log.warning(
259-
f"Failed to insert data into pgvector table ({self.table_name}), error: {e}",
260-
)
258+
log.warning(f"Failed to insert data into pgvector table ({self.table_name}), error: {e}")
261259
return 0, e
262260

263261
def search_embedding(

vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ def optimize(self):
7676
continue
7777
if info.status == CollectionStatus.GREEN:
7878
msg = (
79-
f"Stored vectors: {info.vectors_count}, Indexed vectors: {info.indexed_vectors_count}, ",
80-
f"Collection status: {info.indexed_vectors_count}",
79+
f"Stored vectors: {info.vectors_count}, Indexed vectors: {info.indexed_vectors_count}, "
80+
f"Collection status: {info.indexed_vectors_count}"
8181
)
8282
log.info(msg)
8383
return

vectordb_bench/backend/clients/test/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class TestTypedDict(CommonTypedDict): ...
1717
@click_parameter_decorators_from_typed_dict(TestTypedDict)
1818
def Test(**parameters: Unpack[TestTypedDict]):
1919
run(
20-
db=DB.NewClient,
20+
db=DB.Test,
2121
db_config=TestConfig(db_label=parameters["db_label"]),
2222
db_case_config=TestIndexConfig(),
2323
**parameters,

vectordb_bench/backend/data_source.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,7 @@ def validate_file(self, remote: pathlib.Path, local: pathlib.Path) -> bool:
6363
# check size equal
6464
remote_size, local_size = info.content_length, local.stat().st_size
6565
if remote_size != local_size:
66-
log.info(
67-
f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]",
68-
)
66+
log.info(f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]")
6967
return False
7068

7169
return True
@@ -89,9 +87,7 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path):
8987
local_file = local_ds_root.joinpath(file)
9088

9189
if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)):
92-
log.info(
93-
f"local file: {local_file} not match with remote: {remote_file}; add to downloading list",
94-
)
90+
log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list")
9591
downloads.append((remote_file, local_file))
9692

9793
if len(downloads) == 0:
@@ -135,9 +131,7 @@ def read(self, dataset: str, files: list[str], local_ds_root: pathlib.Path):
135131
local_file = local_ds_root.joinpath(file)
136132

137133
if (not local_file.exists()) or (not self.validate_file(remote_file, local_file)):
138-
log.info(
139-
f"local file: {local_file} not match with remote: {remote_file}; add to downloading list",
140-
)
134+
log.info(f"local file: {local_file} not match with remote: {remote_file}; add to downloading list")
141135
downloads.append(remote_file)
142136

143137
if len(downloads) == 0:
@@ -157,9 +151,7 @@ def validate_file(self, remote: pathlib.Path, local: pathlib.Path) -> bool:
157151
# check size equal
158152
remote_size, local_size = info.get("size"), local.stat().st_size
159153
if remote_size != local_size:
160-
log.info(
161-
f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]",
162-
)
154+
log.info(f"local file: {local} size[{local_size}] not match with remote size[{remote_size}]")
163155
return False
164156

165157
return True

vectordb_bench/backend/runner/mp_runner.py

Lines changed: 16 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -79,24 +79,22 @@ def search(
7979

8080
if count % 500 == 0:
8181
log.debug(
82-
f"({mp.current_process().name:16}) ",
83-
f"search_count: {count}, latest_latency={time.perf_counter()-s}",
82+
f"({mp.current_process().name:16}) "
83+
f"search_count: {count}, latest_latency={time.perf_counter()-s}"
8484
)
8585

8686
total_dur = round(time.perf_counter() - start_time, 4)
8787
log.info(
8888
f"{mp.current_process().name:16} search {self.duration}s: "
89-
f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}",
89+
f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}"
9090
)
9191

9292
return (count, total_dur, latencies)
9393

9494
@staticmethod
9595
def get_mp_context():
9696
mp_start_method = "spawn"
97-
log.debug(
98-
f"MultiProcessingSearchRunner get multiprocessing start method: {mp_start_method}",
99-
)
97+
log.debug(f"MultiProcessingSearchRunner get multiprocessing start method: {mp_start_method}")
10098
return mp.get_context(mp_start_method)
10199

102100
def _run_all_concurrencies_mem_efficient(self):
@@ -113,9 +111,7 @@ def _run_all_concurrencies_mem_efficient(self):
113111
mp_context=self.get_mp_context(),
114112
max_workers=conc,
115113
) as executor:
116-
log.info(
117-
f"Start search {self.duration}s in concurrency {conc}, filters: {self.filters}",
118-
)
114+
log.info(f"Start search {self.duration}s in concurrency {conc}, filters: {self.filters}")
119115
future_iter = [executor.submit(self.search, self.test_data, q, cond) for i in range(conc)]
120116
# Sync all processes
121117
while q.qsize() < conc:
@@ -124,9 +120,7 @@ def _run_all_concurrencies_mem_efficient(self):
124120

125121
with cond:
126122
cond.notify_all()
127-
log.info(
128-
f"Syncing all process and start concurrency search, concurrency={conc}",
129-
)
123+
log.info(f"Syncing all process and start concurrency search, concurrency={conc}")
130124

131125
start = time.perf_counter()
132126
all_count = sum([r.result()[0] for r in future_iter])
@@ -140,18 +134,14 @@ def _run_all_concurrencies_mem_efficient(self):
140134
conc_qps_list.append(qps)
141135
conc_latency_p99_list.append(latency_p99)
142136
conc_latency_avg_list.append(latency_avg)
143-
log.info(
144-
f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}",
145-
)
137+
log.info(f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}")
146138

147139
if qps > max_qps:
148140
max_qps = qps
149-
log.info(
150-
f"Update largest qps with concurrency {conc}: current max_qps={max_qps}",
151-
)
141+
log.info(f"Update largest qps with concurrency {conc}: current max_qps={max_qps}")
152142
except Exception as e:
153143
log.warning(
154-
f"Fail to search all concurrencies: {self.concurrencies}, max_qps before failure={max_qps}, reason={e}",
144+
f"Fail to search, concurrencies: {self.concurrencies}, max_qps before failure={max_qps}, reason={e}"
155145
)
156146
traceback.print_exc()
157147

@@ -193,9 +183,7 @@ def _run_by_dur(self, duration: int) -> float:
193183
mp_context=self.get_mp_context(),
194184
max_workers=conc,
195185
) as executor:
196-
log.info(
197-
f"Start search_by_dur {duration}s in concurrency {conc}, filters: {self.filters}",
198-
)
186+
log.info(f"Start search_by_dur {duration}s in concurrency {conc}, filters: {self.filters}")
199187
future_iter = [
200188
executor.submit(self.search_by_dur, duration, self.test_data, q, cond) for i in range(conc)
201189
]
@@ -206,24 +194,18 @@ def _run_by_dur(self, duration: int) -> float:
206194

207195
with cond:
208196
cond.notify_all()
209-
log.info(
210-
f"Syncing all process and start concurrency search, concurrency={conc}",
211-
)
197+
log.info(f"Syncing all process and start concurrency search, concurrency={conc}")
212198

213199
start = time.perf_counter()
214200
all_count = sum([r.result() for r in future_iter])
215201
cost = time.perf_counter() - start
216202

217203
qps = round(all_count / cost, 4)
218-
log.info(
219-
f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}",
220-
)
204+
log.info(f"End search in concurrency {conc}: dur={cost}s, total_count={all_count}, qps={qps}")
221205

222206
if qps > max_qps:
223207
max_qps = qps
224-
log.info(
225-
f"Update largest qps with concurrency {conc}: current max_qps={max_qps}",
226-
)
208+
log.info(f"Update largest qps with concurrency {conc}: current max_qps={max_qps}")
227209
except Exception as e:
228210
log.warning(
229211
f"Fail to search all concurrencies: {self.concurrencies}, max_qps before failure={max_qps}, reason={e}",
@@ -275,14 +257,14 @@ def search_by_dur(
275257

276258
if count % 500 == 0:
277259
log.debug(
278-
f"({mp.current_process().name:16}) search_count: {count}, ",
279-
f"latest_latency={time.perf_counter()-s}",
260+
f"({mp.current_process().name:16}) search_count: {count}, "
261+
f"latest_latency={time.perf_counter()-s}"
280262
)
281263

282264
total_dur = round(time.perf_counter() - start_time, 4)
283265
log.debug(
284266
f"{mp.current_process().name:16} search {self.duration}s: "
285-
f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}",
267+
f"actual_dur={total_dur}s, count={count}, qps in this process: {round(count / total_dur, 4):3}"
286268
)
287269

288270
return count

0 commit comments

Comments (0)