Skip to content

Commit 032515f

Browse files
XuanYang-cn authored and alwayslove2013 committed
enhance: Refine the coding style and enable lint-action
Signed-off-by: yangxuan <[email protected]>
1 parent cc30d03 commit 032515f

File tree

103 files changed

+2490
-2126
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+2490
-2126
lines changed

.github/workflows/pull_request.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ jobs:
3131
python -m pip install --upgrade pip
3232
pip install -e ".[test]"
3333
34+
- name: Run coding checks
35+
run: |
36+
make lint
37+
3438
- name: Test with pytest
3539
run: |
3640
make unittest

.ruff.toml

Lines changed: 0 additions & 49 deletions
This file was deleted.

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,10 @@
11
unittest:
22
PYTHONPATH=`pwd` python3 -m pytest tests/test_dataset.py::TestDataSet::test_download_small -svv
3+
4+
format:
5+
PYTHONPATH=`pwd` python3 -m black vectordb_bench
6+
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench --fix
7+
8+
lint:
9+
PYTHONPATH=`pwd` python3 -m black vectordb_bench --check
10+
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,13 @@ After reopen the repository in container, run `python -m vectordb_bench` in the
240240

241241
### Check coding styles
242242
```shell
243-
$ ruff check vectordb_bench
243+
$ make lint
244244
```
245245

246-
Add `--fix` if you want to fix the coding styles automatically
246+
To fix coding style issues automatically:
247247

248248
```shell
249-
$ ruff check vectordb_bench --fix
249+
$ make format
250250
```
251251

252252
## How does it work?

pyproject.toml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ dynamic = ["version"]
4444

4545
[project.optional-dependencies]
4646
test = [
47+
"black",
4748
"ruff",
4849
"pytest",
4950
]
@@ -93,3 +94,116 @@ init_bench = "vectordb_bench.__main__:main"
9394
vectordbbench = "vectordb_bench.cli.vectordbbench:cli"
9495

9596
[tool.setuptools_scm]
97+
98+
[tool.black]
99+
line-length = 120
100+
target-version = ['py311']
101+
include = '\.pyi?$'
102+
103+
[tool.ruff]
104+
lint.select = [
105+
"E",
106+
"F",
107+
"C90",
108+
"I",
109+
"N",
110+
"B", "C", "G",
111+
"A",
112+
"ANN001",
113+
"S", "T", "W", "ARG", "BLE", "COM", "DJ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"
114+
]
115+
lint.ignore = [
116+
"BLE001", # blind-except (BLE001)
117+
"SLF001", # SLF001 Private member accessed [E]
118+
"TRY003", # [ruff] TRY003 Avoid specifying long messages outside the exception class [E]
119+
"FBT001", "FBT002", "FBT003",
120+
"G004", # [ruff] G004 Logging statement uses f-string [E]
121+
"UP031",
122+
"RUF012",
123+
"EM101",
124+
"N805",
125+
"ARG002",
126+
"ARG003",
127+
"PIE796", # https://github.com/zilliztech/VectorDBBench/issues/438
128+
"INP001", # TODO
129+
"TID252", # TODO
130+
"N801", "N802", "N815",
131+
"S101", "S108", "S603", "S311",
132+
"PLR2004",
133+
"RUF017",
134+
"C416",
135+
"PLW0603",
136+
]
137+
138+
# Allow autofix for all enabled rules (when `--fix` is provided).
139+
lint.fixable = [
140+
"A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W",
141+
"ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT",
142+
"ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH",
143+
"PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP",
144+
"YTT",
145+
]
146+
lint.unfixable = []
147+
148+
show-fixes = true
149+
150+
# Exclude a variety of commonly ignored directories.
151+
exclude = [
152+
".bzr",
153+
".direnv",
154+
".eggs",
155+
".git",
156+
".git-rewrite",
157+
".hg",
158+
".mypy_cache",
159+
".nox",
160+
".pants.d",
161+
".pytype",
162+
".ruff_cache",
163+
".svn",
164+
".tox",
165+
".venv",
166+
"__pypackages__",
167+
"_build",
168+
"buck-out",
169+
"build",
170+
"dist",
171+
"node_modules",
172+
"venv",
173+
"grpc_gen",
174+
"__pycache__",
175+
"frontend", # TODO
176+
"tests",
177+
]
178+
179+
# Same as Black.
180+
line-length = 120
181+
182+
# Allow unused variables when underscore-prefixed.
183+
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
184+
185+
# Assume Python 3.11
186+
target-version = "py311"
187+
188+
[tool.ruff.lint.mccabe]
189+
# Raise the McCabe complexity threshold above ruff's default of 10.
190+
max-complexity = 18
191+
192+
[tool.ruff.lint.pycodestyle]
193+
max-line-length = 120
194+
max-doc-length = 120
195+
196+
[tool.ruff.lint.pylint]
197+
max-args = 20
198+
max-branches = 15
199+
200+
[tool.ruff.lint.flake8-builtins]
201+
builtins-ignorelist = [
202+
# "format",
203+
# "next",
204+
# "object", # TODO
205+
# "id",
206+
# "dict", # TODO
207+
# "filter",
208+
]
209+

vectordb_bench/__init__.py

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -22,46 +22,71 @@ class config:
2222
DROP_OLD = env.bool("DROP_OLD", True)
2323
USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
2424

25-
NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )
25+
NUM_CONCURRENCY = env.list(
26+
"NUM_CONCURRENCY",
27+
[
28+
1,
29+
5,
30+
10,
31+
15,
32+
20,
33+
25,
34+
30,
35+
35,
36+
40,
37+
45,
38+
50,
39+
55,
40+
60,
41+
65,
42+
70,
43+
75,
44+
80,
45+
85,
46+
90,
47+
95,
48+
100,
49+
],
50+
subcast=int,
51+
)
2652

2753
CONCURRENCY_DURATION = 30
2854

2955
RESULTS_LOCAL_DIR = env.path(
30-
"RESULTS_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("results")
56+
"RESULTS_LOCAL_DIR",
57+
pathlib.Path(__file__).parent.joinpath("results"),
3158
)
3259
CONFIG_LOCAL_DIR = env.path(
33-
"CONFIG_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("config-files")
60+
"CONFIG_LOCAL_DIR",
61+
pathlib.Path(__file__).parent.joinpath("config-files"),
3462
)
3563

36-
3764
K_DEFAULT = 100 # default return top k nearest neighbors during search
3865
CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
3966

40-
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
41-
LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
42-
LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
43-
LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
44-
LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
67+
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
68+
LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
69+
LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
70+
LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
71+
LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
4572

46-
LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
47-
LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
73+
LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
74+
LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
4875

49-
OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
50-
OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
51-
OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
52-
OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
76+
OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
77+
OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
78+
OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
79+
OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
5380

81+
OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
82+
OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
5483

55-
OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
56-
OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
57-
5884
def display(self) -> str:
59-
tmp = [
60-
i for i in inspect.getmembers(self)
61-
if not inspect.ismethod(i[1])
62-
and not i[0].startswith('_')
63-
and "TIMEOUT" not in i[0]
85+
return [
86+
i
87+
for i in inspect.getmembers(self)
88+
if not inspect.ismethod(i[1]) and not i[0].startswith("_") and "TIMEOUT" not in i[0]
6489
]
65-
return tmp
90+
6691

6792
log_util.init(config.LOG_LEVEL)

vectordb_bench/__main__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
import traceback
21
import logging
2+
import pathlib
33
import subprocess
4-
import os
4+
import traceback
5+
56
from . import config
67

78
log = logging.getLogger("vectordb_bench")
@@ -16,7 +17,7 @@ def run_streamlit():
1617
cmd = [
1718
"streamlit",
1819
"run",
19-
f"{os.path.dirname(__file__)}/frontend/vdb_benchmark.py",
20+
f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
2021
"--logger.level",
2122
"info",
2223
"--theme.base",

vectordb_bench/backend/assembler.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,32 @@
1-
from .cases import CaseLabel
2-
from .task_runner import CaseRunner, RunningStatus, TaskRunner
3-
from ..models import TaskConfig
4-
from ..backend.clients import EmptyDBCaseConfig
5-
from ..backend.data_source import DatasetSource
61
import logging
72

3+
from vectordb_bench.backend.clients import EmptyDBCaseConfig
4+
from vectordb_bench.backend.data_source import DatasetSource
5+
from vectordb_bench.models import TaskConfig
6+
7+
from .cases import CaseLabel
8+
from .task_runner import CaseRunner, RunningStatus, TaskRunner
89

910
log = logging.getLogger(__name__)
1011

1112

1213
class Assembler:
1314
@classmethod
14-
def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
15+
def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
1516
c_cls = task.case_config.case_id.case_cls
1617

1718
c = c_cls(task.case_config.custom_case)
18-
if type(task.db_case_config) != EmptyDBCaseConfig:
19+
if type(task.db_case_config) is not EmptyDBCaseConfig:
1920
task.db_case_config.metric_type = c.dataset.data.metric_type
2021

21-
runner = CaseRunner(
22+
return CaseRunner(
2223
run_id=run_id,
2324
config=task,
2425
ca=c,
2526
status=RunningStatus.PENDING,
2627
dataset_source=source,
2728
)
2829

29-
return runner
30-
3130
@classmethod
3231
def assemble_all(
3332
cls,
@@ -50,12 +49,12 @@ def assemble_all(
5049
db2runner[db].append(r)
5150

5251
# check dbclient installed
53-
for k in db2runner.keys():
52+
for k in db2runner:
5453
_ = k.init_cls
5554

5655
# sort by dataset size
57-
for k in db2runner.keys():
58-
db2runner[k].sort(key=lambda x:x.ca.dataset.data.size)
56+
for k, _ in db2runner:
57+
db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)
5958

6059
all_runners = []
6160
all_runners.extend(load_runners)

0 commit comments

Comments
 (0)