Commit a34fd11

fix
1 parent 49a5a15 commit a34fd11

6 files changed: 18 additions & 33 deletions


docs/CN/source/tutorial/api_server_args_zh.rst

Lines changed: 0 additions & 4 deletions
@@ -274,10 +274,6 @@ attention类型选择参数
 
 多模态资源的缓存服务器容量,默认为 ``200``
 
-.. option:: --cache_reserved_ratio
-
-缓存服务器清理后的保留容量比例,默认为 ``0.5``
-
 .. option:: --visual_infer_batch_size
 
 每次推理批次中处理的图像数量,默认为 ``1``

docs/EN/source/tutorial/api_server_args_zh.rst

Lines changed: 0 additions & 4 deletions
@@ -273,10 +273,6 @@ Multimodal Parameters
 
 Cache server capacity for multimodal resources, default is ``200``
 
-.. option:: --cache_reserved_ratio
-
-Reserved capacity ratio after cache server cleanup, default is ``0.5``
-
 .. option:: --visual_infer_batch_size
 
 Number of images processed in each inference batch, default is ``1``

lightllm/server/api_cli.py

Lines changed: 0 additions & 3 deletions
@@ -288,9 +288,6 @@ def make_argument_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--cache_capacity", type=int, default=200, help="cache server capacity for multimodal resources"
     )
-    parser.add_argument(
-        "--cache_reserved_ratio", type=float, default=0.5, help="cache server reserved capacity ratio after clear"
-    )
     parser.add_argument(
         "--data_type",
         type=str,
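
Since the flag is deleted from make_argument_parser rather than deprecated, a launch script that still passes --cache_reserved_ratio will now be rejected at startup, assuming the server parses with argparse's default parse_args rather than parse_known_args. A tiny stand-alone sketch of that failure mode (the parser below is a stand-in, not LightLLM's full one):

import argparse

# Stand-in parser that keeps only the surviving option.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--cache_capacity", type=int, default=200, help="cache server capacity for multimodal resources"
)

try:
    # Old-style invocation still passing the removed flag.
    parser.parse_args(["--cache_capacity", "200", "--cache_reserved_ratio", "0.5"])
except SystemExit:
    print("argparse exited: unrecognized arguments: --cache_reserved_ratio 0.5")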

lightllm/server/core/objs/start_args_type.py

Lines changed: 0 additions & 1 deletion
@@ -57,7 +57,6 @@ class StartArgs:
     enable_decode_microbatch_overlap: bool = field(default=False)
     enable_prefill_microbatch_overlap: bool = field(default=False)
     cache_capacity: int = field(default=200)
-    cache_reserved_ratio: float = field(default=0.5)
     data_type: Optional[str] = field(
         default=None, metadata={"choices": ["fp16", "float16", "bf16", "bfloat16", "fp32", "float32"]}
     )

lightllm/server/embed_cache/impl/naive_memory_cache.py

Lines changed: 12 additions & 14 deletions
@@ -32,8 +32,6 @@ def __init__(self, args) -> None:
         self._records = dict()
         self._md5_to_record = dict()
         self.capacity = max(1, args.cache_capacity)
-        self.reserved = max(0, int(self.capacity * args.cache_reserved_ratio))
-        self.reserved = min(self.reserved, self.capacity - 1)
         self.occupied = 0
         self.expired_secs = 60 * 60
         self.lock = threading.Lock()
@@ -69,9 +67,9 @@ def _check_and_set_new_id_range(self, alloced_token_num):
             time.sleep(3)
         return
 
-    def _clear(self):
+    def _clear(self, free_max_count: int):
         deleted = 0
-        max_delete = max(1, self.occupied - self.reserved)
+        max_delete = free_max_count
         items = sorted(self._records.items(), key=lambda x: x[1].visittime)
         t = time.time()
         for id, record in items:
@@ -91,10 +89,10 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
         now = time.time()
         with self.lock:
             new_md5s = [m for m in md5sum_list if m not in self._md5_to_record]
-            new_needed = len(new_md5s)
+            new_needed = len(set(new_md5s))
 
             if self.occupied + new_needed > self.capacity:
-                self._clear()
+                self._clear(free_max_count=new_needed - (self.capacity - self.occupied))
                 if self.occupied + new_needed > self.capacity:
                     return None
 
@@ -127,19 +125,19 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
 
     def release(self, ids: list[int]) -> None:
         with self.lock:
-            for id in ids:
-                self._records[id].ref -= 1
+            for id_ in ids:
+                self._records[id_].ref -= 1
 
     def set_items_data(self, ids: list[int]) -> None:
-        for id in ids:
-            self._records[id].data = True
+        for id_ in ids:
+            self._records[id_].data = True
 
     def get_items_data(self, ids: list[int]) -> list[Optional[bool]]:
-        return [self._records.get(i).data if i in self._records else False for i in ids]
+        return [self._records.get(id_).data if id_ in self._records else False for id_ in ids]
 
     def set_items_embed(self, ids: list[int]) -> None:
-        for id in ids:
-            self._records[id].embed = True
+        for id_ in ids:
+            self._records[id_].embed = True
 
     def get_items_embed(self, ids: list[int]) -> list[Optional[bool]]:
-        return [self._records.get(i).embed if i in self._records else False for i in ids]
+        return [self._records.get(id_).embed if id_ in self._records else False for id_ in ids]
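
The net effect in naive_memory_cache.py is that eviction becomes demand-driven: instead of trimming the cache down to a precomputed reserved level, alloc asks _clear to free at most the number of slots the incoming request actually lacks, and new_needed now counts distinct md5 keys. Below is a self-contained sketch of that pattern, assuming a simplified Record with only ref and visittime fields (the real record also tracks ids, token counts, and expiry):

import time
import threading
from dataclasses import dataclass, field


@dataclass
class Record:
    # Simplified stand-in for the real cache record.
    ref: int = 0
    visittime: float = field(default_factory=time.time)


class DemandDrivenCache:
    """Evict only as many entries as a new allocation actually needs."""

    def __init__(self, capacity: int):
        self.capacity = max(1, capacity)
        self.occupied = 0
        self._records: dict[str, Record] = {}
        self.lock = threading.Lock()

    def _clear(self, free_max_count: int) -> None:
        # Oldest-first scan; drop unreferenced records until the shortfall is covered.
        # The real cache additionally requires entries to be past their expiry window.
        deleted = 0
        for key, record in sorted(self._records.items(), key=lambda x: x[1].visittime):
            if deleted >= free_max_count:
                break
            if record.ref == 0:
                del self._records[key]
                self.occupied -= 1
                deleted += 1

    def alloc(self, md5sum_list: list[str]) -> bool:
        with self.lock:
            new_needed = len({m for m in md5sum_list if m not in self._records})
            if self.occupied + new_needed > self.capacity:
                # Free exactly the shortfall, not capacity * reserved_ratio.
                self._clear(free_max_count=new_needed - (self.capacity - self.occupied))
                if self.occupied + new_needed > self.capacity:
                    return False  # caller backs off and retries
            for m in md5sum_list:
                record = self._records.setdefault(m, Record())
                record.ref += 1
                record.visittime = time.time()
            self.occupied = len(self._records)
            return True

With a full 200-entry cache and a request needing three new entries, this evicts at most three cold, unreferenced records instead of clearing down to a reserved watermark, which is why cache_reserved_ratio could be dropped entirely.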

lightllm/server/httpserver/manager.py

Lines changed: 6 additions & 7 deletions
@@ -115,13 +115,12 @@ def __init__(
         return
 
     async def _alloc_resource(self, items, md5sums, token_nums, datas):
-        wait_time = 1
+
         while True:
             records = obtain(self.cache_client.root.alloc(md5sums, token_nums))
 
             if records is None:
-                await asyncio.sleep(wait_time)
-                wait_time = min(wait_time + 0.5, 2)
+                await asyncio.sleep(0.1)
                 continue
 
             uid_list = []
@@ -155,19 +154,19 @@ async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams,
             self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
             data = img.read()
             # must after init_imageitem_extral_params
-            tokens_num = self.tokenizer.get_image_token_length(img)
+            token_num = self.tokenizer.get_image_token_length(img)
             md5sum = hashlib.md5(data).hexdigest() + "_" + str(hash(frozendict(img.extra_params)))
             md5sums.append(md5sum)
-            tokens_nums.append(tokens_num)
+            tokens_nums.append(token_num)
            datas.append(data)
             items.append(img)
         for audio in multimodal_params.audios:
             self.tokenizer.init_audioitem_extral_params(audio, multimodal_params, sampling_params)
             data = audio.read()
-            tokens_num = self.tokenizer.get_audio_token_length(audio)
+            token_num = self.tokenizer.get_audio_token_length(audio)
             md5sum = hashlib.md5(data).hexdigest() + "_" + str(hash(frozendict(audio.extra_params)))
             md5sums.append(md5sum)
-            tokens_nums.append(tokens_num)
+            tokens_nums.append(token_num)
             datas.append(data)
             items.append(audio)
 
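
On the HTTP-server side, the allocation retry no longer ramps its own back-off from 1 s toward 2 s; it simply polls the cache at a fixed 0.1 s interval until alloc succeeds, which pairs with the demand-driven eviction above. A minimal asyncio sketch of that loop, with fake_alloc standing in for the obtain(self.cache_client.root.alloc(...)) RPC:

import asyncio
import random


async def fake_alloc(md5sums, token_nums):
    # Stand-in for the cache-server RPC: returns None while the cache has no room.
    if random.random() < 0.7:
        return None
    return [{"md5sum": m, "token_num": n} for m, n in zip(md5sums, token_nums)]


async def alloc_resource(md5sums, token_nums):
    while True:
        records = await fake_alloc(md5sums, token_nums)
        if records is None:
            # Fixed short poll replaces the old growing wait_time back-off;
            # demand-driven eviction frees slots quickly, so retries stay cheap.
            await asyncio.sleep(0.1)
            continue
        return records


if __name__ == "__main__":
    print(asyncio.run(alloc_resource(["img_md5", "audio_md5"], [576, 128])))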