Skip to content

Commit 51c6b59

Browse files
author
niushengxiao
committed
feat: refine
1 parent 80f5832 commit 51c6b59

File tree

8 files changed

+71
-38
lines changed

8 files changed

+71
-38
lines changed

lightllm/server/api_cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,4 +540,10 @@ def make_argument_parser() -> argparse.ArgumentParser:
540540
parser.add_argument(
541541
"--disk_cache_storage_size", type=float, default=10, help="""The capacity of disk cache. GB used."""
542542
)
543+
parser.add_argument(
544+
"--disk_cache_dir",
545+
type=str,
546+
default=None,
547+
help="""Directory used to persist disk cache data. Defaults to a temp directory when not set.""",
548+
)
543549
return parser

lightllm/server/core/objs/atomic_lock.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,13 @@ def __exit__(self, exc_type, exc_val, exc_tb):
2929

3030
# acquire_sleep1ms and release are helper functions for explicitly acquiring and releasing the lock in certain specific scenarios
3131
def acquire_sleep1ms(self):
32+
last_log_time = time.monotonic()
3233
with atomics.atomicview(buffer=self.shm.buf, atype=atomics.INT) as a:
3334
while not a.cmpxchg_weak(0, 1):
34-
logger.warning("acquire_sleep1ms wait for 1ms")
35+
now = time.monotonic()
36+
if now - last_log_time >= 0.1:
37+
logger.warning("acquire_sleep1ms wait for 100ms")
38+
last_log_time = now
3539
time.sleep(0.001)
3640
pass
3741

lightllm/server/core/objs/start_args_type.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class StartArgs:
109109
cpu_cache_token_page_size: int = field(default=64)
110110
enable_disk_cache: bool = field(default=False)
111111
disk_cache_storage_size: float = field(default=10)
112+
disk_cache_dir: Optional[str] = field(default=None)
112113
# zmq ports
113114
router_port: int = field(default=None)
114115
detokenization_port: int = field(default=None)

lightllm/server/httpserver/manager.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,15 +612,21 @@ async def _wait_to_token_package(
612612
f"disk_prompt_cache_ratio:{disk_prompt_cache_ratio} "
613613
f"mtp_avg_token_per_step:{mtp_avg_token_per_step} "
614614
)
615+
if prompt_cache_len > 0:
616+
logger.info(
617+
f"[gpu cache hit] "
618+
f"prompt_cache_len:{prompt_cache_len} "
619+
f"prompt_cache_ratio:{prompt_cache_ratio} "
620+
)
615621
if cpu_prompt_cache_len > 0:
616622
logger.info(
617-
f"blueswhen "
623+
f"[cpu cache hit] "
618624
f"cpu_prompt_cache_len:{cpu_prompt_cache_len} "
619625
f"cpu_prompt_cache_ratio:{cpu_prompt_cache_ratio} "
620626
)
621627
if disk_prompt_cache_len > 0:
622628
logger.info(
623-
f"blueswhen "
629+
f"[disk cache hit] "
624630
f"disk_prompt_cache_len:{disk_prompt_cache_len} "
625631
f"disk_prompt_cache_ratio:{disk_prompt_cache_ratio} "
626632
)

lightllm/server/multi_level_kv_cache/cpu_cache_client.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ def update_pages_status_to_ready(
126126
assert cur_page.ref_count > 0
127127
cur_page.ref_count -= 1
128128

129+
# Gate on prompt length: shorter prompts are not offloaded to disk
130+
if disk_offload_enable and offload_candidates and len(page_list) * self.args.cpu_cache_token_page_size < 10000:
131+
print(f"skip disk offload for small page, length = {len(page_list) * self.args.cpu_cache_token_page_size}")
132+
self.mark_pages_recyclable(page_list=offload_candidates)
133+
return
134+
129135
if disk_offload_enable and offload_candidates:
130136
for idx, page_index in enumerate(offload_candidates):
131137
if idx == 0:
@@ -225,11 +231,19 @@ def recycle_pages(self, page_list: List[int]):
225231
if page_index == -1:
226232
continue
227233
cur_page: _CpuPageStatus = self.page_items.get_item_by_index(page_index)
228-
cur_page.del_self_from_list()
229-
if not cur_page.is_empty() and cur_page.hash_key != 0:
234+
235+
if cur_page.ref_count != 0:
236+
if cur_page.status == cur_page.LOADING and cur_page.ref_count == 1:
237+
cur_page.ref_count = 0
238+
else:
239+
continue
240+
241+
if cur_page.hash_key != 0:
230242
existing_index = self.page_hash_dict.get(cur_page.hash_key)
231-
if existing_index is not None:
243+
if existing_index is not None and existing_index == cur_page.self_index:
232244
self.page_hash_dict.remove(cur_page.hash_key)
245+
246+
cur_page.del_self_from_list()
233247
cur_page.hash_key = 0
234248
cur_page.status = cur_page.EMPTY
235249
cur_page.ref_count = 0

lightllm/server/multi_level_kv_cache/disk_cache_worker.py

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,28 @@ class _PagePayload:
2323
class DiskCacheWorker:
2424
"""Background worker that offloads CPU KV pages to disk using kvcache."""
2525

26-
def __init__(self, disk_cache_storage_size: float, cpu_cache_client):
26+
def __init__(
27+
self,
28+
disk_cache_storage_size: float,
29+
cpu_cache_client,
30+
disk_cache_dir: Optional[str] = None,
31+
):
2732
self.cpu_cache_client = cpu_cache_client
2833
self._pages_all_idle = False
2934

3035
assert disk_cache_storage_size > 0
3136
storage_size = int(disk_cache_storage_size * (1024 ** 3))
32-
num_shard = 32
33-
num_worker = 32
37+
num_shard = 64
38+
num_worker = 48
39+
max_concurrent_write_tasks = 16
3440

35-
cache_dir = os.getenv("LIGHTLLM_DISK_CACHE_DIR")
41+
cache_dir = disk_cache_dir
3642
if not cache_dir:
3743
cache_dir = os.path.join(tempfile.gettempdir(), f"lightllm_disk_cache_{get_unique_server_name()}")
3844
os.makedirs(cache_dir, exist_ok=True)
3945
cache_file = os.path.join(cache_dir, "cache_file")
4046

47+
self.max_concurrent_write_tasks = max_concurrent_write_tasks
4148
self._page_major_tensor = self._prepare_tensor(cpu_cache_client.cpu_kv_cache_tensor)
4249

4350
self.service = PyLocalCacheService(
@@ -49,7 +56,7 @@ def __init__(self, disk_cache_storage_size: float, cpu_cache_client):
4956
)
5057

5158
logger.info(
52-
"blueswhen disk cache worker initialized: dir=%s size_bytes=%d shards=%d workers=%d pages_per_block=%d",
59+
"disk cache worker initialized: dir=%s size_bytes=%d shards=%d workers=%d pages_per_block=%d",
5360
cache_dir,
5461
storage_size,
5562
num_shard,
@@ -63,35 +70,15 @@ def _prepare_tensor(self, tensor: torch.Tensor) -> torch.Tensor:
6370

6471
def run(self) -> None:
6572
while True:
66-
time.sleep(0.01)
73+
time.sleep(0.1)
6774
payload_groups = self._gather_offload_payloads()
68-
# self._log_idle_once()
6975
if not payload_groups:
7076
continue
7177
for payloads in payload_groups:
7278
if not payloads:
7379
continue
7480
self._persist_pages_to_disk(payloads)
7581

76-
def _log_idle_once(self) -> int:
77-
locked_pages = 0
78-
self.cpu_cache_client.lock.acquire_sleep1ms()
79-
try:
80-
for page_idx in range(self.cpu_cache_client.page_num):
81-
page_item = self.cpu_cache_client.page_items.get_item_by_index(page_idx)
82-
if not page_item.is_ready_recycle() or page_item.ref_count != 0:
83-
locked_pages += 1
84-
finally:
85-
self.cpu_cache_client.lock.release()
86-
87-
if locked_pages == 0:
88-
if not self._pages_all_idle:
89-
logger.info("blueswhen all cpu cache pages are idle and ready to reuse")
90-
self._pages_all_idle = True
91-
else:
92-
self._pages_all_idle = False
93-
return locked_pages
94-
9582
def _gather_offload_payloads(self) -> List[List[_PagePayload]]:
9683
self.cpu_cache_client.lock.acquire_sleep1ms()
9784
try:
@@ -120,16 +107,21 @@ def _persist_pages_to_disk(self, payloads: List[_PagePayload]) -> None:
120107
kv_indexer = torch.tensor(page_indexes, dtype=torch.int32, device="cpu")
121108
query_result = self.service.query(tokens)
122109
if not all(query_result):
110+
# Limit write concurrency to leave resources for read operations
111+
while (
112+
self.service.active_threads("r") and self.service.active_threads("w") >= self.max_concurrent_write_tasks
113+
):
114+
time.sleep(0.001)
115+
123116
task = self.service.create(tokens=tokens, kv_page_indexer=kv_indexer, mode="w")
124-
while not task.ready():
117+
# Stop waiting as soon as the data is safe; there is no need to wait for the write to complete
118+
while not task.data_safe():
125119
time.sleep(0.001)
126120

127121
self.cpu_cache_client.lock.acquire_sleep1ms()
128122
self.cpu_cache_client.update_pages_status_to_ready_recycle(page_list=page_indexes, deref=True)
129123
self.cpu_cache_client.lock.release()
130124

131-
# self._log_idle_once()
132-
133125
def blocks_exist(self, tokens: List[int], start_pos: int = 0) -> bool:
134126
if not tokens or start_pos < 0 or start_pos >= len(tokens):
135127
return False
@@ -147,6 +139,11 @@ def load_pages(self, tokens: List[int], page_indexes: List[int], start_pos: int
147139
if start_pos < 0 or start_pos >= len(tokens):
148140
return False
149141

142+
# Check whether a write is currently in progress and, if so, skip this load request (disabled for now)
143+
# if self.service.active_threads("w") > 0:
144+
# logger.warning("disk cache worker is busy writing, skip load_pages")
145+
# return False
146+
150147
kv_indexer = torch.tensor(page_indexes, dtype=torch.int32, device="cpu")
151148
task = self.service.create(tokens=tokens, kv_page_indexer=kv_indexer, mode="r", start_pos=start_pos)
152149
while not task.ready():

lightllm/server/multi_level_kv_cache/manager.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(
4848
self.disk_cache_worker = DiskCacheWorker(
4949
disk_cache_storage_size=self.args.disk_cache_storage_size,
5050
cpu_cache_client=self.cpu_cache_client,
51+
disk_cache_dir=self.args.disk_cache_dir,
5152
)
5253
self.disk_cache_thread = threading.Thread(target=self.disk_cache_worker.run, daemon=True)
5354
self.disk_cache_thread.start()
@@ -71,8 +72,8 @@ def _handle_group_req_cpu_cache_match(self, group_req_indexes: GroupReqIndexes,
7172
if current_time - start_time >= self.cpu_cache_time_out:
7273
self.send_to_router.send_pyobj(group_req_indexes, protocol=pickle.HIGHEST_PROTOCOL)
7374
logger.warning(
74-
f"blueswhen cpu cache match time out {current_time - start_time}s, "
75-
"group_req_id: {group_req_indexes.group_req_id}"
75+
f"cpu cache match time out {current_time - start_time}s, "
76+
f"group_req_id: {group_req_indexes.group_req_id}"
7677
)
7778
return
7879

requirements.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,8 @@ librosa==0.11.0
8787
cuda_bindings==12.9.0
8888
orjson==3.11.2
8989
setproctitle==1.3.6
90-
xxhash==3.6.0
90+
xxhash==3.6.0
91+
torchvision==0.23.0
92+
interegular
93+
partial_json_parser
94+
websockets

0 commit comments

Comments
 (0)