Skip to content

Commit 09c8401

Browse files
authored
[None][fix] Prevent YAML partial kv_cache_config from incorrectly overriding the complete kv_cache_config (#9262)
Signed-off-by: Yuening Li <[email protected]>
1 parent c9771eb commit 09c8401

File tree

1 file changed: 10 additions and 0 deletions.

tensorrt_llm/llmapi/llm_args.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3032,6 +3032,15 @@ def update_llm_args_with_extra_dict(
30323032
llm_args_dict: Dict,
30333033
extra_llm_api_options: Optional[str] = None) -> Dict:
30343034

3035+
# Merge kv_cache_config key by key (shallow dict union; YAML-provided values take precedence) so that a partial YAML kv_cache_config does not replace the complete kv_cache_config
3036+
if 'kv_cache_config' in llm_args and 'kv_cache_config' in llm_args_dict:
3037+
# Convert KvCacheConfig object to dict if necessary
3038+
base_kv_config = llm_args['kv_cache_config']
3039+
if isinstance(base_kv_config, KvCacheConfig):
3040+
base_kv_config = base_kv_config.model_dump(exclude_unset=True)
3041+
llm_args_dict['kv_cache_config'] = base_kv_config | llm_args_dict[
3042+
'kv_cache_config']
3043+
30353044
field_mapping = {
30363045
"quant_config": QuantConfig,
30373046
"calib_config": CalibConfig,
@@ -3043,6 +3052,7 @@ def update_llm_args_with_extra_dict(
30433052
"moe_config": MoeConfig,
30443053
"attention_dp_config": AttentionDpConfig,
30453054
"sparse_attention_config": BaseSparseAttentionConfig,
3055+
"kv_cache_config": KvCacheConfig,
30463056
}
30473057
for field_name, field_type in field_mapping.items():
30483058
if field_name in llm_args_dict:

0 commit comments

Comments
 (0)