Skip to content

Commit 54d4ef4

Browse files
committed
added context_length field
Signed-off-by: Zhuoran-Liu <[email protected]>
1 parent 888eaa1 commit 54d4ef4

File tree

1 file changed

+58
-6
lines changed

1 file changed

+58
-6
lines changed

infer/vllm/process

Lines changed: 58 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -57,14 +57,42 @@ def extract_model_version(parsed_model, input_model):
5757
return None
5858

5959
def determine_precision(model_name, default_precision):
    """Pick the precision label to report for a model.

    Args:
        model_name: Model name to inspect; may be ``None`` or empty.
        default_precision: Value returned when the name gives no hint.

    Returns:
        ``'fp8'`` when ``model_name`` contains ``fp8`` (compared
        case-insensitively), otherwise ``default_precision`` unchanged.
    """
    # A missing or empty name can never mention fp8, so short-circuit before
    # calling .lower() on it.
    mentions_fp8 = bool(model_name) and 'fp8' in model_name.lower()
    return 'fp8' if mentions_fp8 else default_precision
6764

65+
def extract_context_length_direct(log_driver):
    """Read the engine's max model length from a direct-mode driver log.

    The log is scanned line by line for the first line that starts with
    ``FMWORK ARG`` and contains ``--engine:max_model_len@`` followed by
    (optional whitespace and) a run of digits.

    Args:
        log_driver: Path of the driver log file; ``None`` is tolerated.

    Returns:
        The length as an ``int``, or ``None`` when the path is ``None``,
        the file is missing or unreadable, or no matching line is found.
        This is a best-effort lookup and never raises for I/O problems.
    """
    if log_driver is None:
        return None

    try:
        # Open directly instead of checking os.path.exists first: that avoids
        # the check-then-open race, and `with` guarantees the handle is
        # closed even when we return from the middle of the loop.
        with open(log_driver, errors='ignore') as log_file:
            for line in log_file:
                if not line.startswith('FMWORK ARG'):
                    continue
                match = re.search(r'--engine:max_model_len@\s*(\d+)', line)
                if match:
                    return int(match.group(1))
    except (OSError, ValueError):
        # OSError: missing file, directory, permission problems.
        # ValueError: malformed path or an unconvertible digit run.
        return None

    return None
80+
81+
def extract_context_length_server(cmd_server):
    """Read the ``--max-model-len`` value from a server command file.

    The whole file is searched for the first ``--max-model-len`` option
    whose value is a run of digits. Both the space-separated form
    (``--max-model-len 4096``) and the ``=`` form (``--max-model-len=4096``)
    are recognised.

    Args:
        cmd_server: Path of the file holding the server command line;
            ``None`` is tolerated.

    Returns:
        The value as an ``int``, or ``None`` when the path is ``None``,
        the file is missing or unreadable, or the option is not present.
        This is a best-effort lookup and never raises for I/O problems.
    """
    if cmd_server is None:
        return None

    try:
        # `with` closes the handle deterministically; opening directly also
        # removes the exists-then-open race of a separate existence check.
        with open(cmd_server) as cmd_file:
            content = cmd_file.read()
        match = re.search(r'--max-model-len(?:\s+|=)(\d+)', content)
        return int(match.group(1)) if match else None
    except (OSError, ValueError):
        # OSError: missing file, directory, permission problems.
        # ValueError: undecodable bytes (UnicodeDecodeError), malformed path,
        # or an unconvertible digit run.
        return None
95+
6896
def get_server_completion_info(args_path):
6997

7098
cmd_client = os.path.join(args_path, 'client.cmd')
@@ -234,17 +262,26 @@ def process_direct(args):
234262
model_version = None
235263
final_model_name = model_name
236264

265+
# Determine precision based on model name
266+
final_precision = determine_precision(model_name, args.precision)
267+
268+
# Determine context length
269+
context_length = extract_context_length_direct(log_driver)
270+
if context_length is None and input_size is not None and output_size is not None:
271+
context_length = input_size + output_size
272+
237273
hits.append({
238274
'timestamp' : time_start,
239275
'metadata_id' : args.metadata_id,
240276
'engine' : 'fmwork/infer/vllm',
241277
'model' : final_model_name,
242278
'model_version' : model_version,
243-
'precision' : args.precision,
279+
'precision' : final_precision,
244280
'input' : input_size,
245281
'output' : output_size,
246282
'batch' : batch_size,
247283
'tp' : tp_size,
284+
'context_length': context_length,
248285
'opts' : opts,
249286
'warmup' : round(warmup, 3) if warmup is not None else None,
250287
'setup' : setup,
@@ -276,17 +313,26 @@ def process_direct(args):
276313
model_version = None
277314
final_model_name = model_name
278315

316+
# Determine precision based on model name
317+
final_precision = determine_precision(model_name, args.precision)
318+
319+
# Determine context length
320+
context_length = extract_context_length_direct(log_driver)
321+
if context_length is None and input_size is not None and output_size is not None:
322+
context_length = input_size + output_size
323+
279324
hits.append({
280325
'timestamp' : None,
281326
'metadata_id' : args.metadata_id,
282327
'engine' : 'fmwork/infer/vllm',
283328
'model' : final_model_name,
284329
'model_version' : model_version,
285-
'precision' : args.precision,
330+
'precision' : final_precision,
286331
'input' : input_size,
287332
'output' : output_size,
288333
'batch' : batch_size,
289334
'tp' : tp_size,
335+
'context_length': context_length,
290336
'opts' : opts,
291337
'warmup' : None,
292338
'setup' : setup,
@@ -519,6 +565,11 @@ def process_server(args):
519565
# Determine precision based on model name
520566
final_precision = determine_precision(model_name, args.precision)
521567

568+
# Determine context length
569+
context_length = extract_context_length_server(cmd_server)
570+
if context_length is None and input_size is not None and output_size is not None:
571+
context_length = input_size + output_size
572+
522573
hits = [{
523574
'timestamp' : time_start,
524575
'metadata_id' : args.metadata_id,
@@ -530,6 +581,7 @@ def process_server(args):
530581
'output' : output_size,
531582
'batch' : batch_size,
532583
'tp' : tp_size,
584+
'context_length': context_length,
533585
'opts' : opts,
534586
'warmup' : None,
535587
'setup' : None,

0 commit comments

Comments
 (0)