@@ -26,6 +26,8 @@ def check_error_in_log(log_path):
2626 ("RPC" , r"TimeoutError: RPC call to execute_model timed out\." ),
2727 ("PLT" , r"assert prompt_len <= self\.tkv" ),
2828 ("CTL" , r"Please reduce the length of the messages or completion" ),
29+ ("REQ" , r"assert req_index is not None" ),
30+ ("CGF" , r"Failed to compile graphs: compile_graph failed" ),
2931 ]
3032
3133 try :
@@ -54,6 +56,43 @@ def extract_model_version(parsed_model, input_model):
5456
5557 return None
5658
def determine_precision(model_name, default_precision):
    """Choose the precision label to report for a model.

    Args:
        model_name: Model name to inspect; may be None or empty.
        default_precision: Value returned when the name does not mention fp8.

    Returns:
        'fp8' when ``model_name`` is truthy and contains 'fp8' in any letter
        case; otherwise ``default_precision`` unchanged.
    """
    name_mentions_fp8 = bool(model_name) and 'fp8' in model_name.lower()
    return 'fp8' if name_mentions_fp8 else default_precision
64+
def extract_context_length_direct(log_driver):
    """Read the engine's max model length from a driver log file.

    Scans the file line by line for the first line that starts with
    'FMWORK ARG' and contains '--engine:max_model_len@', then parses the
    integer that follows the '@' (optional whitespace allowed).

    Args:
        log_driver: Path to the driver log file.

    Returns:
        The parsed value as an int, or None when the file does not exist,
        cannot be read, or contains no matching line.
    """
    if not os.path.exists(log_driver):
        return None

    try:
        # The context manager closes the handle on every exit path,
        # including the early return below.
        with open(log_driver, errors='ignore') as log_file:
            for line in log_file:
                if not line.startswith('FMWORK ARG'):
                    continue
                if '--engine:max_model_len@' not in line:
                    continue
                match = re.search(r'--engine:max_model_len@\s*(\d+)', line)
                if match:
                    return int(match.group(1))
    except (OSError, ValueError):
        # Best-effort lookup: an unreadable file or unparsable number
        # is reported as "unknown" rather than aborting the caller.
        return None

    return None
80+
def extract_context_length_server(cmd_server):
    """Read the '--max-model-len' value from a server command file.

    Reads the whole file and returns the integer following the first
    '--max-model-len' flag (separated from it by whitespace).

    Args:
        cmd_server: Path to the file holding the server command line.

    Returns:
        The parsed value as an int, or None when the file does not exist,
        cannot be read or decoded, or does not contain the flag.
    """
    if not os.path.exists(cmd_server):
        return None

    try:
        # The context manager guarantees the handle is closed after the read.
        with open(cmd_server) as cmd_file:
            content = cmd_file.read()
        match = re.search(r'--max-model-len\s+(\d+)', content)
        if match:
            return int(match.group(1))
    except (OSError, ValueError):
        # Best-effort lookup: unreadable or undecodable files
        # (UnicodeDecodeError is a ValueError) are reported as "unknown".
        return None

    return None
95+
5796def get_server_completion_info (args_path ):
5897
5998 cmd_client = os .path .join (args_path , 'client.cmd' )
@@ -223,17 +262,26 @@ def process_direct(args):
223262 model_version = None
224263 final_model_name = model_name
225264
265+ # Determine precision based on model name
266+ final_precision = determine_precision (model_name , args .precision )
267+
268+ # Determine context length
269+ context_length = extract_context_length_direct (log_driver )
270+ if context_length is None and input_size is not None and output_size is not None :
271+ context_length = input_size + output_size
272+
226273 hits .append ({
227274 'timestamp' : time_start ,
228275 'metadata_id' : args .metadata_id ,
229276 'engine' : 'fmwork/infer/vllm' ,
230277 'model' : final_model_name ,
231278 'model_version' : model_version ,
232- 'precision' : args . precision ,
279+ 'precision' : final_precision ,
233280 'input' : input_size ,
234281 'output' : output_size ,
235282 'batch' : batch_size ,
236283 'tp' : tp_size ,
284+ 'context_length' : context_length ,
237285 'opts' : opts ,
238286 'warmup' : round (warmup , 3 ) if warmup is not None else None ,
239287 'setup' : setup ,
@@ -265,17 +313,26 @@ def process_direct(args):
265313 model_version = None
266314 final_model_name = model_name
267315
316+ # Determine precision based on model name
317+ final_precision = determine_precision (model_name , args .precision )
318+
319+ # Determine context length
320+ context_length = extract_context_length_direct (log_driver )
321+ if context_length is None and input_size is not None and output_size is not None :
322+ context_length = input_size + output_size
323+
268324 hits .append ({
269325 'timestamp' : None ,
270326 'metadata_id' : args .metadata_id ,
271327 'engine' : 'fmwork/infer/vllm' ,
272328 'model' : final_model_name ,
273329 'model_version' : model_version ,
274- 'precision' : args . precision ,
330+ 'precision' : final_precision ,
275331 'input' : input_size ,
276332 'output' : output_size ,
277333 'batch' : batch_size ,
278334 'tp' : tp_size ,
335+ 'context_length' : context_length ,
279336 'opts' : opts ,
280337 'warmup' : None ,
281338 'setup' : setup ,
@@ -505,17 +562,26 @@ def process_server(args):
505562 model_version = None
506563 final_model_name = model_name
507564
565+ # Determine precision based on model name
566+ final_precision = determine_precision (model_name , args .precision )
567+
568+ # Determine context length
569+ context_length = extract_context_length_server (cmd_server )
570+ if context_length is None and input_size is not None and output_size is not None :
571+ context_length = input_size + output_size
572+
508573 hits = [{
509574 'timestamp' : time_start ,
510575 'metadata_id' : args .metadata_id ,
511576 'engine' : 'fmwork/infer/vllm' ,
512577 'model' : final_model_name ,
513578 'model_version' : model_version ,
514- 'precision' : args . precision ,
579+ 'precision' : final_precision ,
515580 'input' : input_size ,
516581 'output' : output_size ,
517582 'batch' : batch_size ,
518583 'tp' : tp_size ,
584+ 'context_length' : context_length ,
519585 'opts' : opts ,
520586 'warmup' : None ,
521587 'setup' : None ,
0 commit comments