Skip to content

Commit ef20a3b

Browse files
authored
Merge pull request #49 from IBM/process
Adapt `process` to new updates
2 parents b20fff6 + 54d4ef4 commit ef20a3b

File tree

2 files changed

+74
-5
lines changed

2 files changed

+74
-5
lines changed

infer/vllm/process

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ def check_error_in_log(log_path):
2626
("RPC", r"TimeoutError: RPC call to execute_model timed out\."),
2727
("PLT", r"assert prompt_len <= self\.tkv"),
2828
("CTL", r"Please reduce the length of the messages or completion"),
29+
("REQ", r"assert req_index is not None"),
30+
("CGF", r"Failed to compile graphs: compile_graph failed"),
2931
]
3032

3133
try:
@@ -54,6 +56,43 @@ def extract_model_version(parsed_model, input_model):
5456

5557
return None
5658

59+
def determine_precision(model_name, default_precision):
    """Pick the precision value to report for a model.

    A model whose name contains ``fp8`` (compared case-insensitively) is
    reported as ``'fp8'``. Any other name, including an empty or ``None``
    name, yields *default_precision* unchanged.
    """
    name_mentions_fp8 = bool(model_name) and 'fp8' in model_name.lower()
    return 'fp8' if name_mentions_fp8 else default_precision
64+
65+
def extract_context_length_direct(log_driver):
    """Return the ``max_model_len`` engine argument recorded in a driver log.

    Scans *log_driver* line by line and uses the first line that starts with
    ``FMWORK ARG`` and contains ``--engine:max_model_len@`` followed by an
    integer (optional whitespace allowed after the ``@``).

    Args:
        log_driver: Path to the driver log file.

    Returns:
        The context length as an ``int``, or ``None`` when the file does not
        exist, cannot be read, or holds no matching line.
    """
    if not os.path.exists(log_driver):
        return None

    try:
        # The context manager closes the handle even on the early return
        # below; a bare ``open()`` in the ``for`` header would leave it open.
        with open(log_driver, errors='ignore') as log_file:
            for line in log_file:
                if not line.startswith('FMWORK ARG'):
                    continue
                match = re.search(r'--engine:max_model_len@\s*(\d+)', line)
                if match:
                    return int(match.group(1))
    except (OSError, ValueError):
        # Best effort: an unreadable log or an unparseable number means the
        # value is simply unknown, so fall through to ``None``.
        pass

    return None
80+
81+
def extract_context_length_server(cmd_server):
    """Return the ``--max-model-len`` value found in a server command file.

    Reads the whole of *cmd_server* and uses the first occurrence of
    ``--max-model-len`` followed by an integer. Both the space-separated
    (``--max-model-len 4096``) and the ``=``-joined
    (``--max-model-len=4096``) spellings are recognised.

    Args:
        cmd_server: Path to the file holding the server command line.

    Returns:
        The context length as an ``int``, or ``None`` when the file does not
        exist, cannot be read, or does not contain the option.
    """
    if not os.path.exists(cmd_server):
        return None

    try:
        # ``errors='ignore'`` mirrors the driver-log reader so one stray
        # undecodable byte does not discard an otherwise readable command;
        # the context manager guarantees the handle is closed.
        with open(cmd_server, errors='ignore') as cmd_file:
            content = cmd_file.read()

        match = re.search(r'--max-model-len(?:\s+|=)(\d+)', content)
        if match:
            return int(match.group(1))
    except (OSError, ValueError):
        # Best effort: an unreadable file or an unparseable number means the
        # value is simply unknown, so fall through to ``None``.
        pass

    return None
95+
5796
def get_server_completion_info(args_path):
5897

5998
cmd_client = os.path.join(args_path, 'client.cmd')
@@ -223,17 +262,26 @@ def process_direct(args):
223262
model_version = None
224263
final_model_name = model_name
225264

265+
# Determine precision based on model name
266+
final_precision = determine_precision(model_name, args.precision)
267+
268+
# Determine context length
269+
context_length = extract_context_length_direct(log_driver)
270+
if context_length is None and input_size is not None and output_size is not None:
271+
context_length = input_size + output_size
272+
226273
hits.append({
227274
'timestamp' : time_start,
228275
'metadata_id' : args.metadata_id,
229276
'engine' : 'fmwork/infer/vllm',
230277
'model' : final_model_name,
231278
'model_version' : model_version,
232-
'precision' : args.precision,
279+
'precision' : final_precision,
233280
'input' : input_size,
234281
'output' : output_size,
235282
'batch' : batch_size,
236283
'tp' : tp_size,
284+
'context_length': context_length,
237285
'opts' : opts,
238286
'warmup' : round(warmup, 3) if warmup is not None else None,
239287
'setup' : setup,
@@ -265,17 +313,26 @@ def process_direct(args):
265313
model_version = None
266314
final_model_name = model_name
267315

316+
# Determine precision based on model name
317+
final_precision = determine_precision(model_name, args.precision)
318+
319+
# Determine context length
320+
context_length = extract_context_length_direct(log_driver)
321+
if context_length is None and input_size is not None and output_size is not None:
322+
context_length = input_size + output_size
323+
268324
hits.append({
269325
'timestamp' : None,
270326
'metadata_id' : args.metadata_id,
271327
'engine' : 'fmwork/infer/vllm',
272328
'model' : final_model_name,
273329
'model_version' : model_version,
274-
'precision' : args.precision,
330+
'precision' : final_precision,
275331
'input' : input_size,
276332
'output' : output_size,
277333
'batch' : batch_size,
278334
'tp' : tp_size,
335+
'context_length': context_length,
279336
'opts' : opts,
280337
'warmup' : None,
281338
'setup' : setup,
@@ -505,17 +562,26 @@ def process_server(args):
505562
model_version = None
506563
final_model_name = model_name
507564

565+
# Determine precision based on model name
566+
final_precision = determine_precision(model_name, args.precision)
567+
568+
# Determine context length
569+
context_length = extract_context_length_server(cmd_server)
570+
if context_length is None and input_size is not None and output_size is not None:
571+
context_length = input_size + output_size
572+
508573
hits = [{
509574
'timestamp' : time_start,
510575
'metadata_id' : args.metadata_id,
511576
'engine' : 'fmwork/infer/vllm',
512577
'model' : final_model_name,
513578
'model_version' : model_version,
514-
'precision' : args.precision,
579+
'precision' : final_precision,
515580
'input' : input_size,
516581
'output' : output_size,
517582
'batch' : batch_size,
518583
'tp' : tp_size,
584+
'context_length': context_length,
519585
'opts' : opts,
520586
'warmup' : None,
521587
'setup' : None,

infer/vllm/runner

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ echo "terminating server ..." >> ${odir}/server.log
183183
kill -2 ${vllm_server_pid}
184184
kill -2 ${server_pid}
185185

186+
if [[ -f ${odir}/server.log ]]; then
187+
cat ${odir}/server.log
188+
fi
189+
186190
}
187191

188192
# ==============================================================================
@@ -245,5 +249,4 @@ fi
245249

246250
echo "time_end $(date "+%s.%N")" >> ${odir}/runner.log
247251

248-
echo
249-
252+
echo

0 commit comments

Comments
 (0)