2020import re
2121import sys
2222import time
23+ from datetime import datetime
2324
2425from defs .trt_test_alternative import print_info
2526
3233PROJECT_ROOT = "sandbox-temp-trtllm-ci-perf-v1" # "sandbox-trtllm-ci-perf"
3334TEST_INFO_PROJECT_NAME = f"{ PROJECT_ROOT } -test_info"
3435
35- # Server config fields to compare
36- SERVER_FIELDS = [
37- "s_model_name" ,
38- "l_gpus" ,
39- "l_tp" ,
40- "l_ep" ,
41- "l_pp" ,
42- "l_max_num_tokens" ,
43- "b_enable_chunked_prefill" ,
44- "b_disable_overlap_scheduler" ,
45- "s_attention_backend" ,
46- "s_moe_backend" ,
47- "l_moe_max_num_tokens" ,
48- "l_stream_interval" ,
49- "b_enable_attention_dp" ,
50- "b_attention_dp_balance" ,
51- "l_batching_wait_iters" ,
52- "l_timeout_iters" ,
53- "s_kv_cache_dtype" ,
54- "b_enable_block_reuse" ,
55- "d_free_gpu_memory_fraction" ,
56- "l_max_batch_size" ,
57- "b_enable_padding" ,
58- ]
59-
60- # Client config fields to compare
61- CLIENT_FIELDS = [
62- "l_concurrency" ,
63- "l_iterations" ,
64- "l_isl" ,
65- "l_osl" ,
66- "d_random_range_ratio" ,
67- ]
68-
6936# Metrics where larger is better
7037MAXIMIZE_METRICS = [
7138 "d_seq_throughput" ,
@@ -189,7 +156,7 @@ def get_job_info():
189156 }
190157
191158
192- def query_history_data ():
159+ def query_history_data (gpu_type ):
193160 """
194161 Query post-merge data with specific gpu type and model name
195162 """
@@ -209,6 +176,16 @@ def query_history_data():
209176 "b_is_post_merge" : True
210177 }
211178 },
179+ {
180+ "term" : {
181+ "b_is_regression" : False
182+ }
183+ },
184+ {
185+ "term" : {
186+ "s_gpu_type" : gpu_type
187+ }
188+ },
212189 {
213190 "range" : {
214191 "ts_created" : {
@@ -267,17 +244,32 @@ def match(history_data, new_data):
267244 """
268245 Check if the server and client config of history data matches the new data
269246 """
270- # Combine all fields to compare (excluding log links)
271- fields_to_compare = SERVER_FIELDS + CLIENT_FIELDS
272247
def is_empty(value):
    """Tell whether a value counts as empty: either None or the empty string."""
    # Guard clause for None first; everything else is compared to "".
    if value is None:
        return True
    return value == ""
276251
277- # Compare each field
278- for field in fields_to_compare :
279- history_value = history_data .get (field )
280- new_value = new_data .get (field )
def should_skip_field(field):
    """Check if a field should be skipped in comparison.

    Args:
        field: Field name (a key of the new data record).

    Returns:
        True if the field name starts with '@', '_' or 'ts_', or if it is one
        of the log-link / speculative-model-dir fields; False otherwise.
    """
    # Skip fields starting with @, _, ts_ (str.startswith accepts a tuple of
    # prefixes, so one call covers all three).
    if field.startswith(('@', '_', 'ts_')):
        return True
    # Skip log links and speculative_model_dir
    return field in (
        's_speculative_model_dir',
        's_server_log_link',
        's_client_log_link',
    )
265+
266+ # Compare each field in new_data
267+ for field , new_value in new_data .items ():
268+ # Skip excluded fields
269+ if should_skip_field (field ):
270+ continue
271+
272+ history_value = history_data .get (field , None )
281273
282274 # If both are empty, consider them equal
283275 if is_empty (history_value ) and is_empty (new_value ):
@@ -339,27 +331,44 @@ def calculate_best_perf_result(history_data_list, new_data):
339331 return best_metrics
340332
341333
def get_history_data(new_data_dict, gpu_type):
    """
    Query history post-merge data for each cmd_idx

    Args:
        new_data_dict: Mapping of cmd_idx to the new data record for that
            command.
        gpu_type: GPU type passed through to query_history_data().

    Returns:
        A tuple (history_baseline_dict, history_data_dict):
        - history_baseline_dict maps every cmd_idx to the matching baseline
          record with the latest "ts_created", or None when no baseline
          matched.
        - history_data_dict maps every cmd_idx to the list of matching
          non-baseline history records (possibly empty).
    """

    def get_latest_data(data_list):
        """Return the record with the maximum ts_created, or None if empty."""
        if not data_list:
            return None
        time_format = "%b %d, %Y @ %H:%M:%S.%f"
        return max(
            data_list,
            key=lambda record: datetime.strptime(record["ts_created"],
                                                 time_format))

    cmd_idxs = list(new_data_dict)
    baseline_candidates = {cmd_idx: [] for cmd_idx in cmd_idxs}
    history_data_dict = {cmd_idx: [] for cmd_idx in cmd_idxs}

    # Only query the database when there is at least one cmd_idx to match.
    history_data_list = query_history_data(gpu_type) if cmd_idxs else []

    # query_history_data() may yield nothing; treat that as "no history".
    for history_data in history_data_list or []:
        for cmd_idx in cmd_idxs:
            if not match(history_data, new_data_dict[cmd_idx]):
                continue
            if history_data.get("b_is_baseline") is True:
                baseline_candidates[cmd_idx].append(history_data)
            else:
                history_data_dict[cmd_idx].append(history_data)
            # Each history record is assigned to the first matching cmd_idx.
            break

    # Sometimes the database has several baselines; only the latest is used.
    history_baseline_dict = {
        cmd_idx: get_latest_data(candidates)
        for cmd_idx, candidates in baseline_candidates.items()
    }
    return history_baseline_dict, history_data_dict
364373
365374
@@ -477,6 +486,8 @@ def post_new_perf_data(new_baseline_data_dict, new_data_dict,
477486 # Only post regressive test cases when post-merge.
478487 if new_baseline_data_dict :
479488 data_list .extend (regressive_data_list )
489+ if not data_list :
490+ return
480491 try :
481492 print_info (
482493 f"Ready to post { len (data_list )} data to { TEST_INFO_PROJECT_NAME } " )
0 commit comments