2020import re
2121import sys
2222import time
23+ from datetime import datetime
2324
2425from defs .trt_test_alternative import print_info
2526
3637SERVER_FIELDS = [
3738 "s_model_name" ,
3839 "l_gpus" ,
40+ "l_gpus_per_node" ,
3941 "l_tp" ,
4042 "l_ep" ,
4143 "l_pp" ,
4244 "l_max_num_tokens" ,
45+ "l_cuda_graph_max_batch_size" ,
4346 "b_enable_chunked_prefill" ,
4447 "b_disable_overlap_scheduler" ,
4548 "s_attention_backend" ,
4649 "s_moe_backend" ,
4750 "l_moe_max_num_tokens" ,
51+ "l_num_postprocess_workers" ,
4852 "l_stream_interval" ,
4953 "b_enable_attention_dp" ,
5054 "b_attention_dp_balance" ,
5559 "d_free_gpu_memory_fraction" ,
5660 "l_max_batch_size" ,
5761 "b_enable_padding" ,
62+ "s_spec_decoding_type" ,
63+ "l_num_nextn_predict_layers" ,
64+ "l_eagle3_layers_to_capture" ,
65+ "l_max_draft_len" ,
66+ "s_speculative_model_dir" ,
5867]
5968
6069# Client config fields to compare
6473 "l_isl" ,
6574 "l_osl" ,
6675 "d_random_range_ratio" ,
76+ "s_backend" ,
77+ "b_use_chat_template" ,
6778]
6879
6980# Metrics where larger is better
@@ -189,7 +200,7 @@ def get_job_info():
189200 }
190201
191202
192- def query_history_data ():
203+ def query_history_data (gpu_type ):
193204 """
194205 Query post-merge data with specific gpu type and model name
195206 """
@@ -209,6 +220,16 @@ def query_history_data():
209220 "b_is_post_merge" : True
210221 }
211222 },
223+ {
224+ "term" : {
225+ "b_is_regression" : False
226+ }
227+ },
228+ {
229+ "term" : {
230+ "s_gpu_type" : gpu_type
231+ }
232+ },
212233 {
213234 "range" : {
214235 "ts_created" : {
@@ -339,27 +360,44 @@ def calculate_best_perf_result(history_data_list, new_data):
339360 return best_metrics
340361
341362
def get_history_data(new_data_dict, gpu_type):
    """
    Query history post-merge data for each cmd_idx.

    Args:
        new_data_dict: Mapping of cmd_idx -> new perf data entry. Each
            history entry is compared against these entries with ``match``.
        gpu_type: GPU type passed through to ``query_history_data`` to
            restrict the history query.

    Returns:
        A ``(history_baseline_dict, history_data_dict)`` tuple:

        * ``history_baseline_dict`` maps every cmd_idx to the most recent
          matching baseline entry (by ``ts_created``), or ``None`` when no
          baseline matched.
        * ``history_data_dict`` maps every cmd_idx to the list of matching
          non-baseline history entries (possibly empty).
    """

    def get_latest_data(data_list):
        """Return the entry with the newest ``ts_created``, or None if empty."""
        if not data_list:
            return None
        # ts_created is parsed as a string such as "Jan 02, 2025 @ 13:45:10.123".
        time_format = "%b %d, %Y @ %H:%M:%S.%f"
        return max(
            data_list,
            key=lambda x: datetime.strptime(x["ts_created"], time_format))

    cmd_idxs = list(new_data_dict)
    baseline_candidates = {cmd_idx: [] for cmd_idx in cmd_idxs}
    history_data_dict = {cmd_idx: [] for cmd_idx in cmd_idxs}

    # Skip the database round-trip entirely when there is nothing to compare.
    history_data_list = query_history_data(gpu_type) if cmd_idxs else []

    # `or []` keeps a None/empty query result from breaking the loop.
    for history_data in history_data_list or []:
        for cmd_idx in cmd_idxs:
            if not match(history_data, new_data_dict[cmd_idx]):
                continue
            # Equality (not identity) is kept on purpose so that stored
            # values equal to True are still treated as baselines.
            if history_data.get("b_is_baseline") == True:  # noqa: E712
                baseline_candidates[cmd_idx].append(history_data)
            else:
                history_data_dict[cmd_idx].append(history_data)
            # A history entry is attributed to the first matching cmd_idx only.
            break

    # Sometimes the database holds several baselines for the same config;
    # only the latest one is used. Iterate over items() -- iterating the dict
    # directly yields keys only and fails on tuple unpacking.
    history_baseline_dict = {
        cmd_idx: get_latest_data(baseline_list)
        for cmd_idx, baseline_list in baseline_candidates.items()
    }
    return history_baseline_dict, history_data_dict
364402
365403
@@ -477,6 +515,8 @@ def post_new_perf_data(new_baseline_data_dict, new_data_dict,
477515 # Only post regressive test cases when post-merge.
478516 if new_baseline_data_dict :
479517 data_list .extend (regressive_data_list )
518+ if not data_list :
519+ return
480520 try :
481521 print_info (
482522 f"Ready to post { len (data_list )} data to { TEST_INFO_PROJECT_NAME } " )
0 commit comments