Skip to content

Commit 6cb155c

Browse files
committed
Add dsr1 and gpt-oss test cases
Signed-off-by: Chenfei Zhang <[email protected]>
1 parent 595f780 commit 6cb155c

File tree

7 files changed

+445
-45
lines changed

7 files changed

+445
-45
lines changed

jenkins/L0_Test.groovy

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2689,6 +2689,7 @@ def launchTestJobs(pipeline, testFilter)
26892689
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
26902690
// Perf sanity post merge test
26912691
"DGX_B200-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x4", "perf_sanity_l0_dgx_b200", 1, 1, 4],
2692+
"DGX_B200-8_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x8", "perf_sanity_l0_dgx_b200", 1, 1, 8],
26922693
"DGX_B300-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b300-x4", "perf_sanity_l0_dgx_b300", 1, 1, 4],
26932694
]
26942695
fullSet += x86SlurmTestConfigs.keySet()
@@ -2731,7 +2732,9 @@ def launchTestJobs(pipeline, testFilter)
27312732
// "GB200-8_GPUs-2_Nodes-PyTorch-4": ["gb200-trtllm", "l0_gb200_multi_nodes", 4, 5, 8, 2],
27322733
// "GB200-8_GPUs-2_Nodes-PyTorch-5": ["gb200-trtllm", "l0_gb200_multi_nodes", 5, 5, 8, 2],
27332734
// ]
2734-
multiNodesSBSAConfigs = [:]
2735+
multiNodesSBSAConfigs = [
2736+
"GB200-8_GPUs-2_Nodes-PyTorch-Perf-Sanity-Post-Merge-1": ["gb200-trtllm", "perf_sanity_l0_gb200_multi_nodes", 1, 1, 8, 2],
2737+
]
27352738
def numMultiNodeTests = 9
27362739
multiNodesSBSAConfigs += (1..numMultiNodeTests).collectEntries { i ->
27372740
["GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-${i}".toString(), ["gb200-trtllm", "l0_gb200_multi_nodes", i, numMultiNodeTests, 8, 2]]

tests/integration/defs/perf/open_search_db_utils.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import re
2121
import sys
2222
import time
23+
from datetime import datetime
2324

2425
from defs.trt_test_alternative import print_info
2526

@@ -36,15 +37,18 @@
3637
SERVER_FIELDS = [
3738
"s_model_name",
3839
"l_gpus",
40+
"l_gpus_per_node",
3941
"l_tp",
4042
"l_ep",
4143
"l_pp",
4244
"l_max_num_tokens",
45+
"l_cuda_graph_max_batch_size",
4346
"b_enable_chunked_prefill",
4447
"b_disable_overlap_scheduler",
4548
"s_attention_backend",
4649
"s_moe_backend",
4750
"l_moe_max_num_tokens",
51+
"l_num_postprocess_workers",
4852
"l_stream_interval",
4953
"b_enable_attention_dp",
5054
"b_attention_dp_balance",
@@ -55,6 +59,11 @@
5559
"d_free_gpu_memory_fraction",
5660
"l_max_batch_size",
5761
"b_enable_padding",
62+
"s_spec_decoding_type",
63+
"l_num_nextn_predict_layers",
64+
"l_eagle3_layers_to_capture",
65+
"l_max_draft_len",
66+
"s_speculative_model_dir",
5867
]
5968

6069
# Client config fields to compare
@@ -64,6 +73,8 @@
6473
"l_isl",
6574
"l_osl",
6675
"d_random_range_ratio",
76+
"s_backend",
77+
"b_use_chat_template",
6778
]
6879

6980
# Metrics where larger is better
@@ -189,7 +200,7 @@ def get_job_info():
189200
}
190201

191202

192-
def query_history_data():
203+
def query_history_data(gpu_type):
193204
"""
194205
Query post-merge data with specific gpu type and model name
195206
"""
@@ -209,6 +220,16 @@ def query_history_data():
209220
"b_is_post_merge": True
210221
}
211222
},
223+
{
224+
"term": {
225+
"b_is_regression": False
226+
}
227+
},
228+
{
229+
"term": {
230+
"s_gpu_type": gpu_type
231+
}
232+
},
212233
{
213234
"range": {
214235
"ts_created": {
@@ -339,27 +360,44 @@ def calculate_best_perf_result(history_data_list, new_data):
339360
return best_metrics
340361

341362

def get_history_data(new_data_dict, gpu_type):
    """
    Query history post-merge data for each cmd_idx.

    Args:
        new_data_dict: Mapping of cmd_idx -> new perf data entry. Each value
            is passed to ``match`` to decide which history entries belong to
            that cmd_idx.
        gpu_type: GPU type forwarded to ``query_history_data`` to filter the
            history query.

    Returns:
        A tuple ``(history_baseline_dict, history_data_dict)``:
          * ``history_baseline_dict`` maps each cmd_idx to its most recent
            baseline entry (by ``ts_created``), or ``None`` if there is none.
          * ``history_data_dict`` maps each cmd_idx to the list of matching
            non-baseline history entries (possibly empty).
    """

    def get_latest_data(data_list):
        """Return the entry with the newest ``ts_created``, or None if empty."""
        if not data_list:
            return None
        # NOTE(review): assumes ts_created is a string such as
        # "Jan 02, 2025 @ 03:04:05.678" -- confirm against the database.
        time_format = "%b %d, %Y @ %H:%M:%S.%f"
        return max(
            data_list,
            key=lambda x: datetime.strptime(x["ts_created"], time_format))

    cmd_idxs = new_data_dict.keys()
    baseline_candidates = {cmd_idx: [] for cmd_idx in cmd_idxs}
    history_data_dict = {cmd_idx: [] for cmd_idx in cmd_idxs}

    # Skip the database round trip entirely when there is nothing to match.
    history_data_list = []
    if cmd_idxs:
        history_data_list = query_history_data(gpu_type)

    for history_data in history_data_list or []:
        for cmd_idx in cmd_idxs:
            if not match(history_data, new_data_dict[cmd_idx]):
                continue
            # Deliberately an equality test: the flag must equal True, a
            # merely truthy value is not treated as a baseline marker.
            if history_data.get("b_is_baseline") == True:  # noqa: E712
                baseline_candidates[cmd_idx].append(history_data)
            else:
                history_data_dict[cmd_idx].append(history_data)
            # A history entry is attributed to the first matching cmd_idx only.
            break

    # Sometimes the database holds several baselines for the same test case;
    # only the latest one is used. Iterate over items(): iterating the dict
    # itself yields only keys and cannot be unpacked into (key, value).
    history_baseline_dict = {
        cmd_idx: get_latest_data(baseline_list)
        for cmd_idx, baseline_list in baseline_candidates.items()
    }
    return history_baseline_dict, history_data_dict
364402

365403

@@ -477,6 +515,8 @@ def post_new_perf_data(new_baseline_data_dict, new_data_dict,
477515
# Only post regressive test cases when post-merge.
478516
if new_baseline_data_dict:
479517
data_list.extend(regressive_data_list)
518+
if not data_list:
519+
return
480520
try:
481521
print_info(
482522
f"Ready to post {len(data_list)} data to {TEST_INFO_PROJECT_NAME}")

0 commit comments

Comments
 (0)