Skip to content

Commit 5809fc7

Browse files
committed
Add dsr1 and gpt-oss test cases
Signed-off-by: Chenfei Zhang <[email protected]>
1 parent 7aeac97 commit 5809fc7

File tree

9 files changed

+687
-311
lines changed

9 files changed

+687
-311
lines changed

jenkins/L0_Test.groovy

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2688,8 +2688,13 @@ def launchTestJobs(pipeline, testFilter)
26882688
"DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-trtllm", "l0_dgx_b200", 1, 1, 4, 1, true],
26892689
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
26902690
// Perf sanity post merge test
2691+
26912692
// Disable perf stages due to https://nvbugs/5643646
26922693
// "DGX_B200-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x4", "perf_sanity_l0_dgx_b200", 1, 1, 4],
2694+
// "DGX_B200-8_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x8", "perf_sanity_l0_dgx_b200", 1, 1, 8],
26932698
// "DGX_B300-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b300-x4", "perf_sanity_l0_dgx_b300", 1, 1, 4],
26942699
]
26952700
fullSet += x86SlurmTestConfigs.keySet()
@@ -2732,7 +2737,9 @@ def launchTestJobs(pipeline, testFilter)
27322737
// "GB200-8_GPUs-2_Nodes-PyTorch-4": ["gb200-trtllm", "l0_gb200_multi_nodes", 4, 5, 8, 2],
27332738
// "GB200-8_GPUs-2_Nodes-PyTorch-5": ["gb200-trtllm", "l0_gb200_multi_nodes", 5, 5, 8, 2],
27342739
// ]
2735-
multiNodesSBSAConfigs = [:]
2740+
multiNodesSBSAConfigs = [
2741+
"GB200-8_GPUs-2_Nodes-PyTorch-Perf-Sanity-Post-Merge-1": ["gb200-trtllm", "perf_sanity_l0_gb200_multi_nodes", 1, 1, 8, 2],
2742+
]
27362743
def numMultiNodeTests = 3
27372744
multiNodesSBSAConfigs += (1..numMultiNodeTests).collectEntries { i ->
27382745
["GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-${i}".toString(), ["gb200-trtllm", "l0_gb200_multi_nodes", i, numMultiNodeTests, 8, 2]]

tests/integration/defs/perf/open_search_db_utils.py

Lines changed: 56 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import re
2121
import sys
2222
import time
23+
from datetime import datetime
2324

2425
from defs.trt_test_alternative import print_info
2526

@@ -32,40 +33,6 @@
3233
PROJECT_ROOT = "sandbox-temp-trtllm-ci-perf-v1" # "sandbox-trtllm-ci-perf"
3334
TEST_INFO_PROJECT_NAME = f"{PROJECT_ROOT}-test_info"
3435

35-
# Server config fields to compare
36-
SERVER_FIELDS = [
37-
"s_model_name",
38-
"l_gpus",
39-
"l_tp",
40-
"l_ep",
41-
"l_pp",
42-
"l_max_num_tokens",
43-
"b_enable_chunked_prefill",
44-
"b_disable_overlap_scheduler",
45-
"s_attention_backend",
46-
"s_moe_backend",
47-
"l_moe_max_num_tokens",
48-
"l_stream_interval",
49-
"b_enable_attention_dp",
50-
"b_attention_dp_balance",
51-
"l_batching_wait_iters",
52-
"l_timeout_iters",
53-
"s_kv_cache_dtype",
54-
"b_enable_block_reuse",
55-
"d_free_gpu_memory_fraction",
56-
"l_max_batch_size",
57-
"b_enable_padding",
58-
]
59-
60-
# Client config fields to compare
61-
CLIENT_FIELDS = [
62-
"l_concurrency",
63-
"l_iterations",
64-
"l_isl",
65-
"l_osl",
66-
"d_random_range_ratio",
67-
]
68-
6936
# Metrics where larger is better
7037
MAXIMIZE_METRICS = [
7138
"d_seq_throughput",
@@ -189,7 +156,7 @@ def get_job_info():
189156
}
190157

191158

192-
def query_history_data():
159+
def query_history_data(gpu_type):
193160
"""
194161
Query post-merge data for the specified GPU type
195162
"""
@@ -209,6 +176,16 @@ def query_history_data():
209176
"b_is_post_merge": True
210177
}
211178
},
179+
{
180+
"term": {
181+
"b_is_regression": False
182+
}
183+
},
184+
{
185+
"term": {
186+
"s_gpu_type": gpu_type
187+
}
188+
},
212189
{
213190
"range": {
214191
"ts_created": {
@@ -267,17 +244,32 @@ def match(history_data, new_data):
267244
"""
268245
Check if the server and client config of history data matches the new data
269246
"""
270-
# Combine all fields to compare (excluding log links)
271-
fields_to_compare = SERVER_FIELDS + CLIENT_FIELDS
272247

273248
def is_empty(value):
274249
"""Check if a value is empty (None, empty string, etc.)"""
275250
return value is None or value == ""
276251

277-
# Compare each field
278-
for field in fields_to_compare:
279-
history_value = history_data.get(field)
280-
new_value = new_data.get(field)
252+
def should_skip_field(field):
253+
"""Check if a field should be skipped in comparison"""
254+
# Skip fields starting with @, _, ts_
255+
if field.startswith('@') or field.startswith('_') or field.startswith(
256+
'ts_'):
257+
return True
258+
# Skip log links and speculative_model_dir
259+
if field in [
260+
's_speculative_model_dir', 's_server_log_link',
261+
's_client_log_link'
262+
]:
263+
return True
264+
return False
265+
266+
# Compare each field in new_data
267+
for field, new_value in new_data.items():
268+
# Skip excluded fields
269+
if should_skip_field(field):
270+
continue
271+
272+
history_value = history_data.get(field, None)
281273

282274
# If both are empty, consider them equal
283275
if is_empty(history_value) and is_empty(new_value):
@@ -339,27 +331,44 @@ def calculate_best_perf_result(history_data_list, new_data):
339331
return best_metrics
340332

341333

342-
def get_history_data(new_data_dict):
334+
def get_history_data(new_data_dict, gpu_type):
343335
"""
344336
Query history post-merge data for each cmd_idx
345337
"""
338+
339+
def get_latest_data(data_list):
340+
if not data_list:
341+
return None
342+
time_format = "%b %d, %Y @ %H:%M:%S.%f"
343+
# Find the item with the maximum ts_created value
344+
latest_data = max(
345+
data_list,
346+
key=lambda x: datetime.strptime(x["ts_created"], time_format))
347+
return latest_data
348+
346349
history_baseline_dict = {}
347350
history_data_dict = {}
348351
cmd_idxs = new_data_dict.keys()
349352
for cmd_idx in cmd_idxs:
350353
history_data_dict[cmd_idx] = []
351-
history_baseline_dict[cmd_idx] = None
352-
history_data_list = query_history_data()
354+
history_baseline_dict[cmd_idx] = []
355+
history_data_list = []
356+
if cmd_idxs:
357+
history_data_list = query_history_data(gpu_type)
353358
if history_data_list:
354359
for history_data in history_data_list:
355360
for cmd_idx in cmd_idxs:
356361
if match(history_data, new_data_dict[cmd_idx]):
357362
if history_data.get("b_is_baseline") and history_data.get(
358363
"b_is_baseline") == True:
359-
history_baseline_dict[cmd_idx] = history_data
364+
history_baseline_dict[cmd_idx].append(history_data)
360365
else:
361366
history_data_dict[cmd_idx].append(history_data)
362367
break
368+
# Sometime database has several baselines and we only use the latest baseline one
369+
for cmd_idx, baseline_list in history_baseline_dict.items():
370+
latest_baseline = get_latest_data(baseline_list)
371+
history_baseline_dict[cmd_idx] = latest_baseline
363372
return history_baseline_dict, history_data_dict
364373

365374

@@ -477,6 +486,8 @@ def post_new_perf_data(new_baseline_data_dict, new_data_dict,
477486
# Only post regressive test cases when post-merge.
478487
if new_baseline_data_dict:
479488
data_list.extend(regressive_data_list)
489+
if not data_list:
490+
return
480491
try:
481492
print_info(
482493
f"Ready to post {len(data_list)} data to {TEST_INFO_PROJECT_NAME}")

0 commit comments

Comments
 (0)