NVIDIA
diff --git a/‎jenkins/L0_Test.groovy‎
Lines changed: 4 additions & 1 deletion b/‎jenkins/L0_Test.groovy‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎tests/integration/defs/perf/open_search_db_utils.py‎
Lines changed: 45 additions & 5 deletions b/‎tests/integration/defs/perf/open_search_db_utils.py‎
Lines changed: 45 additions & 5 deletions
@@ -2689,6 +2689,7 @@ def launchTestJobs(pipeline, testFilter)
         "DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
         // Perf sanity post merge test
         "DGX_B200-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x4", "perf_sanity_l0_dgx_b200", 1, 1, 4],
+        "DGX_B200-8_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b200-x8", "perf_sanity_l0_dgx_b200", 1, 1, 8],
         "DGX_B300-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["b300-x4", "perf_sanity_l0_dgx_b300", 1, 1, 4],
     ]
     fullSet += x86SlurmTestConfigs.keySet()
@@ -2731,7 +2732,9 @@ def launchTestJobs(pipeline, testFilter)
         // "GB200-8_GPUs-2_Nodes-PyTorch-4": ["gb200-trtllm", "l0_gb200_multi_nodes", 4, 5, 8, 2],
         // "GB200-8_GPUs-2_Nodes-PyTorch-5": ["gb200-trtllm", "l0_gb200_multi_nodes", 5, 5, 8, 2],
     // ]
-    multiNodesSBSAConfigs = [:]
+    multiNodesSBSAConfigs = [
+        "GB200-8_GPUs-2_Nodes-PyTorch-Perf-Sanity-Post-Merge-1": ["gb200-trtllm", "perf_sanity_l0_gb200_multi_nodes", 1, 1, 8, 2],
+    ]
     def numMultiNodeTests = 9
     multiNodesSBSAConfigs += (1..numMultiNodeTests).collectEntries { i ->
         ["GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-${i}".toString(), ["gb200-trtllm", "l0_gb200_multi_nodes", i, numMultiNodeTests, 8, 2]]
 
@@ -20,6 +20,7 @@
 import re
 import sys
 import time
+from datetime import datetime
 
 from defs.trt_test_alternative import print_info
 
@@ -36,15 +37,18 @@
 SERVER_FIELDS = [
     "s_model_name",
     "l_gpus",
+    "l_gpus_per_node",
     "l_tp",
     "l_ep",
     "l_pp",
     "l_max_num_tokens",
+    "l_cuda_graph_max_batch_size",
     "b_enable_chunked_prefill",
     "b_disable_overlap_scheduler",
     "s_attention_backend",
     "s_moe_backend",
     "l_moe_max_num_tokens",
+    "l_num_postprocess_workers",
     "l_stream_interval",
     "b_enable_attention_dp",
     "b_attention_dp_balance",
@@ -55,6 +59,11 @@
     "d_free_gpu_memory_fraction",
     "l_max_batch_size",
     "b_enable_padding",
+    "s_spec_decoding_type",
+    "l_num_nextn_predict_layers",
+    "l_eagle3_layers_to_capture",
+    "l_max_draft_len",
+    "s_speculative_model_dir",
 ]
 
 # Client config fields to compare
@@ -64,6 +73,8 @@
     "l_isl",
     "l_osl",
     "d_random_range_ratio",
+    "s_backend",
+    "b_use_chat_template",
 ]
 
 # Metrics where larger is better
@@ -189,7 +200,7 @@ def get_job_info():
     }
 
 
-def query_history_data():
+def query_history_data(gpu_type):
     """
     Query post-merge data with specific gpu type and model name
     """
@@ -209,6 +220,16 @@ def query_history_data():
                             "b_is_post_merge": True
                         }
                     },
+                    {
+                        "term": {
+                            "b_is_regression": False
+                        }
+                    },
+                    {
+                        "term": {
+                            "s_gpu_type": gpu_type
+                        }
+                    },
                     {
                         "range": {
                             "ts_created": {
@@ -339,27 +360,44 @@ def calculate_best_perf_result(history_data_list, new_data):
     return best_metrics
 
 
-def get_history_data(new_data_dict):
+def get_history_data(new_data_dict, gpu_type):
     """
     Query history post-merge data for each cmd_idx
     """
+
+    def get_latest_data(data_list):
+        if not data_list:
+            return None
+        time_format = "%b %d, %Y @ %H:%M:%S.%f"
+        # Pick the newest entry; assumes ts_created matches time_format (TODO confirm)
+        latest_data = max(
+            data_list,
+            key=lambda x: datetime.strptime(x["ts_created"], time_format))
+        return latest_data
+
     history_baseline_dict = {}
     history_data_dict = {}
     cmd_idxs = new_data_dict.keys()
     for cmd_idx in cmd_idxs:
         history_data_dict[cmd_idx] = []
-        history_baseline_dict[cmd_idx] = None
-    history_data_list = query_history_data()
+        history_baseline_dict[cmd_idx] = []
+    history_data_list = []
+    if cmd_idxs:
+        history_data_list = query_history_data(gpu_type)
     if history_data_list:
         for history_data in history_data_list:
             for cmd_idx in cmd_idxs:
                 if match(history_data, new_data_dict[cmd_idx]):
                     if history_data.get("b_is_baseline") and history_data.get(
                             "b_is_baseline") == True:
-                        history_baseline_dict[cmd_idx] = history_data
+                        history_baseline_dict[cmd_idx].append(history_data)
                     else:
                         history_data_dict[cmd_idx].append(history_data)
                     break
+    # The database may hold several baselines per cmd_idx; keep only the latest one
+    for cmd_idx, baseline_list in history_baseline_dict.items():
+        latest_baseline = get_latest_data(baseline_list)
+        history_baseline_dict[cmd_idx] = latest_baseline
     return history_baseline_dict, history_data_dict
 
 
@@ -477,6 +515,8 @@ def post_new_perf_data(new_baseline_data_dict, new_data_dict,
     # Only post regressive test cases when post-merge.
     if new_baseline_data_dict:
         data_list.extend(regressive_data_list)
+    if not data_list:
+        return
     try:
         print_info(
             f"Ready to post {len(data_list)} data to {TEST_INFO_PROJECT_NAME}")