@@ -21,12 +21,30 @@ l0_a10:
   # test list either).
   - unittest/_torch/models/checkpoints/hf/test_weight_loader.py
   - unittest/others/test_time_breakdown.py
+  - unittest/disaggregated/test_disagg_utils.py
+  - unittest/disaggregated/test_router.py
+  - unittest/disaggregated/test_remoteDictionary.py
+  - unittest/disaggregated/test_disagg_cluster_manager_worker.py
+  - unittest/disaggregated/test_cluster_storage.py
   - disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_single_gpu_with_mpirun_trt_backend[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_mixed[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_overlap[TinyLlama-1.1B-Chat-v1.0]
   - disaggregated/test_disaggregated.py::test_disaggregated_diff_max_tokens[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated.py::test_disaggregated_kv_cache_time_output[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated.py::test_disaggregated_perf_metrics[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated.py::test_disaggregated_cache_aware_balance[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated.py::test_disaggregated_conditional[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated.py::test_disaggregated_ngram[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_workers.py::test_workers_conditional_disaggregation[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_workers.py::test_workers_kv_cache_events[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_workers.py::test_workers_kv_cache_aware_router[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_workers.py::test_workers_kv_cache_aware_router_eviction[TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-False-TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[False-True-TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-False-TinyLlama-1.1B-Chat-v1.0]
+  - disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_llama[True-True-TinyLlama-1.1B-Chat-v1.0]
   - test_e2e.py::test_openai_chat_guided_decoding
   - test_e2e.py::test_openai_chat_multimodal_example
   - test_e2e.py::test_openai_perf_metrics
@@ -92,7 +110,6 @@ l0_a10:
   - examples/test_bert.py::test_llm_bert_general[compare_hf-enable_remove_input_padding-use_attention_plugin-enable_context_fmha-tp:1-pp:1-float16-BertModel-bert/bert-base-uncased]
   - unittest/trt/model/test_mistral.py
   - unittest/trt/model/test_llama.py
-  - test_e2e.py::test_gpt3_175b_1layers_build_only # 6 mins
   - llmapi/test_llm_api_connector.py::test_connector_simple[True]
   - llmapi/test_llm_api_connector.py::test_connector_simple[False]
   - llmapi/test_llm_api_connector.py::test_connector_async_onboard[True]
@@ -119,7 +136,6 @@ l0_a10:
   - test_e2e.py::test_trtllm_bench_sanity[--non-streaming-FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
   - test_e2e.py::test_trtllm_bench_latency_sanity[FP16-meta-llama/Llama-3.1-8B-llama-3.1-model/Meta-Llama-3.1-8B]
   - unittest/trt/quantization
-  - accuracy/test_cli_flow.py::TestLlama7B::test_streamingllm # 2 mins
   - unittest/trt/functional # 37 mins
   - llmapi/test_llm_examples.py::test_llmapi_quickstart_atexit
   - unittest/api_stability
@@ -140,13 +156,9 @@ l0_a10:
   - accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_context=False] # 5 mins
   - accuracy/test_cli_flow.py::TestVicuna7B::test_eagle_2[cuda_graph=True-chunked_context=True] # 5 mins
   - accuracy/test_cli_flow.py::TestLlama2_7B::test_auto_dtype
-  - examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]
   - unittest/trt/attention/test_gpt_attention_IFB.py
   - unittest/trt/attention/test_gpt_attention_no_cache.py
-  - unittest/trt/model/test_mamba.py # 3 mins
   - examples/test_whisper.py::test_llm_whisper_general[large-v3-disable_gemm_plugin-enable_attention_plugin-disable_weight_only-float16-nb:1-use_cpp_runtime]
-  - examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]
-  - examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] # 3 mins
 - condition:
     ranges:
       system_gpu_count:
@@ -205,6 +217,12 @@ l0_a10:
   - accuracy/test_llm_api.py::TestEagle2Vicuna_7B_v1_3::test_auto_dtype
   - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-MAX_UTILIZATION-trt-stress-test]
   - stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-stress_time_300s_timeout_450s-GUARANTEED_NO_EVICT-trt-stress-test]
+  - test_e2e.py::test_gpt3_175b_1layers_build_only # 6 mins
+  - examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]
+  - unittest/trt/model/test_mamba.py # 3 mins
+  - examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-enable_gemm_plugin]
+  - examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-enable_gemm_plugin] # 3 mins
+  - accuracy/test_cli_flow.py::TestLlama7B::test_streamingllm # 2 mins
 - condition:
     ranges:
       system_gpu_count: