Skip to content

Commit c8da25f

Browse files
committed
Enhance Eagle Tests for GPTOSS
Signed-off-by: Dongfeng Yu <[email protected]>
1 parent 7ab02ad commit c8da25f

File tree

2 files changed: 12 additions and 5 deletions

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3614,7 +3614,7 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
36143614
"apply_chat_template": True,
36153615
}
36163616

3617-
MODEL_PATH = f"{llm_models_root()}/gpt_oss/gpt-oss-120b"
3617+
MODEL_PATH = f"openai/gpt-oss-120b"
36183618

36193619
@pytest.mark.parametrize(
36203620
"kv_cache_dtype",
@@ -3944,16 +3944,24 @@ def test_w4_chunked_prefill(self, kv_cache_dtype, moe_backend, mocker):
39443944
extra_evaluator_kwargs=extra_evaluator_kwargs)
39453945

39463946
@pytest.mark.skip_less_device(4)
3947+
@pytest.mark.parametrize("overlap_scheduler", [True, False],
3948+
ids=["overlap_scheduler", "no_overlap_scheduler"])
3949+
@pytest.mark.parametrize("one_model", [True, False],
3950+
ids=["one_model", "two_model"])
39473951
@pytest.mark.parametrize(
39483952
"moe_backend",
39493953
["CUTLASS",
39503954
pytest.param("TRTLLM", marks=skip_pre_blackwell), "TRITON"],
39513955
ids=["cutlass", "trtllm", "triton"])
3952-
def test_eagle3(self, moe_backend, mocker):
3956+
def test_eagle3(self, moe_backend, one_model, overlap_scheduler, mocker):
39533957
if moe_backend == "TRITON":
39543958
if not IS_TRITON_KERNELS_AVAILABLE:
39553959
pytest.skip("Triton kernels are not available")
39563960

3961+
if get_sm_version() == 90 and moe_backend == "CUTLASS":
3962+
pytest.skip(
3963+
"https://nvbugs/5636916: Remaining Hopper Eagle Accuracy Issue")
3964+
39573965
MAX_OUTPUT_LEN = 128179
39583966
MAX_INPUT_LEN = 32768
39593967

@@ -3965,7 +3973,7 @@ def test_eagle3(self, moe_backend, mocker):
39653973
mocker.patch.object(GPQADiamond, "MAX_INPUT_LEN", MAX_INPUT_LEN)
39663974

39673975
# https://nvbugs/5590408: 2-Model overlap scheduling has accuracy issue
3968-
pytorch_config = dict(disable_overlap_scheduler=True,
3976+
pytorch_config = dict(disable_overlap_scheduler=not overlap_scheduler,
39693977
cuda_graph_config=CudaGraphConfig())
39703978
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.6,
39713979
dtype="auto")
@@ -3974,7 +3982,7 @@ def test_eagle3(self, moe_backend, mocker):
39743982
draft_len = 3
39753983
spec_config = EagleDecodingConfig(max_draft_len=draft_len,
39763984
speculative_model_dir=eagle_model_dir,
3977-
eagle3_one_model=False)
3985+
eagle3_one_model=one_model)
39783986

39793987
max_seq_len = MAX_INPUT_LEN + MAX_OUTPUT_LEN
39803988
llm = LLM(self.MODEL_PATH,

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,6 @@ test_e2e.py::test_openai_chat_multimodal_example SKIP (https://nvbugs/5636894)
350350
accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_attention_dp] SKIP (https://nvbugs/5637220)
351351
llmapi/test_llm_examples.py::test_llmapi_example_multilora SKIP (https://nvbugs/5636857)
352352
unittest/_torch/modules/test_mla_helix.py::test_mla_helix_distributed SKIP (https://nvbugspro.nvidia.com/bug/5637012)
353-
accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[cutlass] SKIP (https://nvbugs/5636916)
354353
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5616182)
355354
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3-small-128k-instruct-fp8-bfloat16] SKIP (https://nvbugs/5465143)
356355
examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5644684)

0 commit comments

Comments
 (0)