@@ -3614,7 +3614,7 @@ class TestGPTOSS(LlmapiAccuracyTestHarness):
36143614 "apply_chat_template" : True ,
36153615 }
36163616
3617- MODEL_PATH = f"{ llm_models_root () } /gpt_oss /gpt-oss-120b"
3617+ MODEL_PATH = f"openai /gpt-oss-120b"
36183618
36193619 @pytest .mark .parametrize (
36203620 "kv_cache_dtype" ,
@@ -3944,16 +3944,24 @@ def test_w4_chunked_prefill(self, kv_cache_dtype, moe_backend, mocker):
39443944 extra_evaluator_kwargs = extra_evaluator_kwargs )
39453945
39463946 @pytest .mark .skip_less_device (4 )
3947+ @pytest .mark .parametrize ("overlap_scheduler" , [True , False ],
3948+ ids = ["overlap_scheduler" , "no_overlap_scheduler" ])
3949+ @pytest .mark .parametrize ("one_model" , [True , False ],
3950+ ids = ["one_model" , "two_model" ])
39473951 @pytest .mark .parametrize (
39483952 "moe_backend" ,
39493953 ["CUTLASS" ,
39503954 pytest .param ("TRTLLM" , marks = skip_pre_blackwell ), "TRITON" ],
39513955 ids = ["cutlass" , "trtllm" , "triton" ])
3952- def test_eagle3 (self , moe_backend , mocker ):
3956+ def test_eagle3 (self , moe_backend , one_model , overlap_scheduler , mocker ):
39533957 if moe_backend == "TRITON" :
39543958 if not IS_TRITON_KERNELS_AVAILABLE :
39553959 pytest .skip ("Triton kernels are not available" )
39563960
3961+ if get_sm_version () == 90 and moe_backend == "CUTLASS" :
3962+ pytest .skip (
3963+ "https://nvbugs/5636916: Remaining Hopper Eagle Accuracy Issue" )
3964+
39573965 MAX_OUTPUT_LEN = 128179
39583966 MAX_INPUT_LEN = 32768
39593967
@@ -3965,7 +3973,7 @@ def test_eagle3(self, moe_backend, mocker):
39653973 mocker .patch .object (GPQADiamond , "MAX_INPUT_LEN" , MAX_INPUT_LEN )
39663974
39673975 # https://nvbugs/5590408: 2-Model overlap scheduling has accuracy issue
3968- pytorch_config = dict (disable_overlap_scheduler = True ,
3976+ pytorch_config = dict (disable_overlap_scheduler = not overlap_scheduler ,
39693977 cuda_graph_config = CudaGraphConfig ())
39703978 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.6 ,
39713979 dtype = "auto" )
@@ -3974,7 +3982,7 @@ def test_eagle3(self, moe_backend, mocker):
39743982 draft_len = 3
39753983 spec_config = EagleDecodingConfig (max_draft_len = draft_len ,
39763984 speculative_model_dir = eagle_model_dir ,
3977- eagle3_one_model = False )
3985+ eagle3_one_model = one_model )
39783986
39793987 max_seq_len = MAX_INPUT_LEN + MAX_OUTPUT_LEN
39803988 llm = LLM (self .MODEL_PATH ,
0 commit comments