4 files changed: +25 −0 lines changed

@@ -322,6 +322,8 @@ Qwen/Qwen2.5-7B-Instruct:
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
     accuracy: 33.248
+Qwen/QwQ-32B:
+  - accuracy: 30.358
 nvidia/Nemotron-Mini-4B-Instruct:
   - quant_algo: FP8
     accuracy: 25.247

@@ -141,6 +141,8 @@ Qwen/Qwen2.5-7B-Instruct:
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
     accuracy: 75.32
+Qwen/QwQ-32B:
+  - accuracy: 82.60
 deepseek-ai/DeepSeek-V3-Lite:
   - accuracy: 71.40
   - quant_algo: NVFP4
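
The second hunk adds the corresponding MMLU reference (82.60). How the harness compares a measured score against these references is not shown in this diff; a minimal, hypothetical tolerance check might look like the following sketch:

def check_accuracy(measured: float, reference: float, rel_tol: float = 0.02) -> None:
    # Fail when the measured score drops more than rel_tol below the reference;
    # the actual harness may apply a statistical test rather than a fixed margin.
    assert measured >= reference * (1 - rel_tol), (
        f"accuracy {measured:.3f} is below reference {reference:.3f}")

check_accuracy(measured=82.1, reference=82.60)  # passes: within 2% of the MMLU reference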

@@ -2847,3 +2847,23 @@ def test_auto_dtype(self):
                  kv_cache_config=self.kv_cache_config) as llm:
             task = MMMU(self.MODEL_NAME)
             task.evaluate(llm, sampling_params=self.sampling_params)
+
+
+class TestQwQ_32B(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "Qwen/QwQ-32B"
+    MODEL_PATH = f"{llm_models_root()}/QwQ-32B"
+
+    @pytest.mark.skip_less_device_memory(80000)
+    @pytest.mark.skip_less_device(4)
+    def test_auto_dtype_tp4(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
+
+        with LLM(self.MODEL_PATH,
+                 max_num_tokens=16384,
+                 kv_cache_config=kv_cache_config,
+                 tensor_parallel_size=4,
+                 max_batch_size=8) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
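
The new test builds the model with tensor parallelism 4, a batch size cap of 8, and only half of the free GPU memory reserved for KV cache, then runs both benchmarks against the references above. For orientation, here is a standalone sketch of the same LLM API usage outside the test harness; the import paths and generate call follow the public TensorRT-LLM LLM API but are assumptions here, and the local llm_models_root() path is replaced by the Hugging Face model ID:

from tensorrt_llm import LLM
from tensorrt_llm.llmapi import KvCacheConfig

# Reserve only half of the free GPU memory for KV cache, leaving headroom for
# the bf16 weights (~64 GB for 32B parameters, hence TP=4 on 80 GB GPUs).
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)

with LLM("Qwen/QwQ-32B",                 # HF model ID instead of the local path
         max_num_tokens=16384,           # long outputs for a reasoning model
         kv_cache_config=kv_cache_config,
         tensor_parallel_size=4,
         max_batch_size=8) as llm:
    result = llm.generate("Briefly explain tensor parallelism.")
    print(result.outputs[0].text)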

@@ -21,6 +21,7 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4
 accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
 accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
 accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
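
The final hunk enrolls the new test in this test list. The entries are ordinary pytest node IDs, so the test can also be invoked directly; a sketch, assuming the working directory is the one containing accuracy/:

import pytest

# Equivalent to running `pytest <node-id> -v` on the command line.
pytest.main([
    "accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4",
    "-v",
])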