Commit 16a2dd5

[model] fix qwen25vl config (#929)
### PR Category

Serve

### PR Types

Bug Fixes

### PR Description

Fix the serve config for qwen2.5vl.
1 parent 5d55464 · commit 16a2dd5

File tree

1 file changed (+1, −2 lines)


examples/qwen2_5_vl/conf/serve/32b_instruct.yaml

Lines changed: 1 addition & 2 deletions
```diff
@@ -8,8 +8,7 @@
 pipeline_parallel_size: 1
 max_num_seqs: 8 # Even at full 32,768 context usage, 8 concurrent operations won't trigger OOM
 gpu_memory_utilization: 0.9
-limit_mm_per_prompt: image=18 # should be customized, 18 images/request is enough for most scenarios
+limit_mm_per_prompt: '{"image": 18}' # should be customized, 18 images/request is enough for most scenarios
 port: 9010
 trust_remote_code: true
-enforce_eager: true # better compare to FlagGems
 enable_chunked_prefill: true
```
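The substantive change swaps the old `image=18` key=value form for a JSON string, `'{"image": 18}'`, which maps each modality to its per-request cap. As a minimal sketch of the difference between the two formats, the hypothetical helper below (not the serving engine's actual parser) accepts both and normalizes them to a `{modality: max_count}` dict:

```python
import json


def parse_limit_mm_per_prompt(value: str) -> dict:
    """Normalize a limit_mm_per_prompt value to {modality: max_count}.

    Hypothetical helper illustrating the two formats seen in this diff:
    the old 'image=18' key=value style and the new JSON string
    '{"image": 18}'.
    """
    value = value.strip()
    if value.startswith("{"):
        # New style: a JSON object, e.g. '{"image": 18}'
        return {k: int(v) for k, v in json.loads(value).items()}
    # Old style: comma-separated key=value pairs, e.g. 'image=18,video=2'
    return {k: int(v) for k, v in (pair.split("=", 1) for pair in value.split(","))}


print(parse_limit_mm_per_prompt('{"image": 18}'))  # {'image': 18}
print(parse_limit_mm_per_prompt("image=18"))       # {'image': 18}
```

Quoting the JSON object in the YAML file keeps it a plain string, so it survives YAML parsing intact and can be handed to the engine's own JSON-based parsing.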

0 commit comments