
Commit 2d46035

[ci] testcase bugfix and add more models into testcase (#3463)
* update * update * update * update * update * update * update * updaste * update * update * update * update * update * update * update * update * update * update * update
1 parent 84507b7 commit 2d46035

15 files changed: +146 -55 lines

.github/scripts/eval_chat_config.py

Lines changed: 18 additions & 9 deletions

```diff
@@ -47,6 +47,8 @@
     models as lmdeploy_internlm2_chat_7b  # noqa: F401, E501
 from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_chat_20b import \
     models as lmdeploy_internlm2_chat_20b  # noqa: F401, E501
+from opencompass.configs.models.hf_internlm.lmdeploy_internlm3_8b_instruct import \
+    models as lmdeploy_internlm3_8b_instruct  # noqa: F401, E501
 from opencompass.configs.models.hf_internlm.lmdeploy_internlm_chat_7b import \
     models as lmdeploy_internlm_chat_7b  # noqa: F401, E501
 from opencompass.configs.models.hf_llama.hf_llama2_7b_chat import models as hf_llama2_chat_7b  # noqa: F401, E501
@@ -66,6 +68,8 @@
     models as hf_mixtral_chat_8x7b  # noqa: F401, E501
 from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import \
     models as lmdeploy_qwen2_5_7b_instruct  # noqa: F401, E501
+from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_32b_instruct import \
+    models as lmdeploy_qwen2_5_32b_instruct  # noqa: F401, E501
 from opencompass.configs.models.qwen.hf_qwen1_5_7b_chat import models as hf_qwen1_5_chat_7b  # noqa: F401, E501
 from opencompass.configs.models.qwen.hf_qwen1_5_moe_a2_7b_chat import \
     models as hf_qwen1_5_moe_a2_7b_chat  # noqa: F401, E501
@@ -116,15 +120,12 @@
 turbomind_internlm2_5_7b_chat_batch1 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
 turbomind_internlm2_5_7b_chat_batch1_4bits = deepcopy(*lmdeploy_internlm2_5_7b_chat)
 
-turbomind_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
-turbomind_internlm3_8b_instruct_4bits = deepcopy(*lmdeploy_internlm2_5_7b_chat)
-turbomind_internlm3_8b_instruct_kvint4 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
-turbomind_internlm3_8b_instruct_kvint8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
-pytorch_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm2_5_7b_chat)
-pytorch_internlm3_8b_instruct_w8a8 = deepcopy(*lmdeploy_internlm2_5_7b_chat)
-for model in [v for k, v in locals().items() if 'internlm3_8b_instruct' in k]:
-    model['abbr'] = 'turbomind-internlm3-8b-instruct'
-    model['path'] = 'internlm/internlm3-8b-instruct'
+turbomind_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm3_8b_instruct)
+turbomind_internlm3_8b_instruct_4bits = deepcopy(*lmdeploy_internlm3_8b_instruct)
+turbomind_internlm3_8b_instruct_kvint4 = deepcopy(*lmdeploy_internlm3_8b_instruct)
+turbomind_internlm3_8b_instruct_kvint8 = deepcopy(*lmdeploy_internlm3_8b_instruct)
+pytorch_internlm3_8b_instruct = deepcopy(*lmdeploy_internlm3_8b_instruct)
+pytorch_internlm3_8b_instruct_w8a8 = deepcopy(*lmdeploy_internlm3_8b_instruct)
 
 # ===== Configs for internlm/internlm2_5_20b_chat =====
 turbomind_internlm2_5_20b_chat = deepcopy(*lmdeploy_internlm2_5_20b_chat)
@@ -186,6 +187,14 @@
 pytorch_qwen2_5_7b_instruct = deepcopy(*lmdeploy_qwen2_5_7b_instruct)
 pytorch_qwen2_5_7b_instruct_w8a8 = deepcopy(*lmdeploy_qwen2_5_7b_instruct)
 
+# ===== Configs for Qwen/Qwen25-32B-Instruct =====
+turbomind_qwen2_5_32b_instruct = deepcopy(*lmdeploy_qwen2_5_32b_instruct)
+turbomind_qwen2_5_32b_instruct_4bits = deepcopy(*lmdeploy_qwen2_5_32b_instruct)
+turbomind_qwen2_5_32b_instruct_kvint4 = deepcopy(*lmdeploy_qwen2_5_32b_instruct)
+turbomind_qwen2_5_32b_instruct_kvint8 = deepcopy(*lmdeploy_qwen2_5_32b_instruct)
+pytorch_qwen2_5_32b_instruct = deepcopy(*lmdeploy_qwen2_5_32b_instruct)
+pytorch_qwen2_5_32b_instruct_w8a8 = deepcopy(*lmdeploy_qwen2_5_32b_instruct)
+
 # ===== Configs for meta-llama/Llama-2-7b-chat-hf =====
 turbomind_llama2_7b_chat = deepcopy(*lmdeploy_llama2_7b_chat)
 turbomind_llama2_7b_chat_4bits = deepcopy(*lmdeploy_llama2_7b_chat)
```
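
Note: the `deepcopy(*...)` idiom in this file relies on every opencompass config module exporting `models` as a one-element list, so star-unpacking passes exactly one config dict to `copy.deepcopy`. The bugfix above points the internlm3 variants at their own imported config (`lmdeploy_internlm3_8b_instruct`) instead of cloning the internlm2.5 config and patching it in a loop, a loop that also stamped every variant, including the pytorch ones, with the same `turbomind-` abbr. A minimal sketch of the idiom, with illustrative config contents rather than the real module:

```python
from copy import deepcopy

# Stand-in for an opencompass config module: `models` is a one-element
# list holding a single model-config dict (illustrative fields only).
models = [dict(abbr='internlm3-8b-instruct', path='internlm/internlm3-8b-instruct')]

# `deepcopy(*models)` unpacks the single dict, so every variant is an
# independent copy that can be customized without touching the base.
turbomind_variant = deepcopy(*models)
turbomind_variant['abbr'] += '-4bits'

assert models[0]['abbr'] == 'internlm3-8b-instruct'  # base left intact
# Caveat: the star-unpack only works while `models` has exactly one entry;
# a second entry would be misinterpreted as deepcopy's `memo` argument.
```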

.github/scripts/eval_regression_chat_models.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -58,6 +58,8 @@
     models as lmdeploy_qwen2_5_3b_instruct_model  # noqa: F401, E501
 from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_14b_instruct import \
     models as lmdeploy_qwen2_5_14b_instruct_model  # noqa: F401, E501
+from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_32b_instruct import \
+    models as lmdeploy_qwen2_5_32b_instruct_model  # noqa: F401, E501
 from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import \
     models as lmdeploy_qwen2_5_72b_instruct_model  # noqa: F401, E501
 from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import \
@@ -86,6 +88,7 @@
 pytorch_mistral_nemo_instruct_2407_model = deepcopy(lmdeploy_mistral_nemo_instruct_2407_model)
 pytorch_mistral_small_instruct_2409_model = deepcopy(lmdeploy_mistral_small_instruct_2409_model)
 pytorch_qwen2_5_72b_instruct_model = deepcopy(lmdeploy_qwen2_5_72b_instruct_model)
+pytorch_qwen2_5_32b_instruct_model = deepcopy(lmdeploy_qwen2_5_32b_instruct_model)
 pytorch_qwen2_7b_instruct_model = deepcopy(lmdeploy_qwen2_7b_instruct_model)
 pytorch_yi_1_5_34b_chat_model = deepcopy(lmdeploy_yi_1_5_34b_chat_model)
 
@@ -106,6 +109,7 @@
 lmdeploy_nemotron_70b_instruct_hf_model_native = deepcopy(lmdeploy_nemotron_70b_instruct_hf_model)
 lmdeploy_qwen2_5_0_5b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_0_5b_instruct_model)
 lmdeploy_qwen2_5_14b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_14b_instruct_model)
+lmdeploy_qwen2_5_32b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_32b_instruct_model)
 lmdeploy_qwen2_5_72b_instruct_model_native = deepcopy(lmdeploy_qwen2_5_72b_instruct_model)
 lmdeploy_qwen2_7b_instruct_model_native = deepcopy(lmdeploy_qwen2_7b_instruct_model)
 lmdeploy_yi_1_5_6b_chat_model_native = deepcopy(lmdeploy_yi_1_5_6b_chat_model)
```
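
Note: `deepcopy` rather than plain assignment or `copy.copy` matters here because each `*_model` object is a list of nested dicts, and the pytorch/native variants are later edited independently. A sketch of the difference, with hypothetical config contents:

```python
import copy

# Hypothetical shape of an opencompass model list entry.
lmdeploy_model = [dict(abbr='qwen2.5-32b-instruct-turbomind',
                       engine_config=dict(tp=4, session_len=32768))]

shallow = copy.copy(lmdeploy_model)    # new list, but shares the nested dicts
deep = copy.deepcopy(lmdeploy_model)   # fully independent clone

deep[0]['engine_config']['tp'] = 2     # safe: original keeps tp=4
shallow[0]['engine_config']['tp'] = 8  # leaks: original now sees tp=8
assert lmdeploy_model[0]['engine_config']['tp'] == 8
```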

.github/workflows/daily_ete_test.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -196,7 +196,7 @@ jobs:
     if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
     runs-on: [self-hosted, linux-a100]
     needs: test_quantization
-    timeout-minutes: 240
+    timeout-minutes: 300
     strategy:
       fail-fast: false
       matrix:
```

.github/workflows/evaluate.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -17,7 +17,7 @@ on:
       required: true
       description: 'Tested TurboMind models list. eg. [internlm_chat_7b,internlm_chat_7b_w8a16]'
       type: string
-      default: '[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_internlm2_chat_20b, pytorch_internlm2_chat_20b, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it, turbomind_internlm2_chat_7b_4bits, turbomind_internlm2_chat_7b_kvint4, turbomind_internlm2_chat_7b_kvint8, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, turbomind_internlm2_5_7b_chat_kvint8, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm3_8b_instruct_4bits, turbomind_internlm3_8b_instruct_kvint4, turbomind_internlm3_8b_instruct_kvint8, pytorch_internlm3_8b_instruct_w8a8, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, turbomind_internlm2_5_20b_chat_kvint8, turbomind_llama3_8b_instruct_4bits, turbomind_llama3_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_4bits, turbomind_llama3_1_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_kvint8,turbomind_llama3_8b_instruct_kvint8, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct_4bits, turbomind_qwen2_7b_instruct_kvint8, pytorch_qwen2_7b_instruct_w8a8, turbomind_qwen2_5_7b_instruct_4bits, turbomind_qwen2_5_7b_instruct_kvint8, pytorch_qwen2_5_7b_instruct_w8a8, turbomind_llama2_7b_chat_4bits, turbomind_llama2_7b_chat_kvint4, turbomind_llama2_7b_chat_kvint8]'
+      default: '[turbomind_internlm2_chat_7b, pytorch_internlm2_chat_7b, turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_internlm2_chat_20b, pytorch_internlm2_chat_20b, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_qwen2_5_32b_instruct, pytorch_qwen2_5_32b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it, turbomind_internlm2_chat_7b_4bits, turbomind_internlm2_chat_7b_kvint4, turbomind_internlm2_chat_7b_kvint8, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, turbomind_internlm2_5_7b_chat_kvint8, pytorch_internlm2_5_7b_chat_w8a8, turbomind_internlm3_8b_instruct_4bits, turbomind_internlm3_8b_instruct_kvint4, turbomind_internlm3_8b_instruct_kvint8, pytorch_internlm3_8b_instruct_w8a8, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, turbomind_internlm2_5_20b_chat_kvint8, turbomind_llama3_8b_instruct_4bits, turbomind_llama3_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_4bits, turbomind_llama3_1_8b_instruct_kvint4, turbomind_llama3_1_8b_instruct_kvint8,turbomind_llama3_8b_instruct_kvint8, pytorch_llama3_1_8b_instruct_w8a8, turbomind_qwen2_7b_instruct_4bits, turbomind_qwen2_7b_instruct_kvint8, pytorch_qwen2_7b_instruct_w8a8, turbomind_qwen2_5_7b_instruct_4bits, turbomind_qwen2_5_7b_instruct_kvint8, pytorch_qwen2_5_7b_instruct_w8a8, turbomind_qwen2_5_32b_instruct_4bits, turbomind_qwen2_5_32b_instruct_kvint8, pytorch_qwen2_5_32b_instruct_w8a8,turbomind_llama2_7b_chat_4bits, turbomind_llama2_7b_chat_kvint4, turbomind_llama2_7b_chat_kvint8]'
     chat_datasets:
       required: true
       description: 'Tested datasets list. eg. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
```
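
Note: this `default` list is maintained by hand and has to stay in sync with the variable names defined in `.github/scripts/eval_chat_config.py` (e.g. `turbomind_qwen2_5_32b_instruct` added in this commit). A hypothetical consistency check, not part of this commit, could catch drift:

```python
import re
from pathlib import Path

def config_names(path: str = '.github/scripts/eval_chat_config.py') -> set[str]:
    """Collect top-level assignment targets like `turbomind_qwen2_5_32b_instruct = ...`."""
    return set(re.findall(r'^(\w+)\s*=', Path(path).read_text(), flags=re.M))

def missing_models(default: str, names: set[str]) -> list[str]:
    """Return entries of the workflow's default list with no matching config variable."""
    wanted = (m.strip() for m in default.strip('[]').split(','))
    return [m for m in wanted if m and m not in names]

# missing_models(default_from_evaluate_yml, config_names()) should be empty.
```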

.github/workflows/evaluate_remote.yml

Lines changed: 1 addition & 3 deletions

```diff
@@ -84,7 +84,6 @@ jobs:
       matrix:
         pyver: [py310]
     runs-on: ubuntu-latest
-    environment: 'prod'
     env:
       PYTHON_VERSION: ${{ matrix.pyver }}
       PLAT_NAME: manylinux2014_x86_64
@@ -118,7 +117,6 @@ jobs:
     if: ${{!cancelled()}}
     needs: ['build-pypi', 'build-pypi-lmdeploy']
     runs-on: lmdeploy-volc
-    environment: 'prod'
     timeout-minutes: 120 #2hours
     steps:
       - name: Clone repository
@@ -148,6 +146,7 @@ jobs:
           pip install -r /fs-computility/llm/qa-llm-cicd/config/requirements.txt --cache-dir ${{env.PIP_CACHE_PATH}}
           pip install opencompass*.whl --cache-dir ${{env.PIP_CACHE_PATH}}
           pip install opencompass[lmdeploy] --cache-dir ${{env.PIP_CACHE_PATH}}
+          pip install opencompass[full] --cache-dir ${{env.PIP_CACHE_PATH}}
           pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --cache-dir ${{env.PIP_CACHE_PATH}}
           FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /fs-computility/llm/qa-llm-cicd/packages/flash_attn-2.7.0.post2+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
           cp -r /root/nltk_data ${{env.CONDA_PATH}}/envs/${{env.CONDA_ENV}}/nltk_data
@@ -179,7 +178,6 @@ jobs:
         regression_func: ${{fromJSON(github.event.inputs.regression_func_volc || '["chat_models","base_models"]')}}
         backend: ${{ fromJSON(github.event.inputs.backend || '["turbomind", "pytorch"]')}}
     runs-on: lmdeploy-volc
-    environment: 'prod'
     timeout-minutes: 480 #6hours
     steps:
       - name: Checkout repository
```

autotest/config.yaml

Lines changed: 14 additions & 5 deletions

```diff
@@ -27,6 +27,8 @@ tp_config:
     DeepSeek-V2-Lite-Chat: 2
     DeepSeek-R1-Distill-Qwen-32B: 2
     deepseek-vl2: 2
+    InternVL3-38B: 2
+    Qwen2.5-32B-Instruct: 4
     Llama-3.2-11B-Vision-Instruct: 2 # set tp == 2 for test
 
 turbomind_chat_model:
@@ -44,6 +46,7 @@ turbomind_chat_model:
     - internlm/internlm2-chat-20b-4bits
     - internlm/internlm-xcomposer2-4khd-7b
     - internlm/internlm-xcomposer2d5-7b
+    - OpenGVLab/InternVL3-38B
     - OpenGVLab/InternVL2_5-26B-MPO
     - OpenGVLab/InternVL2_5-1B
     - OpenGVLab/InternVL2_5-8B
@@ -66,6 +69,7 @@ turbomind_chat_model:
     - Qwen/Qwen-VL-Chat
     - Qwen/Qwen2.5-0.5B-Instruct
     - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2.5-32B-Instruct
     - Qwen/Qwen2.5-72B-Instruct
     - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
     - Qwen/Qwen2-57B-A14B-Instruct-GPTQ-Int4
@@ -85,6 +89,7 @@ turbomind_chat_model:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - codellama/CodeLlama-7b-Instruct-hf
     - THUDM/glm-4-9b-chat
+    - THUDM/codegeex4-all-9b
     - openbmb/MiniCPM-Llama3-V-2_5
     - openbmb/MiniCPM-V-2_6
     - allenai/Molmo-7B-D-0924
@@ -101,6 +106,7 @@ pytorch_chat_model:
     - internlm/internlm2_5-7b-chat
     - internlm/internlm2_5-20b-chat
     - internlm/internlm2-chat-20b
+    - OpenGVLab/InternVL3-38B
     - OpenGVLab/InternVL2_5-26B-MPO
     - OpenGVLab/InternVL2_5-1B
     - OpenGVLab/InternVL2_5-8B
@@ -121,6 +127,7 @@ pytorch_chat_model:
     - Qwen/Qwen2-1.5B-Instruct
     - Qwen/Qwen2.5-0.5B-Instruct
     - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2.5-32B-Instruct
     - Qwen/Qwen2.5-72B-Instruct
     - Qwen/Qwen2.5-VL-7B-Instruct
     - Qwen/Qwen1.5-7B-Chat
@@ -137,12 +144,11 @@ pytorch_chat_model:
     - deepseek-ai/deepseek-moe-16b-chat
     - deepseek-ai/deepseek-coder-1.3b-instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
-    - deepseek-ai/deepseek-vl2-tiny
-    - deepseek-ai/deepseek-vl2
     - THUDM/chatglm2-6b
     - THUDM/cogvlm2-llama3-chinese-chat-19B
     - THUDM/glm-4v-9b
     - THUDM/glm-4-9b-chat
+    - THUDM/codegeex4-all-9b
     - openbmb/MiniCPM-V-2_6
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -153,6 +159,7 @@ turbomind_vl_model:
     - liuhaotian/llava-v1.6-vicuna-7b
     - 01-ai/Yi-VL-6B
     - deepseek-ai/deepseek-vl-1.3b-chat
+    - OpenGVLab/InternVL3-38B
     - OpenGVLab/InternVL2_5-26B-MPO
     - OpenGVLab/InternVL-Chat-V1-5
     - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
@@ -176,6 +183,7 @@ pytorch_vl_model:
     - OpenGVLab/InternVL2_5-26B-MPO
     - OpenGVLab/InternVL-Chat-V1-5
     - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
+    - OpenGVLab/InternVL3-38B
     - OpenGVLab/InternVL2_5-1B
     - OpenGVLab/InternVL2_5-8B
     - OpenGVLab/InternVL2_5-26B
@@ -196,8 +204,6 @@ pytorch_vl_model:
     - openbmb/MiniCPM-V-2_6
     - microsoft/Phi-3-vision-128k-instruct
     - microsoft/Phi-3.5-vision-instruct
-    - deepseek-ai/deepseek-vl2-tiny
-    - deepseek-ai/deepseek-vl2
 
 turbomind_base_model:
     - internlm/internlm2_5-7b
@@ -214,7 +220,6 @@ pytorch_base_model:
 turbomind_quatization:
     no_awq:
         - meta-llama/Meta-Llama-3-1-70B-Instruct
-        - Qwen/Qwen2.5-72B-Instruct
         - Qwen/Qwen1.5-MoE-A2.7B-Chat
         - Qwen/Qwen2-VL-2B-Instruct
         - Qwen/Qwen2-VL-7B-Instruct
@@ -224,6 +229,7 @@ turbomind_quatization:
         - deepseek-ai/DeepSeek-V2-Lite-Chat
        - codellama/CodeLlama-7b-Instruct-hf
        - allenai/Molmo-7B-D-0924
+        - THUDM/codegeex4-all-9b
     gptq:
         - internlm/internlm2_5-7b-chat
     no_kvint4:
@@ -240,6 +246,7 @@ turbomind_quatization:
         - Qwen/Qwen-VL-Chat
         - Qwen/Qwen2.5-0.5B-Instruct
         - Qwen/Qwen2.5-7B-Instruct
+        - Qwen/Qwen2.5-32B-Instruct
         - Qwen/Qwen2.5-72B-Instruct
         - Qwen/Qwen2-VL-2B-Instruct
         - Qwen/Qwen2-VL-7B-Instruct
@@ -300,6 +307,7 @@ pytorch_quatization:
         - Qwen/Qwen-VL-Chat
         - Qwen/Qwen2.5-0.5B-Instruct
         - Qwen/Qwen2.5-7B-Instruct
+        - Qwen/Qwen2.5-32B-Instruct
         - Qwen/Qwen2.5-72B-Instruct
         - Qwen/Qwen2-VL-2B-Instruct
         - Qwen/Qwen2-VL-7B-Instruct
@@ -330,6 +338,7 @@ benchmark_model:
     - internlm/internlm2_5-20b-chat
     - THUDM/glm-4-9b-chat
     - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2.5-32B-Instruct
    - Qwen/Qwen2.5-72B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
     - mistralai/Mixtral-8x7B-Instruct-v0.1
```
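
Note on `tp_config`: the mapping gives the tensor-parallel degree for models that need more than one GPU, keyed by the repository basename, so `Qwen2.5-32B-Instruct: 4` runs the new 32B model across 4 GPUs and `InternVL3-38B: 2` across 2. A sketch of how a harness could resolve it; the lookup rule here is an assumption, not quoted from the autotest code:

```python
import yaml  # PyYAML

def get_tp(model_id: str, config_path: str = 'autotest/config.yaml') -> int:
    # Assumed rule: default to tp=1 unless the model's basename appears in
    # tp_config (e.g. 'Qwen/Qwen2.5-32B-Instruct' -> 'Qwen2.5-32B-Instruct').
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    return cfg.get('tp_config', {}).get(model_id.split('/')[-1], 1)

# With this commit's config: get_tp('Qwen/Qwen2.5-32B-Instruct') == 4
# and get_tp('OpenGVLab/InternVL3-38B') == 2.
```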

autotest/interface/pipeline/test_pipeline_func.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -424,7 +424,7 @@ def run_pipeline_testcase_bad_words(config, model, backend, file_name):
     # test bad_words
     gen_config = GenerationConfig(bad_words=[' and', '浦', ' to'])
     response = pipe(['Hi, pls intro yourself', 'Shanghai is'], gen_config=gen_config)
-    result = '蒲' in response[0].text or 'sensetime' in response[0].text
+    result = '蒲' in response[0].text or 'SenseTime' in response[0].text
     for i in range(2):
         result &= '浦' not in response[i].text and ' and' not in response[i].text and ' to ' not in response[i].text
     save_pipeline_common_log(config, file_name, result, response)
@@ -896,7 +896,7 @@ def test_backend_config_validate_pytorch(config, model, backend, worker_id):
         del os.environ['CUDA_VISIBLE_DEVICES']
 
 
-@pytest.mark.parametrize('model', ['internlm/internlm2_5-20b-chat', 'OpenGVLab/InternVL2_5-26B'])
+@pytest.mark.parametrize('model', ['internlm/internlm2_5-20b-chat'])
 @pytest.mark.parametrize('backend', [TurbomindEngineConfig])
 def test_backend_config_tp(config, model, backend, worker_id):
     with pytest.raises(AssertionError):
```
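
Note: the first fix works because Python's `in` operator on strings is case-sensitive, so `'sensetime'` never matched the "SenseTime" spelling the model actually emits. A case-insensitive variant would make the check robust to future capitalization changes; this is an alternative sketch, not what the commit does:

```python
def mentions_any(text: str, needles: list[str]) -> bool:
    """Case-insensitive substring check over a list of expected markers."""
    lowered = text.lower()
    return any(needle.lower() in lowered for needle in needles)

# result = mentions_any(response[0].text, ['蒲', 'SenseTime'])
```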
