
Commit 2fa25ff
Author: wangzaijun
Commit message: fix moe create way
1 parent: 1a8d67c

This commit replaces the __new__-based FusedMoeWeightTP factory class with a module-level factory function, create_tp_moe_wegiht_obj, renames FusedBaseMoeWeightTP back to FusedMoeWeightTP, and updates all call sites and imports accordingly.

File tree: 6 files changed, +54 −84 lines

lightllm/common/basemodel/layer_weights/meta_weights/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -7,5 +7,5 @@
     ROWBMMWeight,
 )
 from .norm_weight import NormWeight, GEMMANormWeight, TpNormWeight
-from .fused_moe_weight_tp import FusedMoeWeightTP
+from .fused_moe_weight_tp import create_tp_moe_wegiht_obj
 from .fused_moe_weight_ep import FusedMoeWeightEP

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py
Lines changed: 47 additions & 49 deletions

@@ -1,62 +1,60 @@
 import os
 import torch
 import threading
-from typing import Optional, Tuple, List, Dict, Any
+from typing import Optional, Tuple, List, Dict, Any, Union
 from .base_weight import BaseWeight
 from lightllm.utils.dist_utils import get_current_rank_in_dp, get_current_device_id
 from lightllm.common.quantization import Quantcfg


-class FusedMoeWeightTP:
-    def __new__(
-        cls,
-        gate_proj_name: str,
-        down_proj_name: str,
-        up_proj_name: str,
-        e_score_correction_bias_name: str,
-        weight_prefix: str,
-        n_routed_experts: int,
-        num_fused_shared_experts: int,
-        split_inter_size: int,
-        data_type: torch.dtype,
-        network_config: Dict[str, Any],
-        layer_num: int,
-        quant_cfg: Quantcfg = None,
-    ):
-        quant_method = quant_cfg.get_quant_method(layer_num, "fused_moe")
-        if quant_method is not None and quant_method.method_name == "awq_marlin":
-            return FusedAWQMARLINMoeWeightTP(
-                gate_proj_name=gate_proj_name,
-                down_proj_name=down_proj_name,
-                up_proj_name=up_proj_name,
-                e_score_correction_bias_name=e_score_correction_bias_name,
-                weight_prefix=weight_prefix,
-                n_routed_experts=n_routed_experts,
-                num_fused_shared_experts=num_fused_shared_experts,
-                split_inter_size=split_inter_size,
-                data_type=data_type,
-                network_config=network_config,
-                layer_num=layer_num,
-                quant_cfg=quant_cfg,
-            )
-        else:
-            return FusedBaseMoeWeightTP(
-                gate_proj_name=gate_proj_name,
-                down_proj_name=down_proj_name,
-                up_proj_name=up_proj_name,
-                e_score_correction_bias_name=e_score_correction_bias_name,
-                weight_prefix=weight_prefix,
-                n_routed_experts=n_routed_experts,
-                num_fused_shared_experts=num_fused_shared_experts,
-                split_inter_size=split_inter_size,
-                data_type=data_type,
-                network_config=network_config,
-                layer_num=layer_num,
-                quant_cfg=quant_cfg,
-            )
+def create_tp_moe_wegiht_obj(
+    gate_proj_name: str,
+    down_proj_name: str,
+    up_proj_name: str,
+    e_score_correction_bias_name: str,
+    weight_prefix: str,
+    n_routed_experts: int,
+    num_fused_shared_experts: int,
+    split_inter_size: int,
+    data_type: torch.dtype,
+    network_config: Dict[str, Any],
+    layer_num: int,
+    quant_cfg: Quantcfg = None,
+) -> Union["FusedMoeWeightTP", "FusedAWQMARLINMoeWeightTP"]:
+    quant_method = quant_cfg.get_quant_method(layer_num, "fused_moe")
+    if quant_method is not None and quant_method.method_name == "awq_marlin":
+        return FusedAWQMARLINMoeWeightTP(
+            gate_proj_name=gate_proj_name,
+            down_proj_name=down_proj_name,
+            up_proj_name=up_proj_name,
+            e_score_correction_bias_name=e_score_correction_bias_name,
+            weight_prefix=weight_prefix,
+            n_routed_experts=n_routed_experts,
+            num_fused_shared_experts=num_fused_shared_experts,
+            split_inter_size=split_inter_size,
+            data_type=data_type,
+            network_config=network_config,
+            layer_num=layer_num,
+            quant_cfg=quant_cfg,
+        )
+    else:
+        return FusedMoeWeightTP(
+            gate_proj_name=gate_proj_name,
+            down_proj_name=down_proj_name,
+            up_proj_name=up_proj_name,
+            e_score_correction_bias_name=e_score_correction_bias_name,
+            weight_prefix=weight_prefix,
+            n_routed_experts=n_routed_experts,
+            num_fused_shared_experts=num_fused_shared_experts,
+            split_inter_size=split_inter_size,
+            data_type=data_type,
+            network_config=network_config,
+            layer_num=layer_num,
+            quant_cfg=quant_cfg,
+        )


-class FusedBaseMoeWeightTP(BaseWeight):
+class FusedMoeWeightTP(BaseWeight):
     def __init__(
         self,
         gate_proj_name: str,
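The core of the change: the old code used __new__ as a factory, returning instances of unrelated classes (FusedAWQMARLINMoeWeightTP or FusedBaseMoeWeightTP). That works, but it is confusing: the dispatching class can never produce an instance of itself, and Python skips __init__ whenever __new__ returns an object that is not an instance of cls. A plain module-level factory function makes the dispatch explicit and lets the return type be annotated. A minimal, self-contained sketch of the pattern (BaseExpertWeight, QuantExpertWeight, and create_expert_weight are illustrative names, not lightllm APIs):

# Illustrative sketch only -- these names are hypothetical, not from lightllm.


class BaseExpertWeight:
    def __init__(self, weight_prefix: str):
        self.weight_prefix = weight_prefix


class QuantExpertWeight(BaseExpertWeight):
    def __init__(self, weight_prefix: str):
        super().__init__(weight_prefix)
        self.quantized = True


def create_expert_weight(weight_prefix: str, quant: bool = False) -> BaseExpertWeight:
    # Dispatch happens in an ordinary function, so each class keeps its
    # normal __new__/__init__ semantics and the return type is explicit.
    if quant:
        return QuantExpertWeight(weight_prefix)
    return BaseExpertWeight(weight_prefix)


if __name__ == "__main__":
    w = create_expert_weight("model.layers.0.mlp.experts", quant=True)
    print(type(w).__name__)  # -> QuantExpertWeight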

lightllm/models/deepseek2/layer_weights/transformer_layer_weight.py
Lines changed: 2 additions & 2 deletions

@@ -8,9 +8,9 @@
     ROWMMWeight,
     COLMMWeight,
     NormWeight,
-    FusedMoeWeightTP,
     FusedMoeWeightEP,
     ROWBMMWeight,
+    create_tp_moe_wegiht_obj,
 )
 from functools import partial
 from ..triton_kernel.weight_dequant import weight_dequant

@@ -265,7 +265,7 @@ def _init_moe(self):
         moe_mode = os.getenv("MOE_MODE", "TP")
         assert moe_mode in ["EP", "TP"]
         if moe_mode == "TP":
-            self.experts = FusedMoeWeightTP(
+            self.experts = create_tp_moe_wegiht_obj(
                 gate_proj_name="gate_proj",
                 down_proj_name="down_proj",
                 up_proj_name="up_proj",

lightllm/models/mixtral/layer_weights/transformer_layer_weight.py
Lines changed: 2 additions & 6 deletions

@@ -2,11 +2,7 @@
 from lightllm.utils.log_utils import init_logger
 from lightllm.utils.envs_utils import enable_env_vars
 from lightllm.models.llama.layer_weights.transformer_layer_weight import LlamaTransformerLayerWeight
-from lightllm.common.basemodel.layer_weights.meta_weights import (
-    ROWMMWeight,
-    FusedMoeWeightTP,
-    FusedMoeWeightEP,
-)
+from lightllm.common.basemodel.layer_weights.meta_weights import ROWMMWeight, FusedMoeWeightEP, create_tp_moe_wegiht_obj

 logger = init_logger(__name__)

@@ -53,7 +49,7 @@ def _init_moe(self):
         assert moe_mode in ["TP"], f"Unsupported moe mode: {moe_mode}"

         if moe_mode == "TP":
-            self.experts = FusedMoeWeightTP(
+            self.experts = create_tp_moe_wegiht_obj(
                 gate_proj_name="w1",
                 down_proj_name="w2",
                 up_proj_name="w3",

lightllm/models/qwen3/layer_weights/transformer_layer_weight.py
Lines changed: 0 additions & 11 deletions

@@ -1,18 +1,7 @@
-import os
-import torch
-import math
-import numpy as np
-from lightllm.models.llama.layer_weights.transformer_layer_weight import LlamaTransformerLayerWeight
 from lightllm.models.qwen2.layer_weights.transformer_layer_weight import Qwen2TransformerLayerWeight
 from lightllm.common.basemodel.layer_weights.meta_weights import (
-    ROWMMWeight,
-    COLMMWeight,
     NormWeight,
-    FusedMoeWeightTP,
-    FusedMoeWeightEP,
-    ROWBMMWeight,
 )
-from functools import partial


 class Qwen3TransformerLayerWeight(Qwen2TransformerLayerWeight):

lightllm/models/qwen3_moe/layer_weights/transformer_layer_weight.py
Lines changed: 2 additions & 15 deletions

@@ -1,19 +1,6 @@
 import os
-import torch
-import math
-import numpy as np
-from lightllm.common.basemodel import TransformerLayerWeight
 from lightllm.models.qwen3.layer_weights.transformer_layer_weight import Qwen3TransformerLayerWeight
-from lightllm.utils.envs_utils import enable_env_vars
-from lightllm.common.basemodel.layer_weights.meta_weights import (
-    ROWMMWeight,
-    COLMMWeight,
-    NormWeight,
-    FusedMoeWeightTP,
-    FusedMoeWeightEP,
-    ROWBMMWeight,
-)
-from functools import partial
+from lightllm.common.basemodel.layer_weights.meta_weights import ROWMMWeight, FusedMoeWeightEP, create_tp_moe_wegiht_obj


 class Qwen3MOETransformerLayerWeight(Qwen3TransformerLayerWeight):

@@ -76,7 +63,7 @@ def _init_moe(self):
         moe_mode = os.getenv("MOE_MODE", "TP")
         assert moe_mode in ["EP", "TP"]
         if moe_mode == "TP":
-            self.experts = FusedMoeWeightTP(
+            self.experts = create_tp_moe_wegiht_obj(
                 gate_proj_name="gate_proj",
                 down_proj_name="down_proj",
                 up_proj_name="up_proj",
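All of the touched call sites follow the same pattern: a MOE_MODE environment variable (defaulting to "TP") chooses between tensor-parallel and expert-parallel expert weights, and the TP branch now goes through the new factory. A condensed sketch of that selection, assuming the imports shown in the diffs above and passing the constructor keyword arguments through unchanged (FusedMoeWeightEP's exact signature is not shown in this commit, so the EP call here is an assumption):

import os

from lightllm.common.basemodel.layer_weights.meta_weights import (
    FusedMoeWeightEP,
    create_tp_moe_wegiht_obj,
)


def build_experts(**kwargs):
    # MOE_MODE defaults to "TP"; the call sites in this commit
    # accept only "EP" or "TP".
    moe_mode = os.getenv("MOE_MODE", "TP")
    assert moe_mode in ["EP", "TP"]
    if moe_mode == "TP":
        return create_tp_moe_wegiht_obj(**kwargs)
    return FusedMoeWeightEP(**kwargs)  # assumption: EP path takes compatible kwargs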
