File tree Expand file tree Collapse file tree 3 files changed +3
-1
lines changed Expand file tree Collapse file tree 3 files changed +3
-1
lines changed Original file line number Diff line number Diff line change @@ -20,7 +20,7 @@ base_emb_dim: 2048
2020base_mlp_dim : 768
2121base_num_query_heads : 32
2222base_num_kv_heads : 4
23- base_num_decoder_layers : 48
23+ base_num_decoder_layers : 4
2424head_dim : 128
2525mlp_activations : ["silu", "linear"]
2626vocab_size : 152064
Original file line number Diff line number Diff line change @@ -708,6 +708,7 @@ def __call__(
708708 # Loop over the number of sub-layers that make up one repeating pattern.
709709 for i in range (cfg .inhomogeneous_layer_cycle_interval ):
710710 layer = getattr (self , f"layer_{ i } " )
711+ # x, _ = layer(
711712 x = layer (
712713 x ,
713714 decoder_segment_ids ,
Original file line number Diff line number Diff line change @@ -50,6 +50,7 @@ def test_tiny_config(self):
5050 "enable_goodput_recording=False" ,
5151 "enable_checkpoint_cloud_logger=False" ,
5252 "monitor_goodput=False" ,
53+ "model_name=qwen3-next-80b-a3b" ,
5354 ]
5455 )
5556
You can’t perform that action at this time.
0 commit comments