Commit 2ce90e5

Fix GLM-4 PPMissingLayer handling when using pipeline parallelism (PP) (vllm-project#21531)
Signed-off-by: zRzRzRzRzRzRzR <[email protected]>
1 parent 633f6e8 commit 2ce90e5

1 file changed: +9 −3

vllm/model_executor/models/glm4_moe.py

Lines changed: 9 additions & 3 deletions
@@ -612,14 +612,20 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.num_expert_groups = config.n_group
 
         self.moe_layers: list[FusedMoE] = []
+        example_moe = None
         for layer in self.model.layers:
+            if isinstance(layer, PPMissingLayer):
+                continue
+
             assert isinstance(layer, Glm4MoeDecoderLayer)
             if isinstance(layer.mlp, Glm4MoE):
+                # Pick last one layer since the first ones may be dense layers.
+                example_moe = layer.mlp
                 self.moe_layers.append(layer.mlp.experts)
 
-        # Pick last one layer since the first ones may be dense layers.
-        example_moe = typing.cast(
-            Glm4MoE, self.model.layers[config.num_hidden_layers - 1].mlp)
+        if example_moe is None:
+            raise RuntimeError("No Glm4MoE layer found in model.layers.")
+
         self.num_logical_experts = example_moe.n_logical_experts
         self.num_physical_experts = example_moe.n_physical_experts
         self.num_local_physical_experts = example_moe.n_local_physical_experts
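
Why the change was needed: under pipeline parallelism, layers owned by other PP ranks are replaced with PPMissingLayer placeholders, so indexing self.model.layers[config.num_hidden_layers - 1] directly can hit a placeholder instead of a Glm4MoE block and fail. Below is a minimal, self-contained sketch of the pattern the patch adopts. The class names mirror the diff, but their bodies and the collect_moe_layers helper are stand-ins invented for illustration, not vLLM's actual implementations.

# Minimal sketch (assumptions noted above): scan only locally owned layers,
# remember the last MoE block seen, and fail loudly if this rank has none.

class PPMissingLayer:
    """Placeholder for a decoder layer hosted on another PP rank."""

class Glm4MoE:
    """Stand-in MoE block exposing the expert counts read after the loop."""
    def __init__(self):
        self.experts = object()  # stand-in for the layer's FusedMoE module
        self.n_logical_experts = 8
        self.n_physical_experts = 8
        self.n_local_physical_experts = 4

class DenseMLP:
    """Stand-in for the dense MLP used by the first few layers."""

class DecoderLayer:
    def __init__(self, mlp):
        self.mlp = mlp

def collect_moe_layers(layers):
    """Hypothetical helper distilling the patched loop from the diff."""
    moe_layers = []
    example_moe = None
    for layer in layers:
        if isinstance(layer, PPMissingLayer):
            continue  # layer lives on another rank; there is no .mlp to inspect
        if isinstance(layer.mlp, Glm4MoE):
            # Keep overwriting so we end up with the last MoE layer seen;
            # the first layers may be dense.
            example_moe = layer.mlp
            moe_layers.append(layer.mlp.experts)
    if example_moe is None:
        raise RuntimeError("No Glm4MoE layer found in model.layers.")
    return moe_layers, example_moe

# This rank owns layers 0-1; layers 2-3 live on a later PP rank. The old
# layers[num_hidden_layers - 1].mlp lookup would hit a PPMissingLayer (which
# has no .mlp) and fail here, while the patched scan succeeds.
layers = [DecoderLayer(DenseMLP()), DecoderLayer(Glm4MoE()),
          PPMissingLayer(), PPMissingLayer()]
moe_layers, example_moe = collect_moe_layers(layers)
print(len(moe_layers), example_moe.n_logical_experts)  # -> 1 8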