Skip to content

Commit b2c0bef

Browse files
committed
Add comments and improve condition for zero-out
Signed-off-by: Ming Yang <[email protected]>
1 parent 72f5da3 commit b2c0bef

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

vllm/model_executor/layers/fused_moe/cutlass_moe.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,10 @@ def run_cutlass_moe_fp8(
178178
c2 = _resize_cache(workspace2, (M * topk, N))
179179
c3 = _resize_cache(workspace13, (M * topk, K))
180180

181-
if expert_map is not None and not per_act_token:
181+
if not per_act_token and (expert_map is not None or use_batched_format):
182+
# This is necessary to avoid imprecise scale calculation caused by
183+
# random data in the unused workspace. The workspace is unused when
184+
# this rank handles only part of the tokens, or when the batched format is used.
182185
c1.fill_(0)
183186

184187
ops.cutlass_moe_mm(c1, a1q, w1, a1q_scale, w1_scale, expert_offsets,

0 commit comments

Comments
 (0)