We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c0273a3 · commit fb53f59 — Copy full SHA for fb53f59
vllm/lora/ops/torch_ops/lora_ops.py
@@ -36,10 +36,13 @@ def bgmv_expand(inputs: torch.Tensor,
36
if outputs.shape[0] == 1 and output_tensor.shape[0] != 1:
37
limit = 1
38
39
+ # LoRA adapter and model may add different amounts of padding to output
40
+ common_len = min(outputs.shape[1], output_tensor.shape[1])
41
+
42
if add_inputs:
- output_tensor[:, :outputs.shape[1]] += outputs[:limit, :]
43
+ output_tensor[:, :common_len] += outputs[:limit, :common_len]
44
else:
- output_tensor[:, :outputs.shape[1]] = outputs[:limit, :]
45
+ output_tensor[:, :common_len] = outputs[:limit, :common_len]
46
47
48
def sgmv_shrink(
0 commit comments