We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c0273a3 · commit fb53f59 — Copy full SHA for fb53f59
vllm/lora/ops/torch_ops/lora_ops.py
@@ -36,10 +36,13 @@ def bgmv_expand(inputs: torch.Tensor,
36
if outputs.shape[0] == 1 and output_tensor.shape[0] != 1:
37
limit = 1
38
39
+ # LoRA adapter and model may add different amounts of padding to output
40
+ common_len = min(outputs.shape[1], output_tensor.shape[1])
41
+
42
if add_inputs:
- output_tensor[:, :outputs.shape[1]] += outputs[:limit, :]
43
+ output_tensor[:, :common_len] += outputs[:limit, :common_len]
44
else:
- output_tensor[:, :outputs.shape[1]] = outputs[:limit, :]
45
+ output_tensor[:, :common_len] = outputs[:limit, :common_len]
46
47
48
def sgmv_shrink(
0 commit comments