Describe the bug
Environment
- transformers 4.50.0
- llmcompressor 0.5.1
Code
```python
import base64
import os
from io import BytesIO

import torch
from datasets import load_dataset
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
from compressed_tensors.quantization.quant_args import (
    QuantizationArgs,
    QuantizationStrategy,
    QuantizationType,
)
os.environ["LOCAL_RANK"] = "0"

MODEL_ID = "Qwen/Qwen2.5-VL-32B-Instruct"
dst_model_path = "/mnt"
os.makedirs(dst_model_path, exist_ok=True)

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype="auto"
)
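
# AutoProcessor loads both the tokenizer and the image processor for the model.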
processor = AutoProcessor.from_pretrained(MODEL_ID)

# One-shot calibration arguments.
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048
DATASET_ID = "lmms-lab/flickr30k"
DATASET_SPLIT = {"calibration": "test[:512]"}

# Load the calibration dataset; a dict split returns a DatasetDict
# keyed by "calibration".
ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42)
def preprocess_and_tokenize(example):
    # Preprocess: re-encode the PIL image as a base64 data URI so it can be
    # embedded in the chat template.
    buffered = BytesIO()
    example["image"].save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    encoded_image_text = encoded_image.decode("utf-8")
    base64_qwen = f"data:image;base64,{encoded_image_text}"
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": base64_qwen},
                {"type": "text", "text": "What does the image show?"},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)

    # Tokenize the prompt and preprocess the image in one pass.
    batch = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=False,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
    )
    # Override image_grid_thw, assuming a single 1x16x16 patch grid per image
    # (adjust for the actual model). The processor normally returns
    # image_grid_thw for image inputs already, so this replaces its value.
    if image_inputs is not None and len(image_inputs) > 0:
        batch["image_grid_thw"] = torch.tensor([1, 16, 16])
    return batch
ds = ds.map(preprocess_and_tokenize, remove_columns=ds["calibration"].column_names)
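
# Assumption (taken from llm-compressor's multimodal examples, not from this
# report): oneshot is given a collator that unwraps single-sample batches.
def data_collator(batch):
    assert len(batch) == 1
    return {key: torch.tensor(value) for key, value in batch[0].items()}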

recipe = [
    QuantizationModifier(
        # ignore/targets are configured at the modifier level;
        # QuantizationArgs does not accept them.
        ignore=["re:.*lm_head", "re:visual.*"],
        config_groups={
            "group_0": dict(
                targets=["Linear"],
                weights=QuantizationArgs(
                    num_bits=8,
                    type=QuantizationType.FLOAT,
                    strategy=QuantizationStrategy.TENSOR,
                    symmetric=True,
                    dynamic=False,
                ),
                input_activations=QuantizationArgs(
                    num_bits=8,
                    type=QuantizationType.FLOAT,
                    strategy=QuantizationStrategy.TENSOR,
                    symmetric=True,
                    dynamic=False,  # static activation quantization
                ),
            )
        },
    )
]
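# Static per-tensor FP8 for both weights and activations corresponds to the
# stock scheme="FP8" preset in llm-compressor.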

oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    data_collator=data_collator,
)

# Save the compressed model and processor.
SAVE_DIR = dst_model_path
model.save_pretrained(SAVE_DIR, save_compressed=True)
processor.save_pretrained(SAVE_DIR)
```
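
A quick generation pass is a useful sanity check that the quantized model still runs; a minimal sketch (not part of the original report; assumes a text-only prompt is acceptable):

```python
# Sanity check: one short text-only generation through the quantized model.
sample = processor(text="Hello, my name is", return_tensors="pt").to(model.device)
output = model.generate(**sample, max_new_tokens=20)
print(processor.decode(output[0], skip_special_tokens=True))
```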