-
Notifications
You must be signed in to change notification settings - Fork 188
Closed
Labels
bugSomething isn't workingSomething isn't working
Description
Describe the bug
Environment
transformers 4.50.0
llmcompressor 0.5.1
Code
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from datasets import load_dataset
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
import argparse
import os
import base64
from io import BytesIO
import torch
from datasets import load_dataset
from qwen_vl_utils import process_vision_info
from modelscope import AutoProcessor
# from llmcompressor.modifiers.quantization import GPTQModifier
from datasets import Dataset
#from llmcompressor.transformers import oneshot
from llmcompressor import oneshot
# from llmcompressor.transformers.tracing import (
# TraceableQwen2_5_VLForConditionalGeneration,
# )
from compressed_tensors.quantization.quant_args import (
QuantizationArgs,
QuantizationStrategy,
QuantizationType,
)
os.environ["LOCAL_RANK"]='0'
MODEL_ID = "Qwen/Qwen2_5-VL-32B-Instruct"
dst_model_path = "/mnt"
os.makedirs(dst_model_path, exist_ok=True)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
MODEL_ID, torch_dtype="auto"
)
processor = AutoProcessor.from_pretrained(MODEL_ID)
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048
# Oneshot arguments
DATASET_ID = "lmms-lab/flickr30k"
DATASET_SPLIT = {"calibration": "test[:512]"}
# Load dataset and preprocess.
ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42)
dampening_frac=0.01
def preprocess_and_tokenize(example):
# preprocess
buffered = BytesIO()
example["image"].save(buffered, format="PNG")
encoded_image = base64.b64encode(buffered.getvalue())
encoded_image_text = encoded_image.decode("utf-8")
base64_qwen = f"data:image;base64,{encoded_image_text}"
messages = [
{
"role": "user",
"content": [
{"type": "image", "image": base64_qwen},
{"type": "text", "text": "What does the image show?"},
],
}
]
text = processor.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
image_inputs, video_inputs = process_vision_info(messages)
# tokenize
batch = processor(
text=[text],
images=image_inputs,
videos=video_inputs,
padding=False,
max_length=MAX_SEQUENCE_LENGTH,
truncation=True,
)
for k in batch:
#添加 image_grid_thw(假设每个图像切片为 1x16x16)
if image_inputs is not None and len(image_inputs) > 0:
batch["image_grid_thw"] = torch.tensor([1, 16, 16]) # 根据实际模型调整
return batch
ds = ds.map(preprocess_and_tokenize, remove_columns=ds["calibration"].column_names)
recipe = [
QuantizationModifier(
ignore=["re:.*lm_head","re:visual.*"],
config_groups={
"group_0": dict(
targets=["Linear"],
weights=QuantizationArgs(
num_bits=8,
type=QuantizationType.FLOAT,
strategy=QuantizationStrategy.TENSOR,
symmetric=True,
dynamic=False,
ignore=["re:.*lm_head","re:visual.*"], # 细粒度忽略 lm_head
targets=["Linear"]
),
input_activations=QuantizationArgs(
num_bits=8,
type=QuantizationType.FLOAT,
strategy=QuantizationStrategy.TENSOR,
symmetric=True,
dynamic=False, # 改为静态量化
ignore=["re:.*lm_head","re:visual.*"], # 可选:如果不需要对某些层激活量化
targets=["Linear"]
)
)
},
)
]
oneshot(
model=model,
dataset=ds,
recipe=recipe,
max_seq_length=MAX_SEQUENCE_LENGTH,
num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)
SAVE_DIR = dst_model_path
oneshot(model=model, recipe=recipe, output_dir=SAVE_DIR)
processor.save_pretrained(SAVE_DIR)
Metadata
Metadata
Assignees
Labels
bugSomething isn't workingSomething isn't working