Skip to content

fix qwen2_5_vl VIDEO_TOTAL_PIXELS #4236

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions swift/llm/model/model/qwen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Any, Dict, Optional, Tuple, Type

import torch
Expand Down Expand Up @@ -557,6 +558,9 @@ def _get_cast_dtype(self) -> torch.dtype:
def patch_qwen_vl_utils(vision_process):
if hasattr(vision_process, '_patch'):
return
if os.getenv('VIDEO_MAX_PIXELS') and not os.getenv('VIDEO_TOTAL_PIXELS'):
# https://github.com/QwenLM/Qwen2.5-VL/issues/1120
os.environ['VIDEO_TOTAL_PIXELS'] = str(int(128000 * 28 * 28 * 0.9))
for key in [
'image_factor', 'min_pixels', 'max_pixels', 'max_ratio', 'video_min_pixels', 'video_max_pixels',
'video_total_pixels', 'frame_factor', 'fps', 'fps_min_frames', 'fps_max_frames'
Expand Down
1 change: 1 addition & 0 deletions swift/trainers/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def on_epoch_end(self, args: TrainingArguments, state: TrainerState, control: Tr
control = super().on_epoch_end(args, state, control, **kwargs)
evaluation_strategy = args.eval_strategy if hasattr(args, 'eval_strategy') else args.evaluation_strategy
if args.max_epochs is not None and args.max_epochs <= math.ceil(state.epoch):
logger.info('Training has reached `max_epochs`. The model will be saved and the training will be exited.')
if evaluation_strategy != IntervalStrategy.NO:
control.should_evaluate = True
if args.save_strategy != IntervalStrategy.NO:
Expand Down
10 changes: 6 additions & 4 deletions tests/test_align/test_template/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,17 +117,19 @@ def test_valley():

def test_qwen2_5_vl():
    """Check that Qwen2.5-VL describes the sample video identically on both template backends.

    The same request is run twice on one engine: once with the engine's
    default template backend and once after switching it to ``'jinja'``.
    Both responses must equal the pinned reference text.
    """
    # These settings are exported through the process environment and are
    # not restored afterwards, so they stay visible to later code in the
    # same process.
    os.environ['FPS'] = '1'
    # NOTE(review): presumably setting VIDEO_MAX_PIXELS without
    # VIDEO_TOTAL_PIXELS is what exercises the defaulting added to
    # patch_qwen_vl_utils -- confirm against the engine's load path.
    os.environ['VIDEO_MAX_PIXELS'] = str(360 * 420)

    pt_engine = PtEngine('Qwen/Qwen2.5-VL-7B-Instruct')
    messages = [{'role': 'user', 'content': '<video>What happened in the video?'}]
    videos = ['https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4']

    response = _infer_model(pt_engine, messages=messages, videos=videos)
    pt_engine.default_template.template_backend = 'jinja'
    response2 = _infer_model(pt_engine, messages=messages, videos=videos)

    # Only the post-change reference text is kept. The earlier reference
    # ("In the video, a baby is sitting on a bed ...") was removed by the
    # same change and must not be concatenated in front of this one.
    assert response == response2 == (
        'In the video, a young child is sitting on a bed and appears to be reading or flipping '
        'through a book. The child is wearing sunglasses and seems focused on the book. '
        'The setting looks like a cozy bedroom with various items such as clothes and '
        "possibly toys around. The child's actions suggest they might be exploring or "
        'learning about the book.')


def test_qwen2_5_omni():
Expand Down