Skip to content

Commit e6ff047

Browse files
WoosukKwon authored and FFFfff1FFFfff committed
[Misc] Move M-RoPE init logic to _init_mrope_positions (vllm-project#23422)
Signed-off-by: Woosuk Kwon <[email protected]>
Signed-off-by: FFFfff1FFFfff <[email protected]>
1 parent f7893e3 commit e6ff047

File tree

1 file changed

+31
-32
lines changed

1 file changed

+31
-32
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 31 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -507,42 +507,11 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
507507
output_token_ids=[],
508508
lora_request=new_req_data.lora_request,
509509
)
510-
511510
self.requests[req_id] = req_state
512511

513512
# Only relevant for models using M-RoPE (e.g., Qwen2-VL)
514513
if self.uses_mrope:
515-
image_grid_thw = []
516-
video_grid_thw = []
517-
second_per_grid_ts = []
518-
audio_feature_lengths = []
519-
use_audio_in_video = False
520-
for mm_item in req_state.mm_kwargs:
521-
mm_input = mm_item.get_data()
522-
if (t := mm_input.get("image_grid_thw")) is not None:
523-
image_grid_thw.append(t.tolist())
524-
if (t := mm_input.get("video_grid_thw")) is not None:
525-
video_grid_thw.append(t.tolist())
526-
if (t := mm_input.get("second_per_grid_ts")) is not None:
527-
second_per_grid_ts.append(t)
528-
if (t :=
529-
mm_input.get("audio_feature_lengths")) is not None:
530-
audio_feature_lengths.append(t)
531-
if mm_input.get("use_audio_in_video") is True:
532-
use_audio_in_video = True
533-
534-
hf_config = self.model_config.hf_config
535-
536-
req_state.mrope_positions, req_state.mrope_position_delta = \
537-
MRotaryEmbedding.get_input_positions_tensor(
538-
req_state.prompt_token_ids,
539-
hf_config=hf_config,
540-
image_grid_thw=image_grid_thw,
541-
video_grid_thw=video_grid_thw,
542-
second_per_grid_ts=second_per_grid_ts,
543-
audio_feature_lengths=audio_feature_lengths,
544-
use_audio_in_video=use_audio_in_video,
545-
)
514+
self._init_mrope_positions(req_state)
546515

547516
reqs_to_add.append(req_state)
548517

@@ -639,6 +608,36 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
639608
# Refresh batch metadata with any pending updates.
640609
self.input_batch.refresh_metadata()
641610

611+
def _init_mrope_positions(self, req_state: CachedRequestState):
    """Compute and store the M-RoPE positions for one request.

    Walks every multimodal item in ``req_state.mm_kwargs``, gathers the
    per-item metadata found under the keys ``image_grid_thw``,
    ``video_grid_thw``, ``second_per_grid_ts``, ``audio_feature_lengths``
    and ``use_audio_in_video``, and forwards it together with the prompt
    token ids and the model's HF config to
    ``MRotaryEmbedding.get_input_positions_tensor``.

    The returned pair is written back onto the request state as
    ``mrope_positions`` and ``mrope_position_delta``; nothing is returned.

    Args:
        req_state: Request state whose ``mm_kwargs`` and
            ``prompt_token_ids`` are read and whose M-RoPE fields are set.
    """
    image_grids = []
    video_grids = []
    grid_seconds = []
    audio_lengths = []
    audio_in_video = False

    for item in req_state.mm_kwargs:
        data = item.get_data()

        # Grid descriptors are converted to plain lists via ``tolist()``.
        image_grid = data.get("image_grid_thw")
        if image_grid is not None:
            image_grids.append(image_grid.tolist())

        video_grid = data.get("video_grid_thw")
        if video_grid is not None:
            video_grids.append(video_grid.tolist())

        # The remaining entries are forwarded exactly as stored.
        seconds = data.get("second_per_grid_ts")
        if seconds is not None:
            grid_seconds.append(seconds)

        audio_len = data.get("audio_feature_lengths")
        if audio_len is not None:
            audio_lengths.append(audio_len)

        # The flag latches on once any item carries the literal ``True``.
        audio_in_video |= data.get("use_audio_in_video") is True

    positions, position_delta = MRotaryEmbedding.get_input_positions_tensor(
        req_state.prompt_token_ids,
        hf_config=self.model_config.hf_config,
        image_grid_thw=image_grids,
        video_grid_thw=video_grids,
        second_per_grid_ts=grid_seconds,
        audio_feature_lengths=audio_lengths,
        use_audio_in_video=audio_in_video,
    )
    req_state.mrope_positions = positions
    req_state.mrope_position_delta = position_delta
640+
642641
def _extract_mm_kwargs(
643642
self,
644643
scheduler_output: "SchedulerOutput",

0 commit comments

Comments
 (0)