@@ -507,42 +507,11 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
507
507
output_token_ids = [],
508
508
lora_request = new_req_data .lora_request ,
509
509
)
510
-
511
510
self .requests [req_id ] = req_state
512
511
513
512
# Only relevant for models using M-RoPE (e.g., Qwen2-VL)
514
513
if self .uses_mrope :
515
- image_grid_thw = []
516
- video_grid_thw = []
517
- second_per_grid_ts = []
518
- audio_feature_lengths = []
519
- use_audio_in_video = False
520
- for mm_item in req_state .mm_kwargs :
521
- mm_input = mm_item .get_data ()
522
- if (t := mm_input .get ("image_grid_thw" )) is not None :
523
- image_grid_thw .append (t .tolist ())
524
- if (t := mm_input .get ("video_grid_thw" )) is not None :
525
- video_grid_thw .append (t .tolist ())
526
- if (t := mm_input .get ("second_per_grid_ts" )) is not None :
527
- second_per_grid_ts .append (t )
528
- if (t :=
529
- mm_input .get ("audio_feature_lengths" )) is not None :
530
- audio_feature_lengths .append (t )
531
- if mm_input .get ("use_audio_in_video" ) is True :
532
- use_audio_in_video = True
533
-
534
- hf_config = self .model_config .hf_config
535
-
536
- req_state .mrope_positions , req_state .mrope_position_delta = \
537
- MRotaryEmbedding .get_input_positions_tensor (
538
- req_state .prompt_token_ids ,
539
- hf_config = hf_config ,
540
- image_grid_thw = image_grid_thw ,
541
- video_grid_thw = video_grid_thw ,
542
- second_per_grid_ts = second_per_grid_ts ,
543
- audio_feature_lengths = audio_feature_lengths ,
544
- use_audio_in_video = use_audio_in_video ,
545
- )
514
+ self ._init_mrope_positions (req_state )
546
515
547
516
reqs_to_add .append (req_state )
548
517
@@ -639,6 +608,36 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None:
639
608
# Refresh batch metadata with any pending updates.
640
609
self .input_batch .refresh_metadata ()
641
610
611
def _init_mrope_positions(self, req_state: CachedRequestState):
    """Compute the M-RoPE positions for a request and store them on it.

    Walks every multimodal item in ``req_state.mm_kwargs`` and gathers the
    ``image_grid_thw``, ``video_grid_thw``, ``second_per_grid_ts`` and
    ``audio_feature_lengths`` entries that are present (not ``None``), and
    records whether any item has ``use_audio_in_video`` set to ``True``.
    The collected values are passed, together with the prompt token ids
    and the model's HF config, to
    ``MRotaryEmbedding.get_input_positions_tensor``. The two values it
    returns are written to ``req_state.mrope_positions`` and
    ``req_state.mrope_position_delta``.

    Args:
        req_state: The cached request state to read multimodal inputs
            from and to update in place.
    """
    image_grids = []
    video_grids = []
    seconds_per_grid = []
    audio_lengths = []
    audio_in_video = False

    for item in req_state.mm_kwargs:
        data = item.get_data()

        # Grid shapes are converted to plain Python lists before being
        # collected; the remaining entries are collected as-is.
        image_thw = data.get("image_grid_thw")
        if image_thw is not None:
            image_grids.append(image_thw.tolist())

        video_thw = data.get("video_grid_thw")
        if video_thw is not None:
            video_grids.append(video_thw.tolist())

        per_grid_ts = data.get("second_per_grid_ts")
        if per_grid_ts is not None:
            seconds_per_grid.append(per_grid_ts)

        feature_lengths = data.get("audio_feature_lengths")
        if feature_lengths is not None:
            audio_lengths.append(feature_lengths)

        # Identity check on purpose: only a literal ``True`` flips the
        # flag, and once set it stays set for the rest of the items.
        if data.get("use_audio_in_video") is True:
            audio_in_video = True

    positions, position_delta = MRotaryEmbedding.get_input_positions_tensor(
        req_state.prompt_token_ids,
        hf_config=self.model_config.hf_config,
        image_grid_thw=image_grids,
        video_grid_thw=video_grids,
        second_per_grid_ts=seconds_per_grid,
        audio_feature_lengths=audio_lengths,
        use_audio_in_video=audio_in_video,
    )
    req_state.mrope_positions = positions
    req_state.mrope_position_delta = position_delta
640
+
642
641
def _extract_mm_kwargs (
643
642
self ,
644
643
scheduler_output : "SchedulerOutput" ,
0 commit comments