
Commit 89e790c

Author: Varun Sundar Rabindranath
Commit message: review comments
1 parent e37b9db commit 89e790c

3 files changed: +9 -10 lines changed

vllm/core/scheduler.py

Lines changed: 3 additions & 4 deletions
@@ -913,10 +913,9 @@ def _schedule_prefills(
                 break
             elif can_allocate == AllocStatus.NEVER:
                 logger.warning(
-                    "Input prompt (%d tokens) + lookahead slots "
-                    "({num_lookahead_slots}) is too long"
-                    " and exceeds the capacity of block_manager",
-                    num_new_tokens)
+                    "Input prompt (%d tokens) + lookahead slots (%d) is "
+                    "too long and exceeds the capacity of block_manager",
+                    num_new_tokens, num_lookahead_slots)
                 for seq in waiting_seqs:
                     seq.status = SequenceStatus.FINISHED_IGNORED
                 ignored_seq_groups.append(seq_group)
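
The old format string mixed an f-string-style placeholder ({num_lookahead_slots}) into a %-style logging call, so the variable name was logged literally and the slot count was lost. A minimal standalone sketch of the before/after behaviour, with the surrounding scheduler state assumed away and num_new_tokens / num_lookahead_slots as stand-in values:

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("sketch")

num_new_tokens, num_lookahead_slots = 4096, 8

# Before: "{num_lookahead_slots}" is not a %-style placeholder, so it is
# emitted verbatim instead of being replaced by the slot count.
logger.warning(
    "Input prompt (%d tokens) + lookahead slots "
    "({num_lookahead_slots}) is too long"
    " and exceeds the capacity of block_manager",
    num_new_tokens)

# After: both values flow through %d placeholders, following the standard
# lazy %-formatting pattern, so formatting is deferred to the logging
# framework and only happens if the record is actually emitted.
logger.warning(
    "Input prompt (%d tokens) + lookahead slots (%d) is "
    "too long and exceeds the capacity of block_manager",
    num_new_tokens, num_lookahead_slots)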

vllm/engine/llm_engine.py

Lines changed: 5 additions & 5 deletions
@@ -974,6 +974,11 @@ def update_prefill_num_computed_tokens(
         seq_group_meta: SequenceGroupMetadata, num_outputs: int,
         is_first_step_output: Optional[bool]) -> None:
     """
+    When multi-step and chunked-prefill are enabled together, the
+    prefill sequence scheduled for multi-step execution turn into
+    decodes in the first step itself. This function accounts
+    for that conversion.
+
     seq_group: SequenceGroup - A prefill seq_group
     seq_group_meta: SequenceGroupMetadata - Metadata of the given
         prefill seq_group
@@ -987,11 +992,6 @@ def update_prefill_num_computed_tokens(
         must be None, as num_outputs > 1 indicates that outputs from
         all the steps in multi-step are submitted in a single burst.
         When multi-step is disabled, this value is always True.
-
-    When multi-step and chunked-prefill are enabled together, the
-    prefill sequence scheduled for multi-step execution turn into
-    decodes in the first step itself. This function accounts
-    for that conversion.
     """
 
     assert seq_group_meta.is_prompt
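
For context, a minimal sketch of the conversion the relocated docstring describes. This is not the actual vLLM implementation, and the names num_computed_tokens, num_scheduled_tokens, and prompt_len are hypothetical stand-ins: a chunked prefill that consumes the rest of its prompt in the first multi-step iteration runs as a decode for the remaining steps.

def turns_into_decode_on_first_step(num_computed_tokens: int,
                                    num_scheduled_tokens: int,
                                    prompt_len: int) -> bool:
    # If the first multi-step iteration reaches the end of the prompt, the
    # sequence samples its first token there; each remaining step in the
    # multi-step burst then processes one token at a time, i.e. as a decode.
    return num_computed_tokens + num_scheduled_tokens >= prompt_len

# A 100-token prompt with 60 tokens already computed and a 40-token chunk
# scheduled completes its prefill on step 1 and decodes thereafter.
assert turns_into_decode_on_first_step(60, 40, 100)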

vllm/model_executor/sampling_metadata.py

Lines changed: 1 addition & 1 deletion
@@ -160,7 +160,7 @@ def prepare_multistep_tensors(self, num_queries: int, device: str,
 
         Example:
         Let 2 prompts and 2 decodes be scheduled together. Let the
-        num-tokens to process for the 2 prompts be 5 and 8 resply.
+        num-tokens to process for the 2 prompts be 5 and 8 respectively.
 
         In that case, self.sampled_token_indices will be,
         [4, 12, 13, 14] as it is constructed for the first-step in
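
The docstring's example indices can be checked with a small sketch. Again, this is not the vLLM code; num_tokens_per_seq is a hypothetical input listing the tokens to process per scheduled sequence, prompts first, followed by the 1-token decodes.

def sampled_token_indices(num_tokens_per_seq: list[int]) -> list[int]:
    # Flatten all scheduled tokens into one index space and record the
    # index of each sequence's final token, which is where sampling
    # happens on the first step.
    indices, offset = [], 0
    for num_tokens in num_tokens_per_seq:
        offset += num_tokens
        indices.append(offset - 1)
    return indices

# 2 prompts processing 5 and 8 tokens, then 2 decodes (1 token each):
assert sampled_token_indices([5, 8, 1, 1]) == [4, 12, 13, 14]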
