Commit da27051

Author: Roger Wang
[Misc] clear and separate error messages for input too long and input + max-tokens too long (vllm-project#22803)
Signed-off-by: Roger Wang <[email protected]>
1 parent 19b927e commit da27051

File tree

1 file changed: 18 additions, 13 deletions


vllm/entrypoints/openai/serving_engine.py

Lines changed: 18 additions & 13 deletions
@@ -585,6 +585,8 @@ def _validate_input(
                 (EmbeddingChatRequest, EmbeddingCompletionRequest,
                  ScoreRequest, RerankRequest, ClassificationRequest)):
 
+            # Note: input length can be up to the entire model context length
+            # since these requests don't generate tokens.
             if token_num > self.max_model_len:
                 operations: dict[type[AnyRequest], str] = {
                     ScoreRequest: "score",
@@ -613,21 +615,24 @@ def _validate_input(
             max_tokens = request.max_completion_tokens or request.max_tokens
         else:
             max_tokens = getattr(request, "max_tokens", None)
-        if max_tokens is None:
-            if token_num >= self.max_model_len:
-                raise ValueError(
-                    f"This model's maximum context length is "
-                    f"{self.max_model_len} tokens. However, you requested "
-                    f"{token_num} tokens in the messages, "
-                    f"Please reduce the length of the messages.")
-        elif token_num + max_tokens > self.max_model_len:
+
+        # Note: input length can be up to model context length - 1 for
+        # completion-like requests.
+        if token_num >= self.max_model_len:
             raise ValueError(
                 f"This model's maximum context length is "
-                f"{self.max_model_len} tokens. However, you requested "
-                f"{max_tokens + token_num} tokens "
-                f"({token_num} in the messages, "
-                f"{max_tokens} in the completion). "
-                f"Please reduce the length of the messages or completion.")
+                f"{self.max_model_len} tokens. However, your request has "
+                f"{token_num} input tokens. Please reduce the length of "
+                "the input messages.")
+
+        if max_tokens is not None and \
+            token_num + max_tokens > self.max_model_len:
+            raise ValueError(
+                "'max_tokens' or 'max_completion_tokens' is too large: "
+                f"{max_tokens}. This model's maximum context length is "
+                f"{self.max_model_len} tokens and your request has "
+                f"{token_num} input tokens ({max_tokens} > {self.max_model_len}"
+                f" - {token_num}).")
 
         return TextTokensPrompt(prompt=input_text, prompt_token_ids=input_ids)
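For context, the new behavior can be sketched outside of OpenAIServing._validate_input as follows. The helper name check_prompt_and_max_tokens and the literal values are made up for illustration only; in vLLM, token_num comes from the tokenized prompt and max_model_len from the model config.

from typing import Optional


def check_prompt_and_max_tokens(token_num: int,
                                max_model_len: int,
                                max_tokens: Optional[int]) -> None:
    # Error 1: the prompt alone already fills or exceeds the context window.
    if token_num >= max_model_len:
        raise ValueError(
            f"This model's maximum context length is {max_model_len} tokens. "
            f"However, your request has {token_num} input tokens. "
            "Please reduce the length of the input messages.")

    # Error 2: the prompt fits, but prompt + requested completion does not.
    if max_tokens is not None and token_num + max_tokens > max_model_len:
        raise ValueError(
            "'max_tokens' or 'max_completion_tokens' is too large: "
            f"{max_tokens}. This model's maximum context length is "
            f"{max_model_len} tokens and your request has {token_num} "
            f"input tokens ({max_tokens} > {max_model_len} - {token_num}).")


# A 4000-token prompt fits a 4096-token context on its own, but not together
# with max_tokens=200, so only the second error fires.
try:
    check_prompt_and_max_tokens(token_num=4000, max_model_len=4096,
                                max_tokens=200)
except ValueError as err:
    print(err)

With the two checks separated, an over-long prompt is reported on its own, and an over-large max_tokens / max_completion_tokens value gets a message that spells out how much room the prompt leaves in the context window.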
