We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c5d8594, commit 3283df1 (Copy full SHA for 3283df1)
vllm/entrypoints/openai/serving_chat.py
@@ -726,6 +726,8 @@ async def chat_completion_full_generator(
726
727
assert final_res.prompt_token_ids is not None
728
num_prompt_tokens = len(final_res.prompt_token_ids)
729
+ if final_res.encoder_prompt_token_ids is not None:
730
+ num_prompt_tokens += len(final_res.encoder_prompt_token_ids)
731
num_generated_tokens = sum(
732
len(output.token_ids) for output in final_res.outputs)
733
usage = UsageInfo(
0 commit comments