Skip to content

Commit 1aa1361

Browse files
authored
Fix OpenAI server completion_tokens referenced before assignment (#1996)
1 parent fe470ae commit 1aa1361

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,7 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
332332
# Send token-by-token response for each request.
333333
delta_text = output.text[len(previous_texts[i]):]
334334
previous_texts[i] = output.text
335-
completion_tokens = len(output.token_ids)
336-
previous_num_tokens[i] = completion_tokens
335+
previous_num_tokens[i] = len(output.token_ids)
337336
choice_data = ChatCompletionResponseStreamChoice(
338337
index=i,
339338
delta=DeltaMessage(content=delta_text),
@@ -351,8 +350,8 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
351350
prompt_tokens = len(res.prompt_token_ids)
352351
final_usage = UsageInfo(
353352
prompt_tokens=prompt_tokens,
354-
completion_tokens=completion_tokens,
355-
total_tokens=prompt_tokens + completion_tokens,
353+
completion_tokens=previous_num_tokens[i],
354+
total_tokens=prompt_tokens + previous_num_tokens[i],
356355
)
357356
choice_data = ChatCompletionResponseStreamChoice(
358357
index=i, delta=[], finish_reason=output.finish_reason)

0 commit comments

Comments (0)