Skip to content

Commit 1e52328

Browse files
Jialin authored and wenscarl committed
[Core] Optimize update checks in LogitsProcessor (vllm-project#21245)
Signed-off-by: Jialin Ouyang <[email protected]>
Signed-off-by: shuw <[email protected]>
1 parent 44b3c2e commit 1e52328

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

vllm/v1/sample/logits_processor.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -335,14 +335,19 @@ def update_state(self, batch_update: Optional[BatchUpdate]):
335335
if not batch_update:
336336
return
337337

338+
needs_update: bool = False
338339
# Process added requests.
339-
needs_update = bool(batch_update.added)
340340
for index, params, _ in batch_update.added:
341341
if isinstance(params, SamplingParams) and (lb :=
342342
params.logit_bias):
343343
self.biases[index] = lb
344+
needs_update = True
344345
else:
345-
self.biases.pop(index, None)
346+
# Drop biases metadata at batch index
347+
if self.biases.pop(index, None) is not None:
348+
# If a new request replaces an old request which
349+
# specified biases, we should update processor tensors
350+
needs_update = True
346351

347352
if self.biases:
348353
# Process removed requests.
@@ -419,17 +424,20 @@ def update_state(self, batch_update: Optional[BatchUpdate]):
419424

420425
if batch_update:
421426
# Process added requests.
422-
needs_update |= bool(batch_update.added)
423427
for index, params, output_tok_ids in batch_update.added:
424428
if (isinstance(params, SamplingParams)
425429
and (min_tokens := params.min_tokens)
426430
and len(output_tok_ids) < min_tokens):
427431
# Replace request metadata at batch index
428432
self.min_toks[index] = (min_tokens, output_tok_ids,
429433
params.all_stop_token_ids)
434+
needs_update = True
430435
else:
431-
# Drop request metadata at batch index
432-
self.min_toks.pop(index, None)
436+
# Drop min_toks metadata at batch index
437+
if self.min_toks.pop(index, None) is not None:
438+
# If a new request replaces an old request which
439+
# specified min_toks, we should update processor tensors
440+
needs_update = True
433441

434442
if self.min_toks:
435443
# Process removed requests.

0 commit comments

Comments
 (0)