28 changes: 26 additions & 2 deletions vllm_ascend/worker/model_runner_v1.py
@@ -1609,6 +1609,10 @@ def execute_model(
)
sampler_output.sampled_token_ids = output_token_ids

num_nans_in_logits = {}
if envs_vllm.VLLM_COMPUTE_NANS_IN_LOGITS:
num_nans_in_logits = self._get_nans_in_logits(logits)

discard_sampled_tokens_req_indices: list[int] = []
# TODO(woosuk): The following loop can be slow since it iterates over
# the requests one by one. Optimize.
@@ -1699,8 +1703,8 @@ def execute_model(
logprobs=logprobs_lists,
prompt_logprobs_dict=prompt_logprobs_dict,
pooler_output=[],
finished_sending=finished_sending,
finished_recving=finished_recving,
kv_connector_output=None,
num_nans_in_logits=num_nans_in_logits,
)

durations = ProfileExecuteDuration().pop_captured_sync()
@@ -2643,3 +2647,23 @@ def get_supported_pooling_tasks(self):
return []

return list(model.pooler.get_supported_tasks())

def _get_nans_in_logits(
self,
logits: Optional[torch.Tensor],
) -> dict[str, int]:
try:
if logits is None:
return {req_id: 0 for req_id in self.input_batch.req_ids}

num_nans_in_logits = {}
num_nans_for_index = logits.isnan().sum(dim=-1).cpu().numpy()
for req_id in self.input_batch.req_ids:
req_index = self.input_batch.req_id_to_index[req_id]
num_nans_in_logits[req_id] = (
int(num_nans_for_index[req_index])
if num_nans_for_index is not None
and req_index < logits.shape[0] else 0)
return num_nans_in_logits
except IndexError:
return {}
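
For reference, the NaN-counting logic added in _get_nans_in_logits can be exercised in isolation. The snippet below is a hedged sketch, not part of the change: it mimics the helper on a toy logits tensor, with req_ids and req_id_to_index standing in for the self.input_batch attributes used above.

import torch

# Toy [num_reqs, vocab_size] logits: request "a" has one NaN, "b" has none.
logits = torch.tensor([
    [0.1, float("nan"), 0.3],
    [0.5, 0.2, 0.7],
])
req_ids = ["a", "b"]
req_id_to_index = {"a": 0, "b": 1}

# Same per-row reduction as the helper: count NaNs along the vocab dimension.
num_nans_for_index = logits.isnan().sum(dim=-1).cpu().numpy()
num_nans_in_logits = {
    req_id: (int(num_nans_for_index[req_id_to_index[req_id]])
             if req_id_to_index[req_id] < logits.shape[0] else 0)
    for req_id in req_ids
}
print(num_nans_in_logits)  # {'a': 1, 'b': 0}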