We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 637d366, commit c1e3a38 (Copy full SHA for c1e3a38)
vllm/spec_decode/draft_model_runner.py
@@ -303,8 +303,9 @@ def execute_model(
303
304
if self.return_hidden_states and is_fallback:
305
if use_cuda_graph:
306
- indices = model_input.sampling_metadata.selected_token_indices
307
- output.hidden_states = hidden_states[: len(indices)]
+ indices = model_input.sampling_metadata\
+ .selected_token_indices
308
+ output.hidden_states = hidden_states[:len(indices)]
309
else:
310
output.hidden_states = hidden_states
311
0 commit comments