Skip to content

Commit 70729be

Browse files
robertgshaw2-redhat and jimpang
authored and committed
[UX] Add Feedback During CUDAGraph Capture (vllm-project#19501)
Signed-off-by: [email protected] <[email protected]>
1 parent 264d263 commit 70729be

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

vllm/v1/worker/gpu_model_runner.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import torch
1313
import torch.distributed
1414
import torch.nn as nn
15+
from tqdm import tqdm
1516

1617
import vllm.envs as envs
1718
from vllm.attention import AttentionType, get_attn_backend
@@ -2034,7 +2035,9 @@ def capture_model(self) -> None:
20342035
# can reuse the memory pool allocated for the large shapes.
20352036
with graph_capture(device=self.device):
20362037
skip_attn = not self.vllm_config.compilation_config.full_cuda_graph
2037-
for num_tokens in reversed(self.cudagraph_batch_sizes):
2038+
for num_tokens in tqdm(reversed(self.cudagraph_batch_sizes),
2039+
desc="Capturing CUDA graphs",
2040+
total=len(self.cudagraph_batch_sizes)):
20382041
for _ in range(self.vllm_config.compilation_config.
20392042
cudagraph_num_of_warmups):
20402043
self._dummy_run(num_tokens, skip_attn=skip_attn)

0 commit comments

Comments
 (0)