1 file changed: +4 −1 lines changed
@@ -12,6 +12,7 @@
 import torch
 import torch.distributed
 import torch.nn as nn
+from tqdm import tqdm

 import vllm.envs as envs
 from vllm.attention import AttentionType, get_attn_backend
@@ -2034,7 +2035,9 @@ def capture_model(self) -> None:
         # can reuse the memory pool allocated for the large shapes.
         with graph_capture(device=self.device):
             skip_attn = not self.vllm_config.compilation_config.full_cuda_graph
-            for num_tokens in reversed(self.cudagraph_batch_sizes):
+            for num_tokens in tqdm(reversed(self.cudagraph_batch_sizes),
+                                   desc="Capturing CUDA graphs",
+                                   total=len(self.cudagraph_batch_sizes)):
                 for _ in range(self.vllm_config.compilation_config.
                                cudagraph_num_of_warmups):
                     self._dummy_run(num_tokens, skip_attn=skip_attn)
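The change wraps the CUDA graph capture loop in a tqdm progress bar. Note that total= must be passed explicitly: reversed() returns an iterator with no __len__, so tqdm cannot infer the bar length from the iterable alone. A minimal standalone sketch of the pattern (the batch-size values here are made up for illustration; vLLM derives the real ones from its compilation config):

from tqdm import tqdm

# Hypothetical stand-in for self.cudagraph_batch_sizes; reversed() walks
# it largest-first so later captures can reuse the memory pool allocated
# for the large shapes.
cudagraph_batch_sizes = [1, 2, 4, 8, 16, 32]

# reversed() yields a list_reverseiterator, which has no __len__, so
# tqdm needs total= to render a determinate progress bar.
for num_tokens in tqdm(reversed(cudagraph_batch_sizes),
                       desc="Capturing CUDA graphs",
                       total=len(cudagraph_batch_sizes)):
    pass  # placeholder for the per-size warmup runs and graph capture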