[Misc] Improve logging for dynamic shape cache compilation (vllm-project#20573)

kyolebu · patrickvonplaten · commit d91b585e623a · 2025-07-15T15:08:50.000+02:00
Signed-off-by: kyolebu <kyu@redhat.com>
Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
@@ -120,10 +120,15 @@ def load(self,
         handle = self.cache[(runtime_shape, graph_index, self.compiler.name)]
         compiled_graph = self.compiler.load(handle, graph, example_inputs,
                                             graph_index, runtime_shape)
-        logger.debug(
-            "Directly load the %s-th graph for shape %s from %s via "
-            "handle %s", graph_index, str(runtime_shape), self.compiler.name,
-            handle)
+        if runtime_shape is None:
+            logger.debug(
+                "Directly load the %s-th graph for dynamic shape from %s via "
+                "handle %s", graph_index, self.compiler.name, handle)
+        else:
+            logger.debug(
+                "Directly load the %s-th graph for shape %s from %s via "
+                "handle %s", graph_index, str(runtime_shape),
+                self.compiler.name, handle)
         return compiled_graph
 
     def compile(self,
@@ -152,9 +157,15 @@ def compile(self,
                 # there can be multiple graphs due to piecewise compilation.
                 now = time.time()
                 elapsed = now - compilation_start_time
-                logger.info(
-                    "Directly load the compiled graph(s) for shape %s "
-                    "from the cache, took %.3f s", str(runtime_shape), elapsed)
+                if runtime_shape is None:
+                    logger.info(
+                        "Directly load the compiled graph(s) for dynamic shape "
+                        "from the cache, took %.3f s", elapsed)
+                else:
+                    logger.info(
+                        "Directly load the compiled graph(s) for shape %s "
+                        "from the cache, took %.3f s", str(runtime_shape),
+                        elapsed)
             return compiled_graph
 
         # no compiler cached the graph, or the cache is disabled,
@@ -178,19 +189,29 @@ def compile(self,
             self.is_cache_updated = True
             if graph_index == 0:
                 # adds some info logging for the first graph
-                logger.info("Cache the graph of shape %s for later use",
-                            str(runtime_shape))
-            logger.debug(
-                "store the %s-th graph for shape %s from %s via handle %s",
-                graph_index, str(runtime_shape), self.compiler.name, handle)
+                if runtime_shape is None:
+                    logger.info(
+                        "Cache the graph for dynamic shape for later use")
+                else:
+                    logger.info("Cache the graph of shape %s for later use",
+                                str(runtime_shape))
+            if runtime_shape is None:
+                logger.debug(
+                    "Store the %s-th graph for dynamic shape from %s via "
+                    "handle %s", graph_index, self.compiler.name, handle)
+            else:
+                logger.debug(
+                    "Store the %s-th graph for shape %s from %s via handle %s",
+                    graph_index, str(runtime_shape), self.compiler.name,
+                    handle)
 
         # after compiling the last graph, record the end time
         if graph_index == num_graphs - 1:
             now = time.time()
             elapsed = now - compilation_start_time
             compilation_config.compilation_time += elapsed
             if runtime_shape is None:
-                logger.info("Compiling a graph for general shape takes %.2f s",
+                logger.info("Compiling a graph for dynamic shape takes %.2f s",
                             elapsed)
             else:
                 logger.info("Compiling a graph for shape %s takes %.2f s",
@@ -308,7 +329,7 @@ def call_module(self, target: torch.fx.node.Target,
                 i for i, x in enumerate(args) if isinstance(x, torch.SymInt)
             ]
             global compilation_start_time
-            compiled_graph_for_general_shape = self.vllm_backend.\
+            compiled_graph_for_dynamic_shape = self.vllm_backend.\
                 compiler_manager.compile(
                 submod,
                 args,
@@ -323,7 +344,7 @@ def call_module(self, target: torch.fx.node.Target,
             self.module.__dict__[target] = piecewise_backend(
                 submod, self.vllm_config, self.graph_pool, index,
                 len(self.compile_submod_names), sym_shape_indices,
-                compiled_graph_for_general_shape, self.vllm_backend)
+                compiled_graph_for_dynamic_shape, self.vllm_backend)
 
             compilation_counter.num_piecewise_capturable_graphs_seen += 1