Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
37945ff
print scheduler_config info
wgzintel Jul 22, 2025
462b8c7
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Jul 26, 2025
165b03e
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 3, 2025
1fcade8
Remove debug methods and AggregationModeToString from public API
wgzintel Aug 4, 2025
caef15d
fix conflict
wgzintel Aug 13, 2025
d9364a8
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 19, 2025
753a95e
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 22, 2025
cebf1a7
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 24, 2025
71a01d8
resolve conflict
wgzintel Aug 25, 2025
60178de
revert file
wgzintel Aug 25, 2025
97366df
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 25, 2025
b61e734
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 27, 2025
ce465a9
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 2, 2025
9c9e19b
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 4, 2025
584adb7
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 9, 2025
62d7b54
Add mapping for every KVCrushAnchorPointMode values
wgzintel Sep 10, 2025
3517b28
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 10, 2025
9d8b49a
Add test case for printing scheduler_config
wgzintel Sep 11, 2025
ac87058
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 11, 2025
26e2cd7
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 12, 2025
80d1ac1
Add <unordered_map> and <sstream> header
wgzintel Sep 12, 2025
cd98a26
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Sep 16, 2025
e4c8e7e
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 17, 2025
3841134
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 26, 2025
d7ff536
updated py_openvino_genai.pyi
wgzintel Sep 26, 2025
9bc8e60
Aligned py_openvino_genai.pyi
wgzintel Sep 26, 2025
886278a
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Sep 26, 2025
338f41e
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Sep 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/cpp/include/openvino/genai/cache_eviction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ enum class AggregationMode {
* of a given token in cache */
};

// Maps each AggregationMode enumerator to its printable name (used by debug printers).
// Declared `inline` (C++17) so this header-defined constant has exactly one shared
// instance across translation units instead of a private copy per TU.
inline const std::unordered_map<AggregationMode, std::string> AggregationModeToString = {
    {AggregationMode::SUM, "SUM"},
    {AggregationMode::NORM_SUM, "NORM_SUM"},
};

/**
* @brief Configuration struct for the cache eviction algorithm.
*/
Expand Down Expand Up @@ -62,6 +67,20 @@ class CacheEvictionConfig {
return m_evictable_size;
}

/// Dumps this cache-eviction configuration to stdout, one field per line.
/// Debug aid only — callers gate it behind an environment check; writes to std::cout.
void print() const {
    std::cout << "CacheEvictionConfig { " << std::endl;
    std::cout << " start_size: " << get_start_size() << std::endl;
    std::cout << " recent_size: " << get_recent_size() << std::endl;
    std::cout << " max_cache_size: " << get_max_cache_size() << std::endl;
    std::cout << " evictable_size: " << get_evictable_size() << std::endl;
    // Single lookup instead of count()+at(); fall back to the raw numeric value so a
    // newly added enum value is never silently dropped from the dump.
    auto mode_it = AggregationModeToString.find(aggregation_mode);
    if (mode_it != AggregationModeToString.end()) {
        std::cout << " aggregation_mode: " << mode_it->second << std::endl;
    } else {
        std::cout << " aggregation_mode: " << static_cast<int>(aggregation_mode) << std::endl;
    }
    std::cout << " apply_rotation: " << apply_rotation << std::endl;
    std::cout << " snapkv_window_size: " << snapkv_window_size << std::endl;
    std::cout << " }" << std::endl;
}

/** The mode used to compute the importance of tokens for eviction */
AggregationMode aggregation_mode = AggregationMode::NORM_SUM;

Expand Down
19 changes: 19 additions & 0 deletions src/cpp/include/openvino/genai/scheduler_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,24 @@ struct SchedulerConfig {
dynamic_split_fuse == other.dynamic_split_fuse && use_cache_eviction == other.use_cache_eviction &&
max_num_seqs == other.max_num_seqs && enable_prefix_caching == other.enable_prefix_caching;
}

// Prints every SchedulerConfig field to stdout for debugging.
// When cache eviction / sparse attention are enabled, their nested
// configuration structs are dumped as well.
void print() const {
    // Local emitter keeps each field line in the same " name: value" shape.
    auto field = [](const char* name, const auto& value) {
        std::cout << " " << name << ": " << value << std::endl;
    };
    std::cout << "SchedulerConfig { " << std::endl;
    field("max_num_batched_tokens", max_num_batched_tokens);
    field("num_kv_blocks", num_kv_blocks);
    field("cache_size", cache_size);
    field("dynamic_split_fuse", dynamic_split_fuse);
    field("use_cache_eviction", use_cache_eviction);
    if (use_cache_eviction) {
        cache_eviction_config.print();
    }
    field("max_num_seqs", max_num_seqs);
    field("enable_prefix_caching", enable_prefix_caching);
    field("use_sparse_attention", use_sparse_attention);
    if (use_sparse_attention) {
        sparse_attention_config.print();
    }
    std::cout << "}" << std::endl;
}
};
}
16 changes: 15 additions & 1 deletion src/cpp/include/openvino/genai/sparse_attention.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ namespace ov::genai {

enum class SparseAttentionMode { TRISHAPE };

// Maps each SparseAttentionMode enumerator to its printable name (used by debug printers).
// Declared `inline` (C++17) so this header-defined constant has exactly one shared
// instance across translation units instead of a private copy per TU.
inline const std::unordered_map<SparseAttentionMode, std::string> SparseAttentionModeToString = {
    {SparseAttentionMode::TRISHAPE, "TRISHAPE"},
};

/**
* @brief Configuration struct for the sparse attention prefill functionality.
*/
Expand Down Expand Up @@ -43,6 +47,16 @@ class SparseAttentionConfig {
/** @param num_retained_recent_tokens_in_cache The number of most recent tokens in cache to be retained when
* applying sparse attention. Must be a multiple of block size. */
size_t num_retained_recent_tokens_in_cache = 1920;
};

/// Dumps this sparse-attention configuration to stdout, one field per line.
/// Debug aid only — callers gate it behind an environment check; writes to std::cout.
void print() const {
    std::cout << "SparseAttentionConfig { " << std::endl;
    // Single lookup instead of count()+at(); fall back to the raw numeric value so a
    // newly added enum value is never silently dropped from the dump.
    auto mode_it = SparseAttentionModeToString.find(mode);
    if (mode_it != SparseAttentionModeToString.end()) {
        std::cout << " sparseAttentionMode: " << mode_it->second << std::endl;
    } else {
        std::cout << " sparseAttentionMode: " << static_cast<int>(mode) << std::endl;
    }
    std::cout << " num_last_dense_tokens_in_prefill: " << num_last_dense_tokens_in_prefill << std::endl;
    std::cout << " num_retained_start_tokens_in_cache: " << num_retained_start_tokens_in_cache << std::endl;
    std::cout << " num_retained_recent_tokens_in_cache: " << num_retained_recent_tokens_in_cache << std::endl;
    std::cout << " }" << std::endl;
}
};
} // namespace ov::genai
8 changes: 8 additions & 0 deletions src/cpp/src/continuous_batching/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p
embedder = std::make_shared<InputsEmbedder>(models_path, device, vision_encoder_properties);
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down Expand Up @@ -105,6 +107,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
embedder = std::make_shared<InputsEmbedder>(models_path, device, properties_without_draft_model_without_gguf);
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down Expand Up @@ -148,6 +152,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
}
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down Expand Up @@ -196,6 +202,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
}
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down
8 changes: 8 additions & 0 deletions src/cpp/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,14 @@ void print_gguf_debug_info(const std::string &debug_info) {
std::cout << "[GGUF Reader]: " << debug_info << std::endl;
}

// Prints the full scheduler configuration to stdout, but only when the
// debug-info environment switch is enabled (see env_setup_for_print_debug_info).
void print_scheduler_config_info(const SchedulerConfig &scheduler_config) {
    if (env_setup_for_print_debug_info()) {
        scheduler_config.print();
    }
}

std::pair<ov::CompiledModel, KVDesc>
compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
const ov::AnyMap& config,
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ void print_compiled_model_properties(ov::CompiledModel& compiled_Model, const ch

void print_gguf_debug_info(const std::string& debug_info);

void print_scheduler_config_info(const SchedulerConfig &scheduler_config);

struct KVDesc {
uint32_t max_prompt_len;
uint32_t min_response_len;
Expand Down
Loading