Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
37945ff
print scheduler_config info
wgzintel Jul 22, 2025
462b8c7
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Jul 26, 2025
165b03e
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 3, 2025
1fcade8
Remove debug methods and AggregationModeToString from public API
wgzintel Aug 4, 2025
caef15d
fix conflict
wgzintel Aug 13, 2025
d9364a8
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 19, 2025
753a95e
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 22, 2025
cebf1a7
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 24, 2025
71a01d8
resolve conflict
wgzintel Aug 25, 2025
60178de
revert file
wgzintel Aug 25, 2025
97366df
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 25, 2025
b61e734
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Aug 27, 2025
ce465a9
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 2, 2025
9c9e19b
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 4, 2025
584adb7
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 9, 2025
62d7b54
Add mapping for every KVCrushAnchorPointMode values
wgzintel Sep 10, 2025
3517b28
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 10, 2025
9d8b49a
Add test case for printing scheduler_config
wgzintel Sep 11, 2025
ac87058
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 11, 2025
26e2cd7
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 12, 2025
80d1ac1
Add <unordered_map> and <sstream> header
wgzintel Sep 12, 2025
cd98a26
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Sep 16, 2025
e4c8e7e
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 17, 2025
3841134
Merge branch 'master' of https://github.com/openvinotoolkit/openvino.…
wgzintel Sep 26, 2025
d7ff536
updated py_openvino_genai.pyi
wgzintel Sep 26, 2025
9bc8e60
Aligned py_openvino_genai.pyi
wgzintel Sep 26, 2025
886278a
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Sep 26, 2025
338f41e
Merge branch 'master' into guozhong/print_scheduler_config_info
wgzintel Sep 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/cpp/include/openvino/genai/cache_eviction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ enum class AggregationMode {
* of a given token in cache */
};

// Maps each AggregationMode enumerator to its printable name (used by debug printers).
// Declared `inline` (C++17) so this header-defined constant has exactly one shared
// instance across translation units instead of a private copy per TU.
inline const std::unordered_map<AggregationMode, std::string> AggregationModeToString = {
    {AggregationMode::SUM, "SUM"},
    {AggregationMode::NORM_SUM, "NORM_SUM"},
};

/**
* @brief Configuration struct for the cache eviction algorithm.
*/
Expand Down Expand Up @@ -62,6 +67,20 @@ class CacheEvictionConfig {
return m_evictable_size;
}

/// Dumps this cache-eviction configuration to stdout, one field per line.
/// Debug aid only — callers gate it behind an environment check; writes to std::cout.
void print() const {
    std::cout << "CacheEvictionConfig { " << std::endl;
    std::cout << " start_size: " << get_start_size() << std::endl;
    std::cout << " recent_size: " << get_recent_size() << std::endl;
    std::cout << " max_cache_size: " << get_max_cache_size() << std::endl;
    std::cout << " evictable_size: " << get_evictable_size() << std::endl;
    // Single lookup instead of count()+at(); fall back to the raw numeric value so a
    // newly added enum value is never silently dropped from the dump.
    auto mode_it = AggregationModeToString.find(aggregation_mode);
    if (mode_it != AggregationModeToString.end()) {
        std::cout << " aggregation_mode: " << mode_it->second << std::endl;
    } else {
        std::cout << " aggregation_mode: " << static_cast<int>(aggregation_mode) << std::endl;
    }
    std::cout << " apply_rotation: " << apply_rotation << std::endl;
    std::cout << " snapkv_window_size: " << snapkv_window_size << std::endl;
    std::cout << " }" << std::endl;
}

/** The mode used to compute the importance of tokens for eviction */
AggregationMode aggregation_mode = AggregationMode::NORM_SUM;

Expand Down
19 changes: 19 additions & 0 deletions src/cpp/include/openvino/genai/scheduler_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,24 @@ struct SchedulerConfig {
dynamic_split_fuse == other.dynamic_split_fuse && use_cache_eviction == other.use_cache_eviction &&
max_num_seqs == other.max_num_seqs && enable_prefix_caching == other.enable_prefix_caching;
}

// Prints every SchedulerConfig field to stdout for debugging.
// When cache eviction / sparse attention are enabled, their nested
// configuration structs are dumped as well.
void print() const {
    // Local emitter keeps each field line in the same " name: value" shape.
    auto field = [](const char* name, const auto& value) {
        std::cout << " " << name << ": " << value << std::endl;
    };
    std::cout << "SchedulerConfig { " << std::endl;
    field("max_num_batched_tokens", max_num_batched_tokens);
    field("num_kv_blocks", num_kv_blocks);
    field("cache_size", cache_size);
    field("dynamic_split_fuse", dynamic_split_fuse);
    field("use_cache_eviction", use_cache_eviction);
    if (use_cache_eviction) {
        cache_eviction_config.print();
    }
    field("max_num_seqs", max_num_seqs);
    field("enable_prefix_caching", enable_prefix_caching);
    field("use_sparse_attention", use_sparse_attention);
    if (use_sparse_attention) {
        sparse_attention_config.print();
    }
    std::cout << "}" << std::endl;
}
};
}
16 changes: 15 additions & 1 deletion src/cpp/include/openvino/genai/sparse_attention.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ namespace ov::genai {

enum class SparseAttentionMode { TRISHAPE };

// Maps each SparseAttentionMode enumerator to its printable name (used by debug printers).
// Declared `inline` (C++17) so this header-defined constant has exactly one shared
// instance across translation units instead of a private copy per TU.
inline const std::unordered_map<SparseAttentionMode, std::string> SparseAttentionModeToString = {
    {SparseAttentionMode::TRISHAPE, "TRISHAPE"},
};

/**
* @brief Configuration struct for the sparse attention prefill functionality.
*/
Expand Down Expand Up @@ -43,6 +47,16 @@ class SparseAttentionConfig {
/** @param num_retained_recent_tokens_in_cache The number of most recent tokens in cache to be retained when
* applying sparse attention. Must be a multiple of block size. */
size_t num_retained_recent_tokens_in_cache = 1920;
};

/// Dumps this sparse-attention configuration to stdout, one field per line.
/// Debug aid only — callers gate it behind an environment check; writes to std::cout.
void print() const {
    std::cout << "SparseAttentionConfig { " << std::endl;
    // Single lookup instead of count()+at(); fall back to the raw numeric value so a
    // newly added enum value is never silently dropped from the dump.
    auto mode_it = SparseAttentionModeToString.find(mode);
    if (mode_it != SparseAttentionModeToString.end()) {
        std::cout << " sparseAttentionMode: " << mode_it->second << std::endl;
    } else {
        std::cout << " sparseAttentionMode: " << static_cast<int>(mode) << std::endl;
    }
    std::cout << " num_last_dense_tokens_in_prefill: " << num_last_dense_tokens_in_prefill << std::endl;
    std::cout << " num_retained_start_tokens_in_cache: " << num_retained_start_tokens_in_cache << std::endl;
    std::cout << " num_retained_recent_tokens_in_cache: " << num_retained_recent_tokens_in_cache << std::endl;
    std::cout << " }" << std::endl;
}
};
} // namespace ov::genai
8 changes: 8 additions & 0 deletions src/cpp/src/continuous_batching/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p
embedder = std::make_shared<InputsEmbedder>(models_path, device, vision_encoder_properties);
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down Expand Up @@ -105,6 +107,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
embedder = std::make_shared<InputsEmbedder>(models_path, device, properties_without_draft_model_without_gguf);
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down Expand Up @@ -148,6 +152,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
}
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down Expand Up @@ -196,6 +202,8 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
}
}

utils::print_scheduler_config_info(scheduler_config);

if (is_prompt_lookup_enabled) {
OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
Expand Down
8 changes: 8 additions & 0 deletions src/cpp/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,14 @@ void print_gguf_debug_info(const std::string &debug_info) {
std::cout << "[GGUF Reader]: " << debug_info << std::endl;
}

// Prints the full scheduler configuration to stdout, but only when the
// debug-info environment switch is enabled (see env_setup_for_print_debug_info).
void print_scheduler_config_info(const SchedulerConfig &scheduler_config) {
    if (env_setup_for_print_debug_info()) {
        scheduler_config.print();
    }
}

std::pair<ov::CompiledModel, KVDesc>
compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
const ov::AnyMap& config,
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ void print_compiled_model_properties(ov::CompiledModel& compiled_Model, const ch

void print_gguf_debug_info(const std::string& debug_info);

void print_scheduler_config_info(const SchedulerConfig &scheduler_config);

struct KVDesc {
uint32_t max_prompt_len;
uint32_t min_response_len;
Expand Down
Loading