1 file changed: +8 -0 lines changed.
Columns: original file line number | diff line number | diff line change
@@ -227,6 +227,14 @@ def _init_cache(self) -> None:
227
227
raise ValueError ("No available memory for the cache blocks. "
228
228
"Try increasing `gpu_memory_utilization` when "
229
229
"initializing the engine." )
230
+ max_seq_len = self .cache_config .block_size * num_gpu_blocks
231
+ if self .model_config .max_model_len > max_seq_len :
232
+ raise ValueError (
233
+ f"The model's max seq len ({ self .model_config .max_model_len } ) "
234
+ "is larger than the maximum number of tokens that can be "
235
+ f"stored in KV cache ({ max_seq_len } ). Try increasing "
236
+ "`gpu_memory_utilization` or decreasing `max_model_len` when "
237
+ "initializing the engine." )
230
238
231
239
self .cache_config .num_gpu_blocks = num_gpu_blocks
232
240
self .cache_config .num_cpu_blocks = num_cpu_blocks
You can’t perform that action at this time.
0 commit comments