Skip to content

Commit 1f0ae3e

Browse files
[Misc] Clean Up EngineArgs.create_engine_config (vllm-project#13734)
Signed-off-by: [email protected] <[email protected]>
1 parent db986c1 commit 1f0ae3e

File tree

2 files changed: +29 additions, -40 deletions (lines changed)

2 files changed: +29 additions, -40 deletions (lines changed)

vllm/config.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1124,6 +1124,10 @@ def metrics_info(self):
11241124
return {key: str(value) for key, value in self.__dict__.items()}
11251125

11261126
def _verify_args(self) -> None:
1127+
if self.cpu_offload_gb < 0:
1128+
raise ValueError("CPU offload space must be non-negative"
1129+
f", but got {self.cpu_offload_gb}")
1130+
11271131
if self.gpu_memory_utilization > 1.0:
11281132
raise ValueError(
11291133
"GPU memory utilization must be less than 1.0. Got "

vllm/engine/arg_utils.py

Lines changed: 25 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1062,6 +1062,17 @@ def from_cli_args(cls, args: argparse.Namespace):
10621062
return engine_args
10631063

10641064
def create_model_config(self) -> ModelConfig:
1065+
# gguf file needs a specific model loader and doesn't use hf_repo
1066+
if check_gguf_file(self.model):
1067+
self.quantization = self.load_format = "gguf"
1068+
1069+
# NOTE: This is to allow model loading from S3 in CI
1070+
if (not isinstance(self, AsyncEngineArgs) and envs.VLLM_CI_USE_S3
1071+
and self.model in MODELS_ON_S3
1072+
and self.load_format == LoadFormat.AUTO): # noqa: E501
1073+
self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}"
1074+
self.load_format = LoadFormat.RUNAI_STREAMER
1075+
10651076
return ModelConfig(
10661077
model=self.model,
10671078
task=self.task,
@@ -1101,26 +1112,6 @@ def create_model_config(self) -> ModelConfig:
11011112
)
11021113

11031114
def create_load_config(self) -> LoadConfig:
1104-
return LoadConfig(
1105-
load_format=self.load_format,
1106-
download_dir=self.download_dir,
1107-
model_loader_extra_config=self.model_loader_extra_config,
1108-
ignore_patterns=self.ignore_patterns,
1109-
)
1110-
1111-
def create_engine_config(self,
1112-
usage_context: Optional[UsageContext] = None
1113-
) -> VllmConfig:
1114-
from vllm.platforms import current_platform
1115-
current_platform.pre_register_and_update()
1116-
1117-
if envs.VLLM_USE_V1:
1118-
self._override_v1_engine_args(usage_context)
1119-
1120-
# gguf file needs a specific model loader and doesn't use hf_repo
1121-
if check_gguf_file(self.model):
1122-
self.quantization = self.load_format = "gguf"
1123-
11241115
# bitsandbytes quantization needs a specific model loader
11251116
# so we make sure the quant method and the load format are consistent
11261117
if (self.quantization == "bitsandbytes" or
@@ -1137,19 +1128,23 @@ def create_engine_config(self,
11371128
"BitsAndBytes load format and QLoRA adapter only support "
11381129
f"'bitsandbytes' quantization, but got {self.quantization}")
11391130

1140-
assert self.cpu_offload_gb >= 0, (
1141-
"CPU offload space must be non-negative"
1142-
f", but got {self.cpu_offload_gb}")
1131+
return LoadConfig(
1132+
load_format=self.load_format,
1133+
download_dir=self.download_dir,
1134+
model_loader_extra_config=self.model_loader_extra_config,
1135+
ignore_patterns=self.ignore_patterns,
1136+
)
11431137

1144-
device_config = DeviceConfig(device=self.device)
1138+
def create_engine_config(self,
1139+
usage_context: Optional[UsageContext] = None
1140+
) -> VllmConfig:
1141+
from vllm.platforms import current_platform
1142+
current_platform.pre_register_and_update()
11451143

1146-
# NOTE: This is to allow model loading from S3 in CI
1147-
if (not isinstance(self, AsyncEngineArgs) and envs.VLLM_CI_USE_S3
1148-
and self.model in MODELS_ON_S3
1149-
and self.load_format == LoadFormat.AUTO): # noqa: E501
1150-
self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}"
1151-
self.load_format = LoadFormat.RUNAI_STREAMER
1144+
if envs.VLLM_USE_V1:
1145+
self._override_v1_engine_args(usage_context)
11521146

1147+
device_config = DeviceConfig(device=self.device)
11531148
model_config = self.create_model_config()
11541149

11551150
if (model_config.is_multimodal_model and not envs.VLLM_USE_V1
@@ -1281,16 +1276,6 @@ def create_engine_config(self,
12811276
if speculative_config is None \
12821277
else speculative_config.num_lookahead_slots
12831278

1284-
if not self.use_v2_block_manager:
1285-
logger.warning(
1286-
"[DEPRECATED] Block manager v1 has been removed, "
1287-
"and setting --use-v2-block-manager to True or False has "
1288-
"no effect on vLLM behavior. Please remove "
1289-
"--use-v2-block-manager in your engine argument. "
1290-
"If your use case is not supported by "
1291-
"SelfAttnBlockSpaceManager (i.e. block manager v2),"
1292-
" please file an issue with detailed information.")
1293-
12941279
scheduler_config = SchedulerConfig(
12951280
runner_type=model_config.runner_type,
12961281
max_num_batched_tokens=self.max_num_batched_tokens,

0 commit comments

Comments
 (0)