Skip to content

Commit e1424d2

Browse files
garg-amit authored and lewtun committed
[Frontend] Expose revision arg in OpenAI server (vllm-project#8501)
Signed-off-by: Amit Garg <[email protected]>
1 parent bac09e3 commit e1424d2

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 4 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -69,8 +69,10 @@

 def model_is_embedding(model_name: str, trust_remote_code: bool,
-                       quantization: Optional[str]) -> bool:
+                       quantization: Optional[str],
+                       revision: Optional[str]) -> bool:
     return ModelConfig(model=model_name,
+                       revision=revision,
                        tokenizer=model_name,
                        tokenizer_mode="auto",
                        trust_remote_code=trust_remote_code,
@@ -130,7 +132,7 @@ async def build_async_engine_client_from_engine_args(
     # If manually triggered or embedding model, use AsyncLLMEngine in process.
     # TODO: support embedding model via RPC.
     if (model_is_embedding(engine_args.model, engine_args.trust_remote_code,
-                           engine_args.quantization)
+                           engine_args.quantization, engine_args.revision)
         or disable_frontend_multiprocessing):
         engine_client = AsyncLLMEngine.from_engine_args(
             engine_args, usage_context=UsageContext.OPENAI_API_SERVER)

0 commit comments

Comments (0)