Skip to content

Commit 78cd0c0

Browse files
committed
Fix typo in `quantization` attribute name (`quantizaiton` → `quantization`)
Add `--quantization` command-line argument
1 parent 2855bf9 commit 78cd0c0

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

fastchat/serve/vllm_worker.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,7 @@ async def api_model_details(request: Request):
192192
"--controller-address", type=str, default="http://localhost:21001"
193193
)
194194
parser.add_argument("--model-path", type=str, default="lmsys/vicuna-7b-v1.3")
195+
parser.add_argument("--quantization", type=str)
195196
parser.add_argument(
196197
"--model-names",
197198
type=lambda s: s.split(","),
@@ -210,7 +211,7 @@ async def api_model_details(request: Request):
210211
args.model = args.model_path
211212
if args.num_gpus > 1:
212213
args.tensor_parallel_size = args.num_gpus
213-
if args.quantizaiton:
214+
if args.quantization:
214215
args.quantization = args.quantization
215216

216217
engine_args = AsyncEngineArgs.from_cli_args(args)

0 commit comments

Comments
 (0)