We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2855bf9, commit 78cd0c0 (Copy full SHA for 78cd0c0)
fastchat/serve/vllm_worker.py
@@ -192,6 +192,7 @@ async def api_model_details(request: Request):
192
"--controller-address", type=str, default="http://localhost:21001"
193
)
194
parser.add_argument("--model-path", type=str, default="lmsys/vicuna-7b-v1.3")
195
+ parser.add_argument("--quantization", type=str)
196
parser.add_argument(
197
"--model-names",
198
type=lambda s: s.split(","),
@@ -210,7 +211,7 @@ async def api_model_details(request: Request):
210
211
args.model = args.model_path
212
if args.num_gpus > 1:
213
args.tensor_parallel_size = args.num_gpus
- if args.quantizaiton:
214
+ if args.quantization:
215
args.quantization = args.quantization
216
217
engine_args = AsyncEngineArgs.from_cli_args(args)
0 commit comments