@@ -70,7 +70,7 @@ def validate_model_size_with_radix(cls, v: object) -> object:
 
 
 class PytorchLLMSpecV1(BaseModel):
-    model_format: Literal["pytorch", "gptq"]
+    model_format: Literal["pytorch", "gptq", "awq"]
     # Must in order that `str` first, then `int`
     model_size_in_billions: Union[str, int]
     quantizations: List[str]
@@ -451,7 +451,7 @@ def _get_meta_path(
             return os.path.join(cache_dir, "__valid_download")
         else:
             return os.path.join(cache_dir, f"__valid_download_{model_hub}")
-    elif model_format in ["ggmlv3", "ggufv2", "gptq"]:
+    elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
         assert quantization is not None
         if model_hub == "huggingface":
             return os.path.join(cache_dir, f"__valid_download_{quantization}")
@@ -489,7 +489,7 @@ def _skip_download(
                 logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
                 return True
         return False
-    elif model_format in ["ggmlv3", "ggufv2", "gptq"]:
+    elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
         assert quantization is not None
         return os.path.exists(
             _get_meta_path(cache_dir, model_format, model_hub, quantization)
@@ -537,7 +537,7 @@ def cache_from_modelscope(
     ):
         return cache_dir
 
-    if llm_spec.model_format in ["pytorch", "gptq"]:
+    if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
         download_dir = retry_download(
             snapshot_download,
             llm_family.model_name,
@@ -598,7 +598,7 @@ def cache_from_huggingface(
     ):
         return cache_dir
 
-    if llm_spec.model_format in ["pytorch", "gptq"]:
+    if llm_spec.model_format in ["pytorch", "gptq", "awq"]:
         assert isinstance(llm_spec, PytorchLLMSpecV1)
         retry_download(
             huggingface_hub.snapshot_download,
@@ -679,7 +679,7 @@ def get_cache_status(
         ]
         return any(revisions)
     # just check meta file for ggml and gptq model
-    elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
+    elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
         ret = []
         for q in llm_spec.quantizations:
             assert q is not None
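
For quick orientation, below is a minimal, self-contained sketch of the meta-path rule these hunks extend, illustrating that "awq" now takes the same quantization-aware branch as "gptq" and "ggufv2". This is not the project's actual module: the helper name, the omitted non-huggingface quantized branch, and the "Int4" quantization label are assumptions made only for illustration.

    import os

    # Hypothetical standalone re-implementation of the branch shown in the
    # _get_meta_path hunk above; the real function lives in the project.
    def get_meta_path_sketch(cache_dir, model_format, model_hub, quantization=None):
        if model_format == "pytorch":
            if model_hub == "huggingface":
                return os.path.join(cache_dir, "__valid_download")
            return os.path.join(cache_dir, f"__valid_download_{model_hub}")
        elif model_format in ["ggmlv3", "ggufv2", "gptq", "awq"]:
            # Quantized formats record the quantization in the meta file name.
            assert quantization is not None
            if model_hub == "huggingface":
                return os.path.join(cache_dir, f"__valid_download_{quantization}")
            raise NotImplementedError("non-huggingface hubs omitted in this sketch")
        raise ValueError(f"Unsupported format: {model_format}")

    # "Int4" is an assumed quantization label, used only to show the resulting path.
    print(get_meta_path_sketch("/tmp/cache", "awq", "huggingface", "Int4"))
    # -> /tmp/cache/__valid_download_Int4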