Commit c80b68a

Update AutoRound commit version (#1941)
Signed-off-by: Kaihui-intel <[email protected]>
1 parent 9077b38 commit c80b68a

9 files changed: +42 -20 lines

.azure-pipelines/scripts/ut/env_setup.sh

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
 fi

 if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then
-    pip install git+https://github.com/intel/auto-round.git@24b2e74070f2b4e6f26ff069ec75af74cf5b177c
+    pip install git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
 fi

 # test deps

neural_compressor/adaptor/pytorch.py

Lines changed: 2 additions & 2 deletions
@@ -4926,7 +4926,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
         act_group_size = self.recipes["autoround_args"].get("act_group_size", None)
         act_sym = self.recipes["autoround_args"].get("act_sym", None)
         act_dynamic = self.recipes["autoround_args"].get("act_dynamic", True)
-        multimodal = self.recipes["autoround_args"].get("multimodal", False)
+        quant_block_list = self.recipes["autoround_args"].get("quant_block_list", None)
         use_layer_wise = self.recipes["autoround_args"].get("use_layer_wise", False)

         if dataloader is not None:
@@ -4959,7 +4959,7 @@ def autoround_quantize(self, model, tune_cfg, dataloader):
             dynamic_max_gap=dynamic_max_gap,
             data_type=data_type,
             scale_dtype=scale_dtype,
-            multimodal=multimodal,
+            quant_block_list=quant_block_list,
             act_bits=act_bits,
             act_group_size=act_group_size,
             act_sym=act_sym,

neural_compressor/adaptor/torch_utils/weight_only.py

Lines changed: 3 additions & 3 deletions
@@ -706,7 +706,7 @@ def autoround_quantize(
     dynamic_max_gap: int = -1,
     data_type: str = "int",  ##only support int for now
     scale_dtype: str = "fp16",
-    multimodal: bool = False,
+    quant_block_list: list = None,
     act_bits: int = 32,
     act_group_size: int = None,
     act_sym: bool = None,
@@ -761,7 +761,7 @@
         data_type (str): The data type to be used (default is "int").
         scale_dtype (str): The data type of quantization scale to be used (default is "float32"), different kernels
                            have different choices.
-        multimodal(bool): Enable multimodal model quantization, (default is "False").
+        quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
         act_bits (int): Number of bits for activation quantization. Default is 32.
         act_group_size (int): Group size for activation quantization. Default is None.
         act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -800,7 +800,7 @@
         dynamic_max_gap=dynamic_max_gap,
         data_type=data_type,  ## only support data_type
         scale_dtype=scale_dtype,
-        multimodal=multimodal,
+        quant_block_list=quant_block_list,
         act_bits=act_bits,
         act_group_size=act_group_size,
         act_sym=act_sym,
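
Note on the renamed parameter: per the new docstring above, quant_block_list is a nested list, where each inner list holds the names of block modules to be quantized together. A minimal sketch of what a caller might pass, assuming a GPT-J-style module layout (the module names and block count below are illustrative assumptions, not taken from this commit):

# Assumed layout: GPT-J-style decoder with blocks at transformer.h.<i>.
# One inner list groups the block names to quantize, per the docstring above.
num_blocks = 28  # assumption: GPT-J-6B has 28 transformer blocks
quant_block_list = [[f"transformer.h.{i}" for i in range(num_blocks)]]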

neural_compressor/torch/algorithms/weight_only/autoround.py

Lines changed: 5 additions & 5 deletions
@@ -55,7 +55,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         data_type: str = "int",
         scale_dtype: str = "fp16",
-        multimodal: bool = False,
+        quant_block_list: list = None,
         act_bits: int = 32,
         act_group_size: int = None,
         act_sym: bool = None,
@@ -113,8 +113,8 @@ def __init__(
             dynamic_max_gap (int): The dynamic maximum gap (default is -1).
             data_type (str): The data type to be used (default is "int").
             scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
-                                have different choices.
-            multimodal(bool): Enable multimodal model quantization, (default is "False").
+                               have different choices.
+            quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
             act_bits (int): Number of bits for activation quantization. Default is 32.
             act_group_size (int): Group size for activation quantization. Default is None.
             act_sym (bool): Whether to use symmetric activation quantization. Default is None.
@@ -146,7 +146,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.data_type = data_type
         self.scale_dtype = scale_dtype
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self.act_bits = act_bits
         self.act_group_size = act_group_size
         self.act_sym = act_sym
@@ -202,7 +202,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
            dynamic_max_gap=self.dynamic_max_gap,
            data_type=self.data_type,
            scale_dtype=self.scale_dtype,
-            multimodal=self.multimodal,
+            quant_block_list=self.quant_block_list,
            act_bits=self.act_bits,
            act_group_size=self.act_group_size,
            act_sym=self.act_sym,

neural_compressor/torch/quantization/algorithm_entry.py

Lines changed: 9 additions & 4 deletions
@@ -567,9 +567,14 @@ def autoround_quantize_entry(
         if quant_config.name != AUTOROUND or quant_config.dtype == "fp32":
             continue
         else:
+            dtype = quant_config.dtype
+            bits = quant_config.bits
+            if dtype != "int" and "int" in dtype:
+                bits = int(dtype.lstrip("int"))
+                dtype = "int"
             weight_config[op_name] = {
-                "data_type": quant_config.dtype,
-                "bits": quant_config.bits,
+                "data_type": dtype,
+                "bits": bits,
                 "sym": quant_config.use_sym,
                 "group_size": quant_config.group_size,
                 "act_bits": quant_config.act_bits,
@@ -595,7 +600,7 @@ def autoround_quantize_entry(
     not_use_best_mse = quant_config.not_use_best_mse
     dynamic_max_gap = quant_config.dynamic_max_gap
     scale_dtype = quant_config.scale_dtype
-    multimodal = quant_config.multimodal
+    quant_block_list = quant_config.quant_block_list
     low_cpu_mem_usage = quant_config.use_layer_wise

     kwargs.pop("example_inputs")
@@ -622,7 +627,7 @@
         not_use_best_mse=not_use_best_mse,
         dynamic_max_gap=dynamic_max_gap,
         scale_dtype=scale_dtype,
-        multimodal=multimodal,
+        quant_block_list=quant_block_list,
         low_cpu_mem_usage=low_cpu_mem_usage,
     )
     model = quantizer.execute(model=model, mode=mode, *args, **kwargs)
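
The new branch in the first hunk lets a shorthand dtype such as "int4" carry its own bit width. A standalone sketch of the same parsing (hypothetical helper name; logic copied from the diff above):

def split_int_dtype(dtype: str, default_bits: int):
    """'int4' -> ('int', 4); plain 'int' or non-int dtypes pass through."""
    bits = default_bits
    if dtype != "int" and "int" in dtype:
        bits = int(dtype.lstrip("int"))  # lstrip drops the leading 'i'/'n'/'t' chars
        dtype = "int"
    return dtype, bits

assert split_int_dtype("int4", 8) == ("int", 4)
assert split_int_dtype("int", 8) == ("int", 8)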

neural_compressor/torch/quantization/config.py

Lines changed: 3 additions & 3 deletions
@@ -917,7 +917,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         scale_dtype: str = "fp16",
         use_layer_wise: bool = False,
-        multimodal: bool = False,
+        quant_block_list: list = None,
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
     ):
         """Init AUTOROUND weight-only quantization config.
@@ -951,7 +951,7 @@ def __init__(
             scale_dtype (str): The data type of quantization scale to be used (default is "float16"), different kernels
                                have different choices.
             use_layer_wise (bool): Enables quantize model per layer. Defaults to False.
-            multimodal(bool): Enable multimodal model quantization, (default is "False").
+            quant_block_list (list): A list whose elements are list of block's layer names to be quantized.
             white_list (Optional[List[OP_NAME_OR_MODULE_TYPE]]): White list of operator names or module types.
                 Default is DEFAULT_WHITE_LIST.
         """
@@ -983,7 +983,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.scale_dtype = scale_dtype
         self.use_layer_wise = use_layer_wise
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self._post_init()

     @classmethod
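
The test added below exercises this config end to end. As a usage sketch of the new "int4" shorthand with the 3.x prepare/convert flow (import path assumed from the 3.x API; fp32_model, run_fn, and dataloader are user-supplied placeholders, not defined in this commit):

# Assumed 3.x import path; mirrors the new test_int4_dtype test below.
from neural_compressor.torch.quantization import AutoRoundConfig, prepare, convert

quant_config = AutoRoundConfig(dtype="int4", nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
model = prepare(model=fp32_model, quant_config=quant_config)  # fp32_model: your float model
run_fn(model, dataloader)  # your calibration loop over a small dataset
q_model = convert(model)   # packs quantized layers (e.g., WeightOnlyLinear)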

test/3x/torch/quantization/weight_only/test_autoround.py

Lines changed: 17 additions & 0 deletions
@@ -80,6 +80,23 @@ def test_autoround(self, quant_lm_head):
         if quant_lm_head is True:
             assert isinstance(q_model.lm_head, WeightOnlyLinear), "quantization for lm_head failed."

+    def test_int4_dtype(self):
+        fp32_model = copy.deepcopy(self.gptj)
+        quant_config = AutoRoundConfig(dtype="int4", nsamples=32, seqlen=10, iters=10, scale_dtype="fp32")
+        logger.info(f"Test AutoRound with config {quant_config}")
+
+        # prepare + convert API
+        model = prepare(model=fp32_model, quant_config=quant_config)
+
+        run_fn(model, self.dataloader)
+        q_model = convert(model)
+        out = q_model(self.inp)[0]
+        assert torch.allclose(out, self.label, atol=1e-1)
+        assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
+        assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
+        assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
+        assert isinstance(q_model.transformer.h[0].attn.k_proj, WeightOnlyLinear), "packing model failed."
+
     def test_autoround_with_quantize_API(self):
         gpt_j_model = copy.deepcopy(self.gptj)


test/3x/torch/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-auto_round @ git+https://github.com/intel/auto-round.git@24b2e74070f2b4e6f26ff069ec75af74cf5b177c
+auto_round @ git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
 expecttest
 intel_extension_for_pytorch
 numpy

test/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 --find-links https://download.pytorch.org/whl/torch_stable.html
 accelerate==0.21.0
-auto-round @ git+https://github.com/intel/auto-round.git@24b2e74070f2b4e6f26ff069ec75af74cf5b177c
+auto-round @ git+https://github.com/intel/auto-round.git@e24b9074af6cdb099e31c92eb81b7f5e9a4a244e
 dynast==1.6.0rc1
 horovod
 intel-extension-for-pytorch
