@@ -55,7 +55,7 @@ def __init__(
         dynamic_max_gap: int = -1,
         data_type: str = "int",
         scale_dtype: str = "fp16",
-        multimodal: bool = False,
+        quant_block_list: list = None,
         act_bits: int = 32,
         act_group_size: int = None,
         act_sym: bool = None,
@@ -113,8 +113,8 @@ def __init__(
         dynamic_max_gap (int): The dynamic maximum gap (default is -1).
         data_type (str): The data type to be used (default is "int").
         scale_dtype (str): The data type of the quantization scale to be used (default is "fp16"); different kernels
-            have different choices.
-        multimodal (bool): Enable multimodal model quantization (default is False).
+            have different choices.
+        quant_block_list (list): A list whose elements are lists of block layer names to be quantized (default is None).
         act_bits (int): Number of bits for activation quantization. Default is 32.
         act_group_size (int): Group size for activation quantization. Default is None.
         act_sym (bool): Whether to use symmetric activation quantization. Default is None.
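
Per the docstring above, quant_block_list is a list of lists, one inner list of layer names per block. A minimal sketch of building such a value; the "model.layers" prefix and the block count are assumptions for illustration, not taken from this diff:

# Hypothetical sketch of a quant_block_list value. The name prefix and
# block count are assumptions; the diff only establishes a list-of-lists
# of layer names, defaulting to None.
num_blocks = 2
quant_block_list = [[f"model.layers.{i}"] for i in range(num_blocks)]
print(quant_block_list)  # [['model.layers.0'], ['model.layers.1']]
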
@@ -146,7 +146,7 @@ def __init__(
         self.dynamic_max_gap = dynamic_max_gap
         self.data_type = data_type
         self.scale_dtype = scale_dtype
-        self.multimodal = multimodal
+        self.quant_block_list = quant_block_list
         self.act_bits = act_bits
         self.act_group_size = act_group_size
         self.act_sym = act_sym
@@ -202,7 +202,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
             dynamic_max_gap=self.dynamic_max_gap,
             data_type=self.data_type,
             scale_dtype=self.scale_dtype,
-            multimodal=self.multimodal,
+            quant_block_list=self.quant_block_list,
             act_bits=self.act_bits,
             act_group_size=self.act_group_size,
             act_sym=self.act_sym,
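
Since convert now forwards the list unchanged, the consuming quantizer presumably filters modules against it. A sketch under that assumption; none of the helper names below appear in this diff:

# Hypothetical consumer-side sketch of quant_block_list; every name here is
# an assumption -- the diff only shows the parameter being stored on the
# config and forwarded as a keyword argument.
def iter_quant_layers(quant_block_list, named_modules):
    """Yield (name, module) pairs for layers named in any block's list."""
    if quant_block_list is None:  # default: quantizer discovers blocks itself
        return
    wanted = {name for block in quant_block_list for name in block}
    for name, module in named_modules:
        if name in wanted:
            yield name, module

# Tiny demo with stand-in modules:
modules = [("model.layers.0", object()), ("lm_head", object())]
print([name for name, _ in iter_quant_layers([["model.layers.0"]], modules)])
# -> ['model.layers.0']
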