 
 try:
     import intel_extension_for_pytorch as ipex
-except:
+except:  # pragma: no cover
     assert False, "Please install IPEX for smooth quantization."
 
+from collections import OrderedDict
+from types import MethodType
+
 from packaging.version import Version
 
+from neural_compressor.torch.algorithms import Quantizer
+
 from .utility import (
     TorchSmoothQuant,
     cfg_to_qconfig,
 ipex_ver = get_ipex_version()
 
 
-def smooth_quantize(model, tune_cfg, run_fn, example_inputs, inplace=True):
-    """Execute the quantize process on the specified model.
+class SmoothQuantQuantizer(Quantizer):
+    def __init__(self, quant_config: OrderedDict = {}):
+        """Init a SmoothQuantQuantizer object.
 
-    Args:
-        model: a float model to be quantized.
-        tune_cfg: quantization config for ops.
-        run_fn: a calibration function for calibrating the model.
-        example_inputs: used to trace torch model.
-        inplace: whether to carry out model transformations in-place.
+        Args:
+            quant_config (OrderedDict, optional): quantization config for ops. Defaults to {}.
+        """
+        super().__init__(quant_config)
 
-    Returns:
-        A quantized model.
-    """
-    assert not ipex_ver.release < Version("2.1").release, "IPEX version >= 2.1 is required for SmoothQuant."
+    def prepare(self, model, example_inputs, inplace=True, *args, **kwargs):
+        """Prepares a given model for quantization.
+
+        Args:
+            model: A float model to be quantized.
+            example_inputs: Used to trace torch model.
+            inplace: Whether to carry out model transformations in-place. Defaults to True.
+
+        Returns:
+            A prepared model.
+        """
+        assert example_inputs is not None, "Please provide example_inputs for smooth quantization."
+        assert not ipex_ver.release < Version("2.1").release, "IPEX version >= 2.1 is required for SmoothQuant."
+
+        # Note: make sure SmoothQuant is executed only once when modifying the fp32 model in place.
+        if hasattr(model, "_smoothquant_optimized") and model._smoothquant_optimized:  # pragma: no cover
+            logger.info("The model is already optimized by SmoothQuant algorithm, skip it.")
+            return model
+
+        cfgs, op_infos_from_cfgs, output_tensor_id_op_name = (
+            model.cfgs,
+            model.op_infos_from_cfgs,
+            model.output_tensor_id_op_name,
+        )
+
+        # Update the JSON file in ipex_config_path.
+        cfg_to_qconfig(self.quant_config, cfgs, op_infos_from_cfgs, output_tensor_id_op_name)
+        model.eval()
+
+        # Check the SmoothQuant alpha and act_algo values.
+        recipe_cfgs = self.quant_config.get("recipe_cfgs", None)
+        alpha = recipe_cfgs["smooth_quant_args"]["alpha"]
+        for op, _ in self.quant_config["op"].items():
+            act_algo = self.quant_config["op"][op]["activation"]["algorithm"]
 
-    _, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, _ = get_quantizable_ops_recursively(model, example_inputs)
+        # Checking save_qconf_summary is a workaround for an IPEX bug:
+        # sometimes the prepared model from get_op_capability loses this attribute.
+        if not hasattr(model, "save_qconf_summary") or not hasattr(model, "load_qconf_summary"):
+            from torch.ao.quantization.observer import MinMaxObserver
 
-    # check smoothquant folding value
-    recipe_cfgs = tune_cfg.get("recipe_cfgs", None)
-    if "smooth_quant_args" in recipe_cfgs and "folding" in recipe_cfgs["smooth_quant_args"]:
-        if recipe_cfgs["smooth_quant_args"]["folding"] is None:
-            if ipex_ver.release < Version("2.1").release:
-                folding = True
+            if ipex_ver.release >= Version("2.1.1").release:
+                static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                    alpha=alpha, act_observer=MinMaxObserver
+                )
+            else:  # pragma: no cover
+                if act_algo == "minmax":
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
+                        alpha=alpha, act_observer=MinMaxObserver()
+                    )
+                    logger.warning(
+                        "The int8 model accuracy will be close to 0 with MinMaxObserver; "
+                        + "IPEX version 2.1.100+cpu or higher is suggested."
+                    )
+                else:
+                    static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=alpha)
+
+            if isinstance(example_inputs, dict):
+                model = ipex.quantization.prepare(
+                    model, static_qconfig, example_kwarg_inputs=example_inputs, inplace=inplace
+                )
             else:
-                folding = False
-        else:
-            folding = recipe_cfgs["smooth_quant_args"]["folding"]
+                model = ipex.quantization.prepare(model, static_qconfig, example_inputs=example_inputs, inplace=inplace)
 
-    # Note: we should make sure smoothquant is only executed once with inplacing fp32 model.
-    if hasattr(model, "_smoothquant_optimized") and model._smoothquant_optimized:
-        logger.info("The model is already optimized by SmoothQuant algorithm, skip it.")
+        cfg_to_qconfig(self.quant_config, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, smooth_quant=True)
+        model.load_qconf_summary(qconf_summary=ipex_config_path)
         return model
 
-    sq = TorchSmoothQuant(model, dataloader=None, example_inputs=example_inputs, q_func=run_fn, record_max_info=True)
-    model = sq.transform(
-        alpha=recipe_cfgs["smooth_quant_args"]["alpha"],
-        folding=folding,
-        auto_alpha_args=recipe_cfgs["smooth_quant_args"]["auto_alpha_args"],
-        scale_sharing=recipe_cfgs["smooth_quant_args"]["scale_sharing"],
-    )
-
-    # Update model parameter when smoothquant folding = False
-    if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and not folding:
-        return qdq_quantize(
-            model, tune_cfg, run_fn, example_inputs, inplace, cfgs, op_infos_from_cfgs, output_tensor_id_op_name, sq
-        )
+    def convert(self, model, example_inputs, inplace=True, *args, **kwargs):
+        """Converts a prepared model to a quantized model.
 
-    # Update model parameter when smoothquant folding = True
-    if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and folding:
-        _apply_pre_optimization(model, tune_cfg, sq)
-    model.eval()
+        Args:
+            model: The prepared model to be converted.
+            example_inputs: Used to trace torch model.
+            inplace: Whether to carry out model transformations in-place. Defaults to True.
 
-    # Check save_qconf_summary part is a workaround for IPEX bug.
-    # Sometimes the prepared model from get_op_capablitiy loss this attribute
-    if not hasattr(model, "save_qconf_summary") or not hasattr(model, "load_qconf_summary"):
-        static_qconfig = ipex.quantization.default_static_qconfig_mapping
-        if isinstance(example_inputs, dict):
-            model = ipex.quantization.prepare(
-                model, static_qconfig, example_kwarg_inputs=example_inputs, inplace=inplace
+        Returns:
+            A quantized model.
+        """
+        model.save_qconf_summary(qconf_summary=ipex_config_path)
+        model = _ipex_post_quant_process(model, example_inputs, inplace=inplace)
+
+        with open(ipex_config_path, "r") as f:
+            model.tune_cfg = json.load(f)
+        model.ipex_config_path = ipex_config_path
+        dump_model_op_stats(self.quant_config["op"])
+
+        from neural_compressor.torch.algorithms.smooth_quant import save
+
+        logger.info("Smooth quantization done.")
+        model.ori_save = model.save
+        model.save = MethodType(save, model)
+        return model
+
+    def quantize(self, model, tune_cfg, run_fn, example_inputs, inplace=True, *args, **kwargs):
+        """Execute the quantize process on the specified model.
+
+        Args:
+            model: a float model to be quantized.
+            tune_cfg: quantization config for ops.
+            run_fn: a calibration function for calibrating the model.
+            example_inputs: used to trace torch model.
+            inplace: whether to carry out model transformations in-place.
+
+        Returns:
+            A quantized model.
+        """
+        assert not ipex_ver.release < Version("2.1").release, "IPEX version >= 2.1 is required for SmoothQuant."
+
+        cfgs, op_infos_from_cfgs, output_tensor_id_op_name = (
+            model.cfgs,
+            model.op_infos_from_cfgs,
+            model.output_tensor_id_op_name,
+        )
+
+        # Check the SmoothQuant folding value.
+        recipe_cfgs = tune_cfg.get("recipe_cfgs", None)
+        if "smooth_quant_args" in recipe_cfgs and "folding" in recipe_cfgs["smooth_quant_args"]:
+            if recipe_cfgs["smooth_quant_args"]["folding"] is None:  # pragma: no cover
+                if ipex_ver.release < Version("2.1").release:
+                    folding = True
+                else:
+                    folding = False
+            else:
+                folding = recipe_cfgs["smooth_quant_args"]["folding"]
+
+        # Note: make sure SmoothQuant is executed only once when modifying the fp32 model in place.
+        if hasattr(model, "_smoothquant_optimized") and model._smoothquant_optimized:  # pragma: no cover
+            logger.info("The model is already optimized by SmoothQuant algorithm, skip it.")
+            return model
+
+        sq_info = model.sq_info
+
+        # Update model parameters when SmoothQuant folding = False.
+        if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and not folding:
+            return qdq_quantize(
+                model,
+                tune_cfg,
+                run_fn,
+                example_inputs,
+                inplace,
+                cfgs,
+                op_infos_from_cfgs,
+                output_tensor_id_op_name,
+                sq_info,
             )
-        else:
-            model = ipex.quantization.prepare(model, static_qconfig, example_inputs=example_inputs, inplace=inplace)
 
-    model.load_qconf_summary(qconf_summary=ipex_config_path)
-    run_fn(model)
-    model.save_qconf_summary(qconf_summary=ipex_config_path)
-    model = _ipex_post_quant_process(model, example_inputs, inplace=inplace)
+        # Update model parameters when SmoothQuant folding = True.
+        if recipe_cfgs and recipe_cfgs.get("smooth_quant", False) and folding:
+            _apply_pre_optimization(model, tune_cfg, sq_info)
 
-    # Recover model parameter when smoothquant folding = True
-    if (
-        recipe_cfgs
-        and recipe_cfgs.get("smooth_quant", False)
-        and recipe_cfgs["smooth_quant_args"]["folding"]
-        and not inplace
-    ):  # pragma: no cover
-        _apply_pre_optimization(model, tune_cfg, sq, recover=True)
+        # Update the JSON file in ipex_config_path.
+        cfg_to_qconfig(self.quant_config, cfgs, op_infos_from_cfgs, output_tensor_id_op_name)
+        model.eval()
 
-    with open(ipex_config_path, "r") as f:
-        model.tune_cfg = json.load(f)
-    model.ipex_config_path = ipex_config_path
-    dump_model_op_stats(tune_cfg["op"])
-    return model
+        # Checking save_qconf_summary is a workaround for an IPEX bug:
+        # sometimes the prepared model from get_op_capability loses this attribute.
+        if not hasattr(model, "save_qconf_summary") or not hasattr(model, "load_qconf_summary"):  # pragma: no cover
+            static_qconfig = ipex.quantization.default_static_qconfig_mapping
+            if isinstance(example_inputs, dict):
+                model = ipex.quantization.prepare(
+                    model, static_qconfig, example_kwarg_inputs=example_inputs, inplace=inplace
+                )
+            else:
+                model = ipex.quantization.prepare(model, static_qconfig, example_inputs=example_inputs, inplace=inplace)
+
+        model.load_qconf_summary(qconf_summary=ipex_config_path)
+        run_fn(model)
+        model.save_qconf_summary(qconf_summary=ipex_config_path)
+        model = _ipex_post_quant_process(model, example_inputs, inplace=inplace)
+
+        # Recover model parameters when SmoothQuant folding = True.
+        if (
+            recipe_cfgs
+            and recipe_cfgs.get("smooth_quant", False)
+            and recipe_cfgs["smooth_quant_args"]["folding"]
+            and not inplace
+        ):  # pragma: no cover
+            _apply_pre_optimization(model, tune_cfg, sq_info, recover=True)
+
+        with open(ipex_config_path, "r") as f:
+            model.tune_cfg = json.load(f)
+        model.ipex_config_path = ipex_config_path
+        dump_model_op_stats(tune_cfg["op"])
+        return model
 
 
 def qdq_quantize(
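Note: both the new `prepare` method above and the `qdq_quantize` hunk below choose the IPEX SmoothQuant qconfig mapping through the same version gate. For readers skimming the diff, here is that gate isolated as a small standalone sketch; the helper name `_select_sq_qconfig`, its defaults, and the direct use of `ipex.__version__` are illustrative assumptions, not part of this change.

```python
import intel_extension_for_pytorch as ipex
from packaging.version import Version
from torch.ao.quantization.observer import MinMaxObserver


def _select_sq_qconfig(alpha=0.5, act_algo="minmax"):
    # Illustrative helper mirroring the branch in this file: IPEX >= 2.1.1 accepts
    # the MinMaxObserver class as act_observer, while older releases need an
    # instance (with degraded int8 accuracy) or no explicit observer at all.
    ipex_ver = Version(ipex.__version__)
    if ipex_ver.release >= Version("2.1.1").release:
        return ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=alpha, act_observer=MinMaxObserver)
    if act_algo == "minmax":
        return ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=alpha, act_observer=MinMaxObserver())
    return ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=alpha)
```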
@@ -133,12 +249,12 @@ def qdq_quantize(
 
     # Check save_qconf_summary part is a workaround for IPEX bug.
     # Sometimes the prepared model from get_op_capablitiy loss this attribute
-    if not hasattr(model, "save_qconf_summary") or not hasattr(model, "load_qconf_summary"):
+    if not hasattr(model, "save_qconf_summary") or not hasattr(model, "load_qconf_summary"):  # pragma: no cover
         from torch.ao.quantization.observer import MinMaxObserver
 
         if ipex_ver.release >= Version("2.1.1").release:
             static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5, act_observer=MinMaxObserver)
-        else:
+        else:  # pragma: no cover
             if sq_minmax_init:
                 static_qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(
                     alpha=0.5, act_observer=MinMaxObserver()
@@ -169,7 +285,7 @@ def qdq_quantize(
         # IPEX may raise an error on the second iteration.
         # OverflowError: cannot convert float infinity to integer
         run_fn(model)
-    except:
+    except:  # pragma: no cover
         logger.warning(
             "The calibration failed when calibrating with ipex, "
             + "using scale info from SmoothQuant for Linear and "
@@ -197,7 +313,7 @@ def _apply_pre_optimization(model, tune_cfg, sq, recover=False):
         tsq = TorchSmoothQuant(model, None)
         alpha = tune_cfg["recipe_cfgs"]["smooth_quant_args"]["alpha"]
         for op_name, info in sq_max_info.items():
-            if alpha == "auto":
+            if alpha == "auto":  # pragma: no cover
                 alpha = info["alpha"]
             absorb_layer = op_name
             absorbed_layer = info["absorbed_layer"]
@@ -237,7 +353,7 @@ def _ipex_post_quant_process(model, example_inputs, inplace=False):
             else:
                 model = torch.jit.trace(model, example_inputs)
             model = torch.jit.freeze(model.eval())
-        except:
+        except:  # pragma: no cover
             if isinstance(example_inputs, dict):
                 model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False, check_trace=False)
             else:
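Taken as a whole, the diff replaces the single-shot `smooth_quantize` function with a prepare / calibrate / convert flow on `SmoothQuantQuantizer`. A minimal usage sketch of that sequence follows; `TinyModel`, `calib_fn`, and the `quant_config` layout are placeholders for illustration only, and in the real flow the framework attaches `cfgs`, `op_infos_from_cfgs`, and `output_tensor_id_op_name` to the float model (plus `sq_info` for the folding path) before this class runs.

```python
from collections import OrderedDict

import torch


class TinyModel(torch.nn.Module):
    # Placeholder model; any traceable fp32 torch.nn.Module stands in here.
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.fc(x)


def calib_fn(model):
    # User-supplied calibration: run a few representative batches to collect activation statistics.
    with torch.no_grad():
        for _ in range(10):
            model(torch.randn(4, 8))


# Assumed config layout, based on the keys read above: per-op entries under "op"
# (normally produced by the framework's capability query) plus the SmoothQuant recipe.
quant_config = OrderedDict(
    {
        "op": {},
        "recipe_cfgs": {
            "smooth_quant": True,
            "smooth_quant_args": {"alpha": 0.5, "folding": False},
        },
    }
)

example_inputs = torch.randn(4, 8)
fp32_model = TinyModel().eval()  # in practice, already annotated with cfgs / op_infos_from_cfgs / output_tensor_id_op_name

quantizer = SmoothQuantQuantizer(quant_config)
prepared = quantizer.prepare(fp32_model, example_inputs=example_inputs)
calib_fn(prepared)  # calibration happens between prepare() and convert()
quantized = quantizer.convert(prepared, example_inputs=example_inputs)
quantized.save("./saved_results")  # convert() patches save() onto the model via MethodType; path shown is illustrative
```

Splitting the old function this way follows the generic `Quantizer` interface imported at the top of the diff, which is presumably what lets the higher-level torch entry points drive prepare and convert as separate steps.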