@@ -28,7 +28,7 @@ def enable(
2828 code ,
2929 features ,
3030 target_batch_size = 1 , # effective for feature "pytorch_change_batch_size"
31- num_benchmark_iteration = 30 , # effective for feature "pytorch_benchmark"
31+ num_benchmark_iteration = 10 , # effective for feature "pytorch_benchmark"
3232 eval_accuracy = False ,
3333 generate_patch = True ,
3434 overwrite = False ,
@@ -178,6 +178,23 @@ def enable(
178178
179179 transformed_list_code_path = []
180180
181+ ## Determine Code Domain
182+ # reset globals
183+ globals .reset_globals ()
184+
185+ from .utils import handle_user_input
186+ globals .list_code_path , num_user_code_path = handle_user_input .get_all_code_path (code )
187+
188+ from .coders .autoinc import domain
189+ code_domain = domain .determine_domain (globals .list_code_path [0 ])
190+ if code_domain == "transformers_trainer" :
191+ if "pytorch_benchmark" in features :
192+ features = ["pytorch_reclaim_inference_transformers_trainer" ] + features
193+ # for BS
194+ args += " --per_device_eval_batch_size " + str (target_batch_size )
195+ globals .batch_size_changed = True
196+
197+ ## Feature Transformation
181198 for idx_feature , feature in enumerate (features ):
182199
183200 # reset globals
@@ -223,10 +240,14 @@ def enable(
223240 from .coders .pytorch .dummy_dataloader import DummyDataLoader
224241 opt = DummyDataLoader (globals .list_model_def_instance )
225242 opt .register_transformation ()
226- elif feature == "pytorch_reclaim_inputs" : # is not in harness scope, but needs call graph and type inference
243+ elif feature == "pytorch_reclaim_inputs" :
227244 from .coders .pytorch .reclaim_inputs import ReclaimInputs
228245 opt = ReclaimInputs (globals .list_model_def_instance )
229246 opt .register_transformation ()
247+ elif feature == "pytorch_reclaim_inference_transformers_trainer" :
248+ from .coders .pytorch .reclaim_inference_transformers_trainer import ReclaimInferenceTransformersTrainer
249+ opt = ReclaimInferenceTransformersTrainer (globals .list_model_def_instance )
250+ opt .register_transformation ()
230251 elif feature in [
231252 "pytorch_inc_dynamic_quant" ,
232253 "pytorch_inc_static_quant_fx" ,
@@ -257,6 +278,8 @@ def enable(
257278 for i in range (len (list_transformed_code )):
258279 # Batch Size
259280 if "pytorch_change_batch_size" in features :
281+ if "batch_size" in list_transformed_code [0 ]: # entry code has "batch_size"
282+ globals .batch_size_changed = True
260283 from .coders .pytorch .batch_size import BatchSizeCoder
261284 globals .target_batch_size = str (target_batch_size )
262285 list_transformed_code [i ] = BatchSizeCoder (list_transformed_code [i ]).transform ()
@@ -321,9 +344,6 @@ def enable(
321344 ### Output of Enabling
322345 globals .list_code_path , num_user_code_path = handle_user_input .get_all_code_path (code )
323346
324- if save_patch_path == "" :
325- save_patch_path = ws_path
326-
327347 if generate_patch :
328348 whole_patch_user_code = ""
329349 for path in globals .list_code_path [0 :num_user_code_path ]:
@@ -335,10 +355,12 @@ def enable(
335355 this_patch , _ = sp_gen_patch .communicate ()
336356 this_patch = str (this_patch )[2 :- 1 ]
337357 whole_patch_user_code += this_patch
338- open (save_patch_path + "neural_coder_patch" + patch_suffix , "w" ).write (
358+ if save_patch_path == "" :
359+ save_patch_path = ws_path + "neural_coder_patch"
360+ open (save_patch_path + patch_suffix , "w" ).write (
339361 whole_patch_user_code .replace (r'\n' , '\n ' ).replace (r'\t' , '\t ' ).replace (r"\'" , "\' " ))
340362 abs_patch_path = os .path .abspath (
341- save_patch_path + "neural_coder_patch" + patch_suffix )
363+ save_patch_path + patch_suffix )
342364 logger .info (f"The patch is saved to: [{ abs_patch_path } ]" )
343365
344366 if overwrite :
@@ -358,10 +380,12 @@ def enable(
358380 this_patch , _ = sp_gen_patch .communicate ()
359381 this_patch = str (this_patch )[2 :- 1 ]
360382 whole_patch_import_modules += this_patch
361- open (save_patch_path + "neural_coder_patch_import_modules" + patch_suffix , "w" ).write (
383+ if save_patch_path == "" :
384+ save_patch_path = ws_path + "neural_coder_patch_import_modules"
385+ open (save_patch_path + patch_suffix , "w" ).write (
362386 whole_patch_import_modules .replace (r'\n' , '\n ' ).replace (r'\t' , '\t ' ).replace (r"\'" , "\' " ))
363387 abs_patch_path = os .path .abspath (
364- save_patch_path + "neural_coder_patch_import_modules" + patch_suffix )
388+ save_patch_path + patch_suffix )
365389 logger .info (
366390 f"The patch for imported modules is saved to: [{ abs_patch_path } ]" )
367391
@@ -580,7 +604,10 @@ def bench(
580604 IPS [- 1 ] = IPS [- 2 ]
581605
582606 try :
583- FPS = round (sum (IPS ) / len (IPS ) * ninstances * bench_batch_size , 3 )
607+ if globals .batch_size_changed : # only times BS if BS has been modified, otherwise times 1
608+ FPS = round (sum (IPS ) / len (IPS ) * ninstances * bench_batch_size , 3 )
609+ else :
610+ FPS = round (sum (IPS ) / len (IPS ) * ninstances * 1 , 3 )
584611 except :
585612 FPS = 0
586613 try :
@@ -824,20 +851,43 @@ def remove_if_have(list, element):
824851 return list
825852
826853 features = remove_if_have (features , "pytorch_benchmark" )
827- features = remove_if_have (
828- features , "pytorch_change_batch_size" )
854+ features = remove_if_have (features , "pytorch_change_batch_size" )
829855 features = remove_if_have (features , "pytorch_cuda_to_cpu" )
830856
831- if not eval_accuracy :
832- logger .info (
833- f"Benchmark result (performance) of optimization set [{ features } ]"
834- f" is [{ bench_performance [0 ]} ] (FPS)" )
857+ if auto_quant :
858+ # convert feature name to display name for better user experience
859+ if features == ['pytorch_inc_dynamic_quant' ]:
860+ features_display = "Intel INT8 (Dynamic)"
861+ elif features == ['pytorch_inc_static_quant_fx' ]:
862+ features_display = "Intel INT8 (Static)"
863+ elif features == ['pytorch_inc_static_quant_ipex' ]:
864+ features_display = "Intel INT8 (IPEX)"
865+ elif features == ['pytorch_inc_bf16' ]:
866+ features_display = "Intel BF16"
867+ elif features == []:
868+ features_display = "The Original Model"
869+
870+ if not eval_accuracy :
871+ logger .info (
872+ f"Benchmark result (performance) of { features_display } "
873+ f" is { bench_performance [0 ]} (FPS)" )
874+ else :
875+ logger .info (
876+ f"Benchmark result (performance) of { features_display } "
877+ f" is { bench_performance [0 ]} (FPS)" )
878+ logger .info (
879+ f"Benchmark result (accuracy) of { features_display } is { bench_acc [5 ]} " )
835880 else :
836- logger .info (
837- f"Benchmark result (performance) of optimization set [{ features } ]"
838- f" is [{ bench_performance [0 ]} ] (FPS)" )
839- logger .info (
840- f"Benchmark result (accuracy) of optimization set [{ features } ] is [{ bench_acc [5 ]} ]" )
881+ if not eval_accuracy :
882+ logger .info (
883+ f"Benchmark result (performance) of optimization set [{ features } ]"
884+ f" is [{ bench_performance [0 ]} ] (FPS)" )
885+ else :
886+ logger .info (
887+ f"Benchmark result (performance) of optimization set [{ features } ]"
888+ f" is [{ bench_performance [0 ]} ] (FPS)" )
889+ logger .info (
890+ f"Benchmark result (accuracy) of optimization set [{ features } ] is [{ bench_acc [5 ]} ]" )
841891
842892 d = {} # initialize dict
843893 d ["features" ] = features
@@ -857,8 +907,7 @@ def remove_if_have(list, element):
857907
858908 # print result
859909 if not eval_accuracy :
860- logger .info (
861- f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
910+ print (f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
862911 print ("{:<20} {:<20} {:<120}" .format (
863912 'Numactl Mode' , 'Performance (FPS)' , 'Features Applied' ))
864913
@@ -878,8 +927,7 @@ def remove_if_have(list, element):
878927 )
879928 )
880929 else :
881- logger .info (
882- f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
930+ print (f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
883931 print ("{:<20} {:<20} {:<20} {:<120}" .format (
884932 'Numactl Mode' , 'Performance (FPS)' , 'Accuracy' , 'Features Applied' ))
885933
@@ -921,12 +969,42 @@ def remove_if_have(list, element):
921969 original_model_performance = list_FPS [i ]
922970 break
923971
924- logger .info (f"The best optimization set for your model is: { list_optimization_set_top3 [0 ]} " )
925- logger .info (
926- f"You can get up to: "
927- f"{ round (list_performance_top3 [0 ] / original_model_performance , 1 )} "
928- f" X performance boost with the suggested optimization set."
972+ if auto_quant :
973+ # convert feature name to display name for better user experience
974+ if list_optimization_set_top3 [0 ] == ['pytorch_inc_dynamic_quant' ]:
975+ best_optimization_display = "Intel INT8 (Dynamic)"
976+ elif list_optimization_set_top3 [0 ] == ['pytorch_inc_static_quant_fx' ]:
977+ best_optimization_display = "Intel INT8 (Static)"
978+ elif list_optimization_set_top3 [0 ] == ['pytorch_inc_static_quant_ipex' ]:
979+ best_optimization_display = "Intel INT8 (IPEX)"
980+ elif list_optimization_set_top3 [0 ] == ['pytorch_inc_bf16' ]:
981+ best_optimization_display = "Intel BF16"
982+ elif list_optimization_set_top3 [0 ] == []:
983+ best_optimization_display = "The Original Model"
984+
985+ logger .info (f"The best optimization set for your model is { best_optimization_display } " )
986+ logger .info (
987+ f"You can get up to "
988+ f"{ round (list_performance_top3 [0 ] / original_model_performance , 1 )} "
989+ f" X performance boost."
990+ )
991+ else :
992+ logger .info (f"The best optimization set for your model is: { list_optimization_set_top3 [0 ]} " )
993+ logger .info (
994+ f"You can get up to "
995+ f"{ round (list_performance_top3 [0 ] / original_model_performance , 1 )} "
996+ f" X performance boost."
997+ )
998+
999+ # generate patch for the best optimization
1000+ features_to_generate = list_optimization_set_top3 [0 ]
1001+ features_to_generate .append ("pytorch_cuda_to_cpu" )
1002+ enable (
1003+ code = code ,
1004+ features = features_to_generate ,
1005+ save_patch_path = "intel_optimization" ,
9291006 )
1007+ logger .info ('The optimization patch was saved to "intel_optimization.diff"' )
9301008
9311009 return list_optimization_set_top3 , list_performance_top3 , original_model_ranking , original_model_performance
9321010
0 commit comments