@@ -28,7 +28,7 @@ def enable(
2828 code ,
2929 features ,
3030 target_batch_size = 1 , # effective for feature "pytorch_change_batch_size"
31- num_benchmark_iteration = 30 , # effective for feature "pytorch_benchmark"
31+ num_benchmark_iteration = 10 , # effective for feature "pytorch_benchmark"
3232 eval_accuracy = False ,
3333 generate_patch = True ,
3434 overwrite = False ,
@@ -178,6 +178,23 @@ def enable(
178178
179179 transformed_list_code_path = []
180180
181+ ## Determine Code Domain
182+ # reset globals
183+ globals .reset_globals ()
184+
185+ from .utils import handle_user_input
186+ globals .list_code_path , num_user_code_path = handle_user_input .get_all_code_path (code )
187+
188+ from .coders .autoinc import domain
189+ code_domain = domain .determine_domain (globals .list_code_path [0 ])
190+ if code_domain == "transformers_trainer" :
191+ if "pytorch_benchmark" in features :
192+ features = ["pytorch_reclaim_inference_transformers_trainer" ] + features
193+ # for BS
194+ args += " --per_device_eval_batch_size " + str (target_batch_size )
195+ globals .batch_size_changed = True
196+
197+ ## Feature Transformation
181198 for idx_feature , feature in enumerate (features ):
182199
183200 # reset globals
@@ -223,10 +240,14 @@ def enable(
223240 from .coders .pytorch .dummy_dataloader import DummyDataLoader
224241 opt = DummyDataLoader (globals .list_model_def_instance )
225242 opt .register_transformation ()
226- elif feature == "pytorch_reclaim_inputs" : # is not in harness scope, but needs call graph and type inference
243+ elif feature == "pytorch_reclaim_inputs" :
227244 from .coders .pytorch .reclaim_inputs import ReclaimInputs
228245 opt = ReclaimInputs (globals .list_model_def_instance )
229246 opt .register_transformation ()
247+ elif feature == "pytorch_reclaim_inference_transformers_trainer" :
248+ from .coders .pytorch .reclaim_inference_transformers_trainer import ReclaimInferenceTransformersTrainer
249+ opt = ReclaimInferenceTransformersTrainer (globals .list_model_def_instance )
250+ opt .register_transformation ()
230251 elif feature in [
231252 "pytorch_inc_dynamic_quant" ,
232253 "pytorch_inc_static_quant_fx" ,
@@ -257,6 +278,8 @@ def enable(
257278 for i in range (len (list_transformed_code )):
258279 # Batch Size
259280 if "pytorch_change_batch_size" in features :
281+ if "batch_size" in list_transformed_code [0 ]: # entry code has "batch_size"
282+ globals .batch_size_changed = True
260283 from .coders .pytorch .batch_size import BatchSizeCoder
261284 globals .target_batch_size = str (target_batch_size )
262285 list_transformed_code [i ] = BatchSizeCoder (list_transformed_code [i ]).transform ()
@@ -321,9 +344,6 @@ def enable(
321344 ### Output of Enabling
322345 globals .list_code_path , num_user_code_path = handle_user_input .get_all_code_path (code )
323346
324- if save_patch_path == "" :
325- save_patch_path = ws_path
326-
327347 if generate_patch :
328348 whole_patch_user_code = ""
329349 for path in globals .list_code_path [0 :num_user_code_path ]:
@@ -335,10 +355,12 @@ def enable(
335355 this_patch , _ = sp_gen_patch .communicate ()
336356 this_patch = str (this_patch )[2 :- 1 ]
337357 whole_patch_user_code += this_patch
338- open (save_patch_path + "neural_coder_patch" + patch_suffix , "w" ).write (
358+ if save_patch_path == "" :
359+ save_patch_path = ws_path + "neural_coder_patch"
360+ open (save_patch_path + patch_suffix , "w" ).write (
339361 whole_patch_user_code .replace (r'\n' , '\n ' ).replace (r'\t' , '\t ' ).replace (r"\'" , "\' " ))
340362 abs_patch_path = os .path .abspath (
341- save_patch_path + "neural_coder_patch" + patch_suffix )
363+ save_patch_path + patch_suffix )
342364 logger .info (f"The patch is saved to: [{ abs_patch_path } ]" )
343365
344366 if overwrite :
@@ -358,10 +380,12 @@ def enable(
358380 this_patch , _ = sp_gen_patch .communicate ()
359381 this_patch = str (this_patch )[2 :- 1 ]
360382 whole_patch_import_modules += this_patch
361- open (save_patch_path + "neural_coder_patch_import_modules" + patch_suffix , "w" ).write (
383+ if save_patch_path == "" :
384+ save_patch_path = ws_path + "neural_coder_patch_import_modules"
385+ open (save_patch_path + patch_suffix , "w" ).write (
362386 whole_patch_import_modules .replace (r'\n' , '\n ' ).replace (r'\t' , '\t ' ).replace (r"\'" , "\' " ))
363387 abs_patch_path = os .path .abspath (
364- save_patch_path + "neural_coder_patch_import_modules" + patch_suffix )
388+ save_patch_path + patch_suffix )
365389 logger .info (
366390 f"The patch for imported modules is saved to: [{ abs_patch_path } ]" )
367391
@@ -580,7 +604,10 @@ def bench(
580604 IPS [- 1 ] = IPS [- 2 ]
581605
582606 try :
583- FPS = round (sum (IPS ) / len (IPS ) * ninstances * bench_batch_size , 3 )
607+ if globals .batch_size_changed : # only times BS if BS has been modified, otherwise times 1
608+ FPS = round (sum (IPS ) / len (IPS ) * ninstances * bench_batch_size , 3 )
609+ else :
610+ FPS = round (sum (IPS ) / len (IPS ) * ninstances * 1 , 3 )
584611 except :
585612 FPS = 0
586613 try :
@@ -824,20 +851,43 @@ def remove_if_have(list, element):
824851 return list
825852
826853 features = remove_if_have (features , "pytorch_benchmark" )
827- features = remove_if_have (
828- features , "pytorch_change_batch_size" )
854+ features = remove_if_have (features , "pytorch_change_batch_size" )
829855 features = remove_if_have (features , "pytorch_cuda_to_cpu" )
830856
831- if not eval_accuracy :
832- logger .info (
833- f"Benchmark result (performance) of optimization set [{ features } ]"
834- f" is [{ bench_performance [0 ]} ] (FPS)" )
857+ if auto_quant :
858+ # convert feature name to display name for better user experience
859+ if features == ['pytorch_inc_dynamic_quant' ]:
860+ features_display = "Intel INT8 (Dynamic)"
861+ elif features == ['pytorch_inc_static_quant_fx' ]:
862+ features_display = "Intel INT8 (Static)"
863+ elif features == ['pytorch_inc_static_quant_ipex' ]:
864+ features_display = "Intel INT8 (IPEX)"
865+ elif features == ['pytorch_inc_bf16' ]:
866+ features_display = "Intel BF16"
867+ elif features == []:
868+ features_display = "The Original Model"
869+
870+ if not eval_accuracy :
871+ logger .info (
872+ f"Benchmark result (performance) of { features_display } "
873+ f" is { bench_performance [0 ]} (FPS)" )
874+ else :
875+ logger .info (
876+ f"Benchmark result (performance) of { features_display } "
877+ f" is { bench_performance [0 ]} (FPS)" )
878+ logger .info (
879+ f"Benchmark result (accuracy) of { features_display } is { bench_acc [5 ]} " )
835880 else :
836- logger .info (
837- f"Benchmark result (performance) of optimization set [{ features } ]"
838- f" is [{ bench_performance [0 ]} ] (FPS)" )
839- logger .info (
840- f"Benchmark result (accuracy) of optimization set [{ features } ] is [{ bench_acc [5 ]} ]" )
881+ if not eval_accuracy :
882+ logger .info (
883+ f"Benchmark result (performance) of optimization set [{ features } ]"
884+ f" is [{ bench_performance [0 ]} ] (FPS)" )
885+ else :
886+ logger .info (
887+ f"Benchmark result (performance) of optimization set [{ features } ]"
888+ f" is [{ bench_performance [0 ]} ] (FPS)" )
889+ logger .info (
890+ f"Benchmark result (accuracy) of optimization set [{ features } ] is [{ bench_acc [5 ]} ]" )
841891
842892 d = {} # initialize dict
843893 d ["features" ] = features
@@ -857,8 +907,7 @@ def remove_if_have(list, element):
857907
858908 # print result
859909 if not eval_accuracy :
860- logger .info (
861- f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
910+ print (f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
862911 print ("{:<20} {:<20} {:<120}" .format (
863912 'Numactl Mode' , 'Performance (FPS)' , 'Features Applied' ))
864913
@@ -878,8 +927,7 @@ def remove_if_have(list, element):
878927 )
879928 )
880929 else :
881- logger .info (
882- f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
930+ print (f"Superbench result of sweeping [{ sweep_objective } ] printed below with sorted FPS: " )
883931 print ("{:<20} {:<20} {:<20} {:<120}" .format (
884932 'Numactl Mode' , 'Performance (FPS)' , 'Accuracy' , 'Features Applied' ))
885933
@@ -921,12 +969,42 @@ def remove_if_have(list, element):
921969 original_model_performance = list_FPS [i ]
922970 break
923971
924- logger .info (f"The best optimization set for your model is: { list_optimization_set_top3 [0 ]} " )
925- logger .info (
926- f"You can get up to: "
927- f"{ round (list_performance_top3 [0 ] / original_model_performance , 1 )} "
928- f" X performance boost with the suggested optimization set."
972+ if auto_quant :
973+ # convert feature name to display name for better user experience
974+ if list_optimization_set_top3 [0 ] == ['pytorch_inc_dynamic_quant' ]:
975+ best_optimization_display = "Intel INT8 (Dynamic)"
976+ elif list_optimization_set_top3 [0 ] == ['pytorch_inc_static_quant_fx' ]:
977+ best_optimization_display = "Intel INT8 (Static)"
978+ elif list_optimization_set_top3 [0 ] == ['pytorch_inc_static_quant_ipex' ]:
979+ best_optimization_display = "Intel INT8 (IPEX)"
980+ elif list_optimization_set_top3 [0 ] == ['pytorch_inc_bf16' ]:
981+ best_optimization_display = "Intel BF16"
982+ elif list_optimization_set_top3 [0 ] == []:
983+ best_optimization_display = "The Original Model"
984+
985+ logger .info (f"The best optimization set for your model is { best_optimization_display } " )
986+ logger .info (
987+ f"You can get up to "
988+ f"{ round (list_performance_top3 [0 ] / original_model_performance , 1 )} "
989+ f" X performance boost."
990+ )
991+ else :
992+ logger .info (f"The best optimization set for your model is: { list_optimization_set_top3 [0 ]} " )
993+ logger .info (
994+ f"You can get up to "
995+ f"{ round (list_performance_top3 [0 ] / original_model_performance , 1 )} "
996+ f" X performance boost."
997+ )
998+
999+ # generate patch for the best optimization
1000+ features_to_generate = list_optimization_set_top3 [0 ]
1001+ features_to_generate .append ("pytorch_cuda_to_cpu" )
1002+ enable (
1003+ code = code ,
1004+ features = features_to_generate ,
1005+ save_patch_path = "intel_optimization" ,
9291006 )
1007+ logger .info ('The optimization patch was saved to "intel_optimization.diff"' )
9301008
9311009 return list_optimization_set_top3 , list_performance_top3 , original_model_ranking , original_model_performance
9321010
0 commit comments