joey12300
diff --git a/‎docs/compression.md‎
Lines changed: 35 additions & 51 deletions b/‎docs/compression.md‎
Lines changed: 35 additions & 51 deletions
diff --git a/‎model_zoo/ernie-3.0/compress_qa.py‎
Lines changed: 1 addition & 13 deletions b/‎model_zoo/ernie-3.0/compress_qa.py‎
Lines changed: 1 addition & 13 deletions
diff --git a/‎paddlenlp/trainer/compression_args.py‎
Lines changed: 13 additions & 61 deletions b/‎paddlenlp/trainer/compression_args.py‎
Lines changed: 13 additions & 61 deletions
@@ -6,7 +6,7 @@
        * [Step1：获取模型压缩参数 compression_args](#获取模型压缩参数compression_args)
        * [Step2：实例化 Trainer 并调用 compress()](#实例化Trainer并调用compress())
            * [Trainer 实例化参数介绍](#Trainer实例化参数介绍)
-       * [Step3：实现自定义评估函数（按需可选）](#实现自定义评估函数（按需可选）)
+       * [Step3：实现自定义评估函数和 loss 计算函数（按需可选）](#实现自定义评估函数和loss计算函数（按需可选）)
        * [Step4：传参并运行压缩脚本](#传参并运行压缩脚本)
            * [CompressionArguments 参数介绍](#CompressionArguments参数介绍)
        * [三大场景模型压缩 API 使用示例](#三大场景模型压缩API使用示例)
@@ -118,45 +118,11 @@ compression_args = parser.parse_args_into_dataclasses()
 #### Trainer 实例化参数介绍
 
 - **--model** 待压缩的模型，目前支持 ERNIE、BERT、RoBERTa、ERNIE-M、ELECTRA、ERNIE-Gram、PP-MiniLM、TinyBERT 等结构相似的模型，是在下游任务中微调后的模型，当预训练模型选择 ERNIE 时，需要继承 `ErniePretrainedModel`。以分类任务为例，可通过`AutoModelForSequenceClassification.from_pretrained(model_name_or_path)` 等方式来获取，这种情况下，`model_name_or_path`目录下需要有 model_config.json, model_state.pdparams 文件；
-- **--data_collator** 三类任务均可使用 PaddleNLP 预定义好的 [DataCollator 类](../paddlenlp/data/data_collator.py)，`data_collator` 可对数据进行 `Pad` 等操作。使用方法参考 [示例代码](../model_zoo/ernie-3.0/compress_seq_cls.py) 即可；
+- **--data_collator** 三类任务均可使用 PaddleNLP 预定义好的 [DataCollator 类](../../paddlenlp/data/data_collator.py)，`data_collator` 可对数据进行 `Pad` 等操作。使用方法参考 [示例代码](../model_zoo/ernie-3.0/compress_seq_cls.py) 即可；
 - **--train_dataset** 裁剪训练需要使用的训练集，是任务相关的数据。自定义数据集的加载可参考 [文档](https://huggingface.co/docs/datasets/loading)。不启动裁剪时，可以为 None；
 - **--eval_dataset** 裁剪训练使用的评估集，也是量化使用的校准数据，是任务相关的数据。自定义数据集的加载可参考 [文档](https://huggingface.co/docs/datasets/loading)。是 Trainer 的必选参数；
 - **--tokenizer** 模型 `model` 对应的 `tokenizer`，可使用 `AutoTokenizer.from_pretrained(model_name_or_path)` 来获取。
-- **--criterion** 模型的 loss 计算方法，可以是一个 nn.Layer 对象，也可以是一个函数，用于在 ofa_utils.py 计算模型的 loss 用于计算梯度从而确定神经元重要程度。
-
-其中，`criterion` 函数定义示例：
-
-```python
-# 支持的形式一：
-def criterion(logits, labels):
-    loss_fct = paddle.nn.BCELoss()
-    start_ids, end_ids = labels
-    start_prob, end_prob = outputs
-    start_ids = paddle.cast(start_ids, 'float32')
-    end_ids = paddle.cast(end_ids, 'float32')
-    loss_start = loss_fct(start_prob, start_ids)
-    loss_end = loss_fct(end_prob, end_ids)
-    loss = (loss_start + loss_end) / 2.0
-    return loss
-
-# 支持的形式二：
-class CrossEntropyLossForSQuAD(paddle.nn.Layer):
-
-    def __init__(self):
-        super(CrossEntropyLossForSQuAD, self).__init__()
-
-    def forward(self, y, label):
-        start_logits, end_logits = y
-        start_position, end_position = label
-        start_position = paddle.unsqueeze(start_position, axis=-1)
-        end_position = paddle.unsqueeze(end_position, axis=-1)
-        start_loss = paddle.nn.functional.cross_entropy(input=start_logits,
-                                                        label=start_position)
-        end_loss = paddle.nn.functional.cross_entropy(input=end_logits,
-                                                      label=end_position)
-        loss = (start_loss + end_loss) / 2
-        return loss
-```
+- **--criterion** 模型的 loss 对象，是一个 nn.Layer 对象，用于在 ofa_utils.py 计算模型的 loss 用于计算梯度从而确定神经元重要程度。
 
 用以上参数实例化 Trainer 对象，之后直接调用 `compress()` 。`compress()` 会根据选择的策略进入不同的分支，以进行裁剪或者量化的过程。
 
@@ -181,11 +147,11 @@ trainer = Trainer(
 trainer.compress()
 ```
 
-<a name="实现自定义评估函数(按需可选）"></a>
+<a name="实现自定义评估函数和loss计算函数（按需可选）"></a>
 
-### Step3：实现自定义评估函数，以适配自定义压缩任务
+### Step3：实现自定义评估函数和 loss 计算函数（按需可选），以适配自定义压缩任务
 
-当使用 DynaBERT 裁剪功能时，如果模型、Metrics 不符合下表的情况，那么模型压缩 API 中评估函数需要自定义。
+当使用 DynaBERT 裁剪功能时，如果模型、Metrics 不符合下表的情况，那么模型压缩 API 中自带的评估函数和计算 loss 的参数可能需要自定义。
 
 目前 DynaBERT 裁剪功能只支持 SequenceClassification 等三类 PaddleNLP 内置 class，并且内置评估器对应为 Accuracy、F1、Squad。
 
@@ -197,26 +163,33 @@ trainer.compress()
 
 - 如果模型是自定义模型，需要继承 `XXXPretrainedModel`，例如当预训练模型选择 ERNIE 时，继承 `ErniePretrainedModel`，模型需要支持调用 `from_pretrained()` 导入模型，且只含 `pretrained_model_name_or_path` 一个必选参数，`forward` 函数返回 `logits` 或者 `tuple of logits`；
 
-- 如果模型是自定义模型，或者数据集比较特殊，压缩 API 中 loss 的计算不符合使用要求，需要自定义 `custom_evaluate` 评估函数，需要同时支持 `paddleslim.nas.ofa.OFA` 模型和 `paddle.nn.layer` 模型。可参考下方示例代码。
+- 如果模型是自定义模型，或者数据集比较特殊，压缩 API 中 loss 的计算不符合使用要求，需要自定义 `custom_dynabert_calc_loss` 函数。计算 loss 后计算梯度，从而得出计算神经元的重要性以便裁剪使用。可参考下方示例代码。
+    - 输入每个 batch 的数据，返回模型的 loss。
+    - 将该函数传入 `compress()` 中的 `custom_dynabert_calc_loss` 参数；
+
+- 如果评估器也不满足上述所支持情况，需实现自定义 `custom_dynabert_evaluate` 评估函数，需要同时支持 `paddleslim.nas.ofa.OFA` 模型和 `paddle.nn.layer` 模型。可参考下方示例代码。
     - 输入`model` 和 `dataloader`，返回模型的评价指标（单个 float 值）。
-    - 将该函数传入 `compress()` 中的 `custom_evaluate` 参数；
+    - 将该函数传入 `compress()` 中的 `custom_dynabert_evaluate` 参数；
 
-`custom_evaluate()` 函数定义示例：
+`custom_dynabert_evaluate()` 函数定义示例：
 
 ```python
     import paddle
     from paddle.metric import Accuracy
+    from paddleslim.nas.ofa import OFA
 
     @paddle.no_grad()
-    def evaluate_seq_cls(self, model, data_loader):
+    def evaluate_seq_cls(model, data_loader):
         metric = Accuracy()
         model.eval()
         metric.reset()
         for batch in data_loader:
             logits = model(input_ids=batch['input_ids'],
-                           token_type_ids=batch['token_type_ids'])
+                           token_type_ids=batch['token_type_ids'],
+                           #必须写这一行
+                           attention_mask=[None, None])
             # Supports paddleslim.nas.ofa.OFA model and nn.layer model.
-            if isinstance(model, paddleslim.nas.ofa.OFA):
+            if isinstance(model, OFA):
                 logits = logits[0]
             correct = metric.compute(logits, batch['labels'])
             metric.update(correct)
@@ -226,11 +199,22 @@ trainer.compress()
         return res
 ```
 
+`custom_dynabert_calc_loss` 函数定义示例：
 
-在调用 `compress()` 时传入这个自定义函数：
+```python
+def calc_loss(loss_fct, model, batch, head_mask):
+    logits = model(input_ids=batch["input_ids"],
+                token_type_ids=batch["token_type_ids"],
+                # 必须写下面这行
+                attention_mask=[None, head_mask])
+    loss = loss_fct(logits, batch["labels"])
+    return loss
+```
+在调用 `compress()` 时传入这 2 个自定义函数：
 
 ```python
-trainer.compress(custom_evaluate=evaluate_seq_cls)
+trainer.compress(custom_dynabert_evaluate=evaluate_seq_cls,
+                 custom_dynabert_calc_loss=calc_loss)
 ```
 
 
@@ -269,8 +253,8 @@ python compress.py \
 
 公共参数中的参数和具体的压缩策略无关。
 
-- **--strategy** 模型压缩策略，目前支持 `'dynabert+ptq'`、 `'dynabert'` 、 `'ptq'` 和 `'qat'`。
-其中 `'dynabert'` 代表基于 DynaBERT 的宽度裁剪策略，`'ptq'` 表示静态离线量化， `'dynabert+ptq'` 代表先裁剪后量化。`qat` 表示量化训练。默认是 `'dynabert+ptq'`；
+- **--strategy** 模型压缩策略，目前支持 `'dynabert+ptq'`、 `'dynabert'` 和 `'ptq'`。
+其中 `'dynabert'` 代表基于 DynaBERT 的宽度裁剪策略，`'ptq'` 表示静态离线量化， `'dynabert+ptq'` 代表先裁剪后量化。默认是 `'dynabert+ptq'`；
 
 - **--output_dir** 模型压缩后模型保存目录；
 
@@ -403,7 +387,7 @@ python compress_qa.py \
 
 ### Paddle2ONNX 部署
 
-ONNX 导出及 ONNXRuntime 部署请参考：[ONNX 导出及 ONNXRuntime 部署指南](../model_zoo/ernie-3.0/deploy/paddle2onnx/README.md)
+ONNX 导出及 ONNXRuntime 部署请参考：[ONNX 导出及 ONNXRuntime 部署指南](./deploy/paddle2onnx/README.md)
 
 
 ### Paddle Lite 移动端部署
 
@@ -17,7 +17,6 @@
 from functools import partial
 
 import paddle
-import paddle.nn.functional as F
 
 from paddlenlp.data import DataCollatorWithPadding
 from paddlenlp.trainer import PdArgumentParser, CompressionArguments, Trainer
@@ -115,16 +114,6 @@ def post_processing_function(examples, features, predictions, stage="eval"):
         } for ex in examples]
         return EvalPrediction(predictions=predictions, label_ids=references)
 
-    def criterion(outputs, label):
-        start_logits, end_logits = outputs
-        start_position, end_position = label
-        start_position = paddle.unsqueeze(start_position, axis=-1)
-        end_position = paddle.unsqueeze(end_position, axis=-1)
-        start_loss = F.cross_entropy(input=start_logits, label=start_position)
-        end_loss = F.cross_entropy(input=end_logits, label=end_position)
-        loss = (start_loss + end_loss) / 2
-        return loss
-
     trainer = QuestionAnsweringTrainer(
         model=model,
         args=compression_args,
@@ -133,8 +122,7 @@ def criterion(outputs, label):
         eval_examples=eval_examples,
         data_collator=data_collator,
         post_process_function=post_processing_function,
-        tokenizer=tokenizer,
-        criterion=criterion)
+        tokenizer=tokenizer)
 
     compression_args.print_config()
 
 
@@ -46,7 +46,7 @@ class CompressionArguments(TrainingArguments):
         default="dynabert+ptq",
         metadata={
             "help":
-            "Compression strategy. It supports 'dynabert+ptq', 'dynabert', 'ptq' and 'qat' now."
+            "Compression strategy. It supports 'dynabert+ptq', 'dynabert' and 'ptq' now."
         },
     )
     # dynabert
@@ -69,25 +69,6 @@ class CompressionArguments(TrainingArguments):
         metadata={
             "help": "Linear warmup over warmup_ratio fraction of total steps."
         })
-    # quant
-    weight_quantize_type: Optional[str] = field(
-        default='channel_wise_abs_max',
-        metadata={
-        "help":
-        "Quantization type for weights. Supports 'abs_max' and 'channel_wise_abs_max'. " \
-        "This param only specifies the fake ops in saving quantized model, and " \
-        "we save the scale obtained by post training quantization in fake ops. " \
-        "Compared to 'abs_max' the model accuracy is usually higher when it is " \
-        "'channel_wise_abs_max'."
-    }, )
-    activation_quantize_type: Optional[str] = field(
-        default=None,
-        metadata={
-        "help":
-        "Support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'. " \
-        "In strategy 'ptq', it defaults to 'range_abs_max' and in strategy " \
-        "'qat', it defaults to 'moving_average_abs_max'."
-    }, )
     # ptq:
     algo_list: Optional[List[str]] = field(
         default=None,
@@ -119,7 +100,15 @@ class CompressionArguments(TrainingArguments):
             "List of batch_size. 'batch_size' is the batch of data loader."
         },
     )
-
+    weight_quantize_type: Optional[str] = field(
+        default='channel_wise_abs_max',
+        metadata={
+        "help":
+        "Support 'abs_max' and 'channel_wise_abs_max'. This param only specifies " \
+        "the fake ops in saving quantized model, and we save the scale obtained " \
+        "by post training quantization in fake ops. Compared to 'abs_max', " \
+        "the model accuracy is usually higher when it is 'channel_wise_abs_max'."
+    }, )
     round_type: Optional[str] = field(
         default='round',
         metadata={
@@ -146,45 +135,16 @@ class CompressionArguments(TrainingArguments):
             "is None."
         },
     )
-    # qat
-    activation_preprocess_type: Optional[str] = field(
-        default=None,
-        metadata={
-            "help":
-            "Method of preprocessing the activation value of the quantitative " \
-            "model. Currently, PACT method is supported. If necessary, it can be " \
-            "set to 'PACT'. The default value is None, which means that no " \
-            "preprocessing is performed on the active value."
-        },
-    )
-    weight_preprocess_type: Optional[str] = field(
-        default=None,
-        metadata={
-            "help":
-            "Method of preprocessing the weight parameters of the quantitative " \
-            "model. Currently, method 'PACT' is supported. If necessary, it can " \
-            "be set to 'PACT'. The default value is None, which means that " \
-            "no preprocessing is performed on weights."
-        },
-    )
-    moving_rate: Optional[float] = field(
-        default=0.9,
-        metadata={
-            "help": "The decay coefficient of moving average. Defaults to 0.9."
-        },
-    )
 
     def print_config(self, args=None, key=""):
         """
         Prints all config values.
         """
 
         compression_arg_name = [
-            'strategy', 'width_mult_list', 'batch_num_list', 'bias_correction',
+            'width_mult_list', 'batch_num_list', 'bias_correction',
             'round_type', 'algo_list', 'batch_size_list', 'strategy',
-            'weight_quantize_type', 'activation_quantize_type',
-            'input_infer_model_path', 'activation_preprocess_type',
-            'weight_preprocess_type', 'moving_rate'
+            'weight_quantize_type', 'input_infer_model_path'
         ]
         default_arg_dict = {
             "width_mult_list": ['3/4'],
@@ -203,9 +163,7 @@ def print_config(self, args=None, key=""):
             "'dynabert' and 'ptq'. `width_mult_list` is needed in " \
             "`dynabert`, and `algo_list`, `batch_num_list`, `batch_size_list`," \
             " `round_type`, `bias_correction`, `weight_quantize_type`, " \
-            "`input_infer_model_path` are needed in 'ptq'. `activation_preprocess_type'`, " \
-            "'weight_preprocess_type', 'moving_rate', 'weight_quantize_type', " \
-            "and 'activation_quantize_type' are needed in 'qat'."
+            "`input_infer_model_path` are needed in 'ptq'. "
             )
         logger.info('{:30}:{}'.format("paddle commit id",
                                       paddle.version.commit))
@@ -218,12 +176,6 @@ def print_config(self, args=None, key=""):
                 if v is None and arg in default_arg_dict:
                     v = default_arg_dict[arg]
                     setattr(args, arg, v)
-                elif v is None and arg == 'activation_quantize_type':
-                    if key == "Compression" and 'ptq' in args.strategy:
-                        setattr(args, arg, 'range_abs_max')
-                    elif key == "Compression" and 'qat' in args.strategy:
-                        setattr(args, arg, 'moving_average_abs_max')
-
                 if not isinstance(v, types.MethodType):
                     logger.info('{:30}:{}'.format(arg, v))