Support multi-label setup in Text Classification Taskflow (#3968)

sijunhe · wj-Mcat · w5688414 · web-flow · commit f2a583caa082 · 2022-12-14T11:32:06.000+08:00
* [NewFeatures]add ci & pypi workflow (#3578) * add basic ci & pypi workflow * add Makefile file to enable more custom scripts * update makefile & workflow * add manifest.in file * save requirements * improve makefile * add test running command * complete simple CI test script * complete simple CI test script * complete first version of workflow * failure at first * remove CI workflow * update workflows * add dev-dependency * complete format & lint * add paddle dependency * complete format & lint & test command * upgrae workflow files * use pytest to do test * update need keywords * change the description * update lint script * upgrade workflow * update parameterized * update makefile * fix lint * update test command in workflow * fix __init__ lint * ignore all __init__.py file * fix script workflow * udpate flake8 config * update flake config * update flake8 config * remove redefinition of unused Co-authored-by: Sijun He <sijun.he@hotmail.com> * Add multi recall of semantic search for pipelines (#3864) * Add multi recall of semantic search for pipelines * Update multi recall semantic search README.md * remove unused imports * remove unused imports * Update __init__.py * remove unused imports * restore __init__.py * skip retriever __init__.py * [trainer] fix bug when batch size=1 (#3960) * [PPdiffusers] Release ppdiffusers 0.6.3 (#3963) * release 0.6.3 * release 0.6.3 * style * code style * fix test * remove commented code Co-authored-by: 骑马小猫 <1435130236@qq.com> Co-authored-by: w5688414 <w5688414@gmail.com> Co-authored-by: Noel <wanghuijuan03@baidu.com> Co-authored-by: yujun <50394665+JunnYu@users.noreply.github.com>
diff --git a/paddlenlp/taskflow/taskflow.py b/paddlenlp/taskflow/taskflow.py
@@ -355,7 +355,11 @@
         "models": {
             "multi_class": {
                 "task_class": TextClassificationTask,
-                "task_flag": "text_classification-text_classification",
+                "task_flag": "text_classification-multi_class",
+            },
+            "multi_label": {
+                "task_class": TextClassificationTask,
+                "task_flag": "text_classification-multi_label",
             },
         },
         "default": {"model": "multi_class"},
diff --git a/paddlenlp/taskflow/text_classification.py b/paddlenlp/taskflow/text_classification.py
@@ -15,46 +15,70 @@
 
 from typing import Any, Dict, List, Union
 
-from paddlenlp.data import DataCollatorWithPadding
-from paddlenlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
-
 import numpy as np
 import paddle
 import paddle.nn.functional as F
-from .utils import static_mode_guard, dygraph_mode_guard
+from scipy.special import expit as np_sigmoid
+from scipy.special import softmax as np_softmax
+
+from paddlenlp.data import DataCollatorWithPadding
+from paddlenlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
+
 from .task import Task
+from .utils import dygraph_mode_guard, static_mode_guard
 
 usage = r"""
         from paddlenlp import Taskflow
-        id2label = {
-            0: "negative",
-            1: "positive"
-        }
         text_cls = Taskflow(
             "text_classification",
             model="multi_class",
             task_path=<local_saved_model>,
-            id2label=id2label
+            id2label={0: "negative", 1: "positive"}
             )
         text_cls('房间依然很整洁，相当不错')
         '''
         [
-            {'text': '房间依然很整洁，相当不错',
-            'label': 'positive',
-            'score': 0.80}
+            {
+                'text': '房间依然很整洁，相当不错',
+                'predictions': [{
+                    'label': 'positive',
+                    'score': 0.80
+                }]
+            }
         ]
         '''
-
-        text_cls(['房间依然很整洁，相当不错',
-                        '味道不咋地，很一般'])
+        text_cls = Taskflow(
+            "text_classification",
+            model="multi_label",
+            task_path=<local_saved_model>,
+            id2label={ 0: "体育", 1: "经济", 2: "娱乐"}
+            )
+        text_cls(['这是一条体育娱乐新闻的例子',
+                        '这是一条经济新闻'])
         '''
         [
-            {'text': '房间依然很整洁，相当不错',
-            'label': 'positive',
-            'score': 0.90},
-            {'text': '味道不咋地，很一般',
-            'label': 'negative',
-            'score': 0.88},
+            {
+                'text': '这是一条体育娱乐新闻的例子',
+                'predictions': [
+                    {
+                        'label': '体育',
+                        'score': 0.80
+                    },
+                    {
+                        'label': '娱乐',
+                        'score': 0.90
+                    }
+                ]
+            },
+            {
+                'text': '这是一条经济新闻',
+                'predictions': [
+                    {
+                        'label': '经济',
+                        'score': 0.80
+                    }
+                ]
+            }
         ]
          """
 
@@ -73,18 +97,29 @@ class TextClassificationTask(Task):
 
     Args:
         task (string): The name of task.
-        model (string): Mode of the classification, only support `multi_class` for now
+        model (string): Mode of the classification. Supports ["multi_class", "multi_label"]
         task_path (string): The local file path to the model path or a pre-trained model
         id2label (string): The dictionary to map the predictions from class ids to class names
         is_static_model (string): Whether the model is a static model
+        multilabel_threshold (float): The probability threshold used for the multi_label setup. Only effective if model = "multi_label". Defaults to 0.5
         kwargs (dict, optional): Additional keyword arguments passed along to the specific task.
     """
 
-    def __init__(self, task: str, model: str, id2label: Dict[int, str], is_static_model: bool = False, **kwargs):
+    def __init__(
+        self,
+        task: str,
+        model: str,
+        id2label: Dict[int, str],
+        is_static_model: bool = False,
+        multilabel_threshold: float = 0.5,
+        **kwargs
+    ):
         super().__init__(task=task, model=model, is_static_model=is_static_model, **kwargs)
         self.id2label = id2label
         self.is_static_model = is_static_model
         self._construct_tokenizer(self._task_path)
+        self.multilabel_threshold = multilabel_threshold
+
         if self.is_static_model:
             self._get_inference_model()
         else:
@@ -135,40 +170,58 @@ def _run_model(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         """
         Run the task model from the outputs of the `_tokenize` function.
         """
-        # TODO: support multi_label, hierachical classification
-        model_outputs = []
+        # TODO: support hierarchical classification
+        outputs = {}
+        outputs["text"] = inputs["text"]
+        outputs["batch_logits"] = []
         if self.is_static_model:
             with static_mode_guard():
                 for batch in inputs["batches"]:
                     for i, input_name in enumerate(self.predictor.get_input_names()):
                         self.input_handles[i].copy_from_cpu(batch[input_name])
                     self.predictor.run()
                     logits = self.output_handle[0].copy_to_cpu().tolist()
-                    pred_indices = np.argmax(logits, axis=-1)
-                    probs = softmax(logits, axis=-1)
-                    for prob, pred_index in zip(probs, pred_indices):
-                        model_outputs.append({"label": pred_index, "score": prob[pred_index]})
+                    outputs["batch_logits"].append(logits)
         else:
             with dygraph_mode_guard():
                 for batch in inputs["batches"]:
                     logits = self._model(**batch)
-                    probs = F.softmax(logits, axis=-1).tolist()
-                    pred_indices = paddle.argmax(logits, axis=-1).tolist()
-                    for prob, pred_index in zip(probs, pred_indices):
-                        model_outputs.append({"label": pred_index, "score": prob[pred_index]})
-        outputs = {}
-        outputs["text"] = inputs["text"]
-        outputs["model_outputs"] = model_outputs
+                    outputs["batch_logits"].append(logits)
         return outputs
 
     def _postprocess(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         """
-        The model output is tag ids, this function will convert the model output to raw text.
+        This function converts the model logits output to class score and predictions
         """
-        # TODO: support multi_label, hierachical classification
+        # TODO: support hierarchical classification
         postprocessed_outputs = []
-        for i, model_output in enumerate(inputs["model_outputs"]):
-            model_output["label"] = self.id2label[model_output["label"]]
-            model_output["text"] = inputs["text"][i]
-            postprocessed_outputs.append(model_output)
+        for logits in inputs["batch_logits"]:
+            if self.model == "multi_class":
+                if isinstance(logits, paddle.Tensor):  # dygraph
+                    scores = F.softmax(logits, axis=-1).numpy()
+                    labels = paddle.argmax(logits, axis=-1).numpy()
+                else:  # static graph
+                    scores = np_softmax(logits, axis=-1)
+                    labels = np.argmax(logits, axis=-1)
+                for score, label in zip(scores, labels):
+                    postprocessed_output = {}
+                    postprocessed_output["predictions"] = [{"label": self.id2label[label], "score": score[label]}]
+                    postprocessed_outputs.append(postprocessed_output)
+            else:  # multi_label
+                if isinstance(logits, paddle.Tensor):  # dygraph
+                    scores = F.sigmoid(logits).numpy()
+                else:  # static graph
+                    scores = np_sigmoid(logits)
+                for score in scores:
+                    postprocessed_output = {}
+                    postprocessed_output["predictions"] = []
+                    for i, class_score in enumerate(score):
+                        if class_score > self.multilabel_threshold:
+                            postprocessed_output["predictions"].append(
+                                {"label": self.id2label[i], "score": class_score}
+                            )
+                    postprocessed_outputs.append(postprocessed_output)
+
+        for i, postprocessed_output in enumerate(postprocessed_outputs):
+            postprocessed_output["text"] = inputs["text"][i]
         return postprocessed_outputs
diff --git a/tests/taskflow/test_text_classification.py b/tests/taskflow/test_text_classification.py
@@ -14,47 +14,57 @@
 
 import os
 import unittest
+from tempfile import TemporaryDirectory
 
 import paddle
 from parameterized import parameterized
-from tempfile import TemporaryDirectory
+
 from paddlenlp.taskflow import Taskflow
 from paddlenlp.taskflow.text_classification import TextClassificationTask
 from paddlenlp.transformers import AutoTokenizer, ErnieForSequenceClassification
 
 
 class TestTextClassificationTask(unittest.TestCase):
-    def setUp(self):
-        self.temp_dir = TemporaryDirectory()
-        self.dygraph_model_path = os.path.join(self.temp_dir.name, "dygraph")
+    @classmethod
+    def setUpClass(cls):
+        cls.temp_dir = TemporaryDirectory()
+        cls.dygraph_model_path = os.path.join(cls.temp_dir.name, "dygraph")
         model = ErnieForSequenceClassification.from_pretrained("__internal_testing__/ernie", num_classes=2)
         tokenizer = AutoTokenizer.from_pretrained("__internal_testing__/ernie")
-        model.save_pretrained(self.dygraph_model_path)
-        tokenizer.save_pretrained(self.dygraph_model_path)
+        model.save_pretrained(cls.dygraph_model_path)
+        tokenizer.save_pretrained(cls.dygraph_model_path)
 
         # export to static
-        self.static_model_path = os.path.join(self.temp_dir.name, "static")
+        cls.static_model_path = os.path.join(cls.temp_dir.name, "static")
         input_spec = [
             paddle.static.InputSpec(shape=[None, None], dtype="int64", name="input_ids"),
             paddle.static.InputSpec(shape=[None, None], dtype="int64", name="token_type_ids"),
         ]
         static_model = paddle.jit.to_static(model, input_spec=input_spec)
-        paddle.jit.save(static_model, self.static_model_path)
-        tokenizer.save_pretrained(self.static_model_path)
-
-    def tearDown(self):
-        self.temp_dir.cleanup()
-
-    @parameterized.expand([(1,), (2,)])
-    def test_text_classification_task(self, batch_size):
+        paddle.jit.save(static_model, cls.static_model_path)
+        tokenizer.save_pretrained(cls.static_model_path)
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.temp_dir.cleanup()
+
+    @parameterized.expand(
+        [
+            (1, "multi_class"),
+            (2, "multi_class"),
+            (1, "multi_label"),
+            (2, "multi_label"),
+        ]
+    )
+    def test_classification_task(self, batch_size, model):
         # input_text is a tuple to simulate the args passed from Taskflow to TextClassificationTask
         input_text = (["百度", "深度学习框架", "飞桨", "PaddleNLP"],)
         id2label = {
             0: "negative",
             1: "positive",
         }
         dygraph_taskflow = TextClassificationTask(
-            model="multi_class",
+            model=model,
             task="text_classification",
             task_path=self.dygraph_model_path,
             id2label=id2label,
@@ -63,10 +73,11 @@ def test_text_classification_task(self, batch_size):
         )
 
         dygraph_results = dygraph_taskflow(input_text)
+
         self.assertEqual(len(dygraph_results), len(input_text[0]))
 
         static_taskflow = TextClassificationTask(
-            model="multi_class",
+            model=model,
             task="text_classification",
             is_static_model=True,
             task_path=self.static_model_path,
@@ -79,18 +90,29 @@ def test_text_classification_task(self, batch_size):
         self.assertEqual(len(static_results), len(input_text[0]))
 
         for dygraph_result, static_result in zip(dygraph_results, static_results):
-            self.assertEqual(dygraph_result["label"], static_result["label"])
-            self.assertAlmostEqual(dygraph_result["score"], static_result["score"], delta=1e-6)
-
-    @parameterized.expand([(1,), (2,)])
-    def test_taskflow(self, batch_size):
+            for dygraph_pred, static_pred in zip(dygraph_result["predictions"], static_result["predictions"]):
+                self.assertEqual(dygraph_pred["label"], static_pred["label"])
+                self.assertAlmostEqual(dygraph_pred["score"], static_pred["score"], delta=1e-6)
+                # if multi_label, all predictions should be greater than the threshold
+                if model == "multi_label":
+                    self.assertGreater(dygraph_pred["score"], dygraph_taskflow.multilabel_threshold)
+
+    @parameterized.expand(
+        [
+            (1, "multi_class"),
+            (2, "multi_class"),
+            (1, "multi_label"),
+            (2, "multi_label"),
+        ]
+    )
+    def test_taskflow(self, batch_size, model):
         input_text = ["百度", "深度学习框架", "飞桨", "PaddleNLP"]
         id2label = {
             0: "negative",
             1: "positive",
         }
         dygraph_taskflow = Taskflow(
-            model="multi_class",
+            model=model,
             task="text_classification",
             task_path=self.dygraph_model_path,
             id2label=id2label,
@@ -101,7 +123,7 @@ def test_taskflow(self, batch_size):
         self.assertEqual(len(dygraph_results), len(input_text))
 
         static_taskflow = Taskflow(
-            model="multi_class",
+            model=model,
             task="text_classification",
             is_static_model=True,
             task_path=self.static_model_path,
@@ -113,5 +135,9 @@ def test_taskflow(self, batch_size):
         self.assertEqual(len(static_results), len(input_text))
 
         for dygraph_result, static_result in zip(dygraph_results, static_results):
-            self.assertEqual(dygraph_result["label"], static_result["label"])
-            self.assertAlmostEqual(dygraph_result["score"], static_result["score"], delta=1e-6)
+            for dygraph_pred, static_pred in zip(dygraph_result["predictions"], static_result["predictions"]):
+                self.assertEqual(dygraph_pred["label"], static_pred["label"])
+                self.assertAlmostEqual(dygraph_pred["score"], static_pred["score"], delta=1e-6)
+                # if multi_label, all predictions should be greater than the threshold
+                if model == "multi_label":
+                    self.assertGreater(dygraph_pred["score"], dygraph_taskflow.task_instance.multilabel_threshold)