[Typing][C-127,C-128] Add type annotations for python/paddle/audio/datasets/{esc50,tess}.py (#67067)

enkilee · web-flow · commit 4306af2a2d81 · 2024-08-14T14:32:57.000+08:00
diff --git a/python/paddle/audio/datasets/esc50.py b/python/paddle/audio/datasets/esc50.py
@@ -13,14 +13,30 @@
 # limitations under the License.
 from __future__ import annotations
 
-import collections
 import os
+from typing import TYPE_CHECKING, Any, Literal, NamedTuple
+
+from typing_extensions import TypeAlias
 
 from paddle.dataset.common import DATA_HOME
 from paddle.utils import download
 
 from .dataset import AudioClassificationDataset
 
+if TYPE_CHECKING:  # aliases below exist for static type checkers only
+    _ModeLiteral: TypeAlias = Literal[  # annotates the `mode` argument
+        'train',
+        'dev',
+    ]
+    _FeatTypeLiteral: TypeAlias = Literal[  # annotates `feat_type`
+        'raw',
+        'melspectrogram',
+        'mfcc',
+        'logmelspectrogram',
+        'spectrogram',
+    ]
+
+
 __all__ = []
 
 
@@ -80,12 +96,12 @@ class ESC50(AudioClassificationDataset):
 
     """
 
-    archive = {
+    archive: dict[str, str] = {
         'url': 'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip',
         'md5': '7771e4b9d86d0945acce719c7a59305a',
     }
 
-    label_list = [
+    label_list: list[str] = [
         # Animals
         'Dog',
         'Rooster',
@@ -142,21 +158,26 @@ class ESC50(AudioClassificationDataset):
         'Fireworks',
         'Hand saw',
     ]
-    meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
-    meta_info = collections.namedtuple(
-        'META_INFO',
-        ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'),
-    )
-    audio_path = os.path.join('ESC-50-master', 'audio')
+    meta: str = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
+    audio_path: str = os.path.join('ESC-50-master', 'audio')
+
+    class meta_info(NamedTuple):  # lowercase: keeps old attribute name
+        filename: str  # field order must match esc50.csv column order
+        fold: str
+        target: str
+        category: str
+        esc10: str
+        src_file: str
+        take: str
 
     def __init__(
         self,
-        mode: str = 'train',
+        mode: _ModeLiteral = 'train',
         split: int = 1,
-        feat_type: str = 'raw',
-        archive=None,
-        **kwargs,
-    ):
+        feat_type: _FeatTypeLiteral = 'raw',
+        archive: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
         assert split in range(
             1, 6
         ), f'The selected split should be integer, and 1 <= split <= 5, but got {split}'
@@ -167,14 +188,16 @@ def __init__(
             files=files, labels=labels, feat_type=feat_type, **kwargs
         )
 
-    def _get_meta_info(self) -> list[collections.namedtuple]:
+    def _get_meta_info(self) -> list[meta_info]:  # skips first CSV line
         ret = []
         with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
             for line in rf.readlines()[1:]:
                 ret.append(self.meta_info(*line.strip().split(',')))
         return ret
 
-    def _get_data(self, mode: str, split: int) -> tuple[list[str], list[int]]:
+    def _get_data(
+        self, mode: _ModeLiteral, split: int
+    ) -> tuple[list[str], list[int]]:
         if not os.path.isdir(
             os.path.join(DATA_HOME, self.audio_path)
         ) or not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
diff --git a/python/paddle/audio/datasets/tess.py b/python/paddle/audio/datasets/tess.py
@@ -13,14 +13,17 @@
 # limitations under the License.
 from __future__ import annotations
 
-import collections
 import os
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 from paddle.dataset.common import DATA_HOME
 from paddle.utils import download
 
 from .dataset import AudioClassificationDataset
 
+if TYPE_CHECKING:
+    from .esc50 import _FeatTypeLiteral, _ModeLiteral
+
 __all__ = []
 
 
@@ -71,12 +74,12 @@ class TESS(AudioClassificationDataset):
             ...     # [feature_dim, num_frames] , label_id
     """
 
-    archive = {
+    archive: dict[str, str] = {
         'url': 'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip',
         'md5': '1465311b24d1de704c4c63e4ccc470c7',
     }
 
-    label_list = [
+    label_list: list[str] = [
         'angry',
         'disgust',
         'fear',
@@ -85,20 +88,23 @@ class TESS(AudioClassificationDataset):
         'ps',  # pleasant surprise
         'sad',
     ]
-    meta_info = collections.namedtuple(
-        'META_INFO', ('speaker', 'word', 'emotion')
-    )
-    audio_path = 'TESS_Toronto_emotional_speech_set'
+
+    audio_path: str = 'TESS_Toronto_emotional_speech_set'
+
+    class meta_info(NamedTuple):  # lowercase: keeps old attribute name
+        speaker: str
+        word: str
+        emotion: str
 
     def __init__(
         self,
-        mode: str = 'train',
+        mode: _ModeLiteral = 'train',
         n_folds: int = 5,
         split: int = 1,
-        feat_type: str = 'raw',
-        archive=None,
-        **kwargs,
-    ):
+        feat_type: _FeatTypeLiteral = 'raw',
+        archive: dict[str, str] | None = None,
+        **kwargs: Any,
+    ) -> None:
         assert isinstance(n_folds, int) and (
             n_folds >= 1
         ), f'the n_folds should be integer and n_folds >= 1, but got {n_folds}'
@@ -112,7 +118,7 @@ def __init__(
             files=files, labels=labels, feat_type=feat_type, **kwargs
         )
 
-    def _get_meta_info(self, files) -> list[collections.namedtuple]:
+    def _get_meta_info(self, files) -> list[meta_info]:
         ret = []
         for file in files:
             basename_without_extend = os.path.basename(file)[:-4]