PaddlePaddle · luotao1 · Jul 10, 2024 · Jul 9, 2024 · Jul 9, 2024
diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py
@@ -29,11 +29,7 @@
 
 import numbers
 import warnings
-from typing import (
-    TYPE_CHECKING,
-    Literal,
-    Sequence,
-)
+from typing import TYPE_CHECKING, Literal, Sequence
 
 import numpy as np
 
@@ -64,6 +60,7 @@
         DataLayout3D,
         DataLayoutND,
         DTypeLike,
+        ParamAttrLike,
         ShapeLike,
     )
 
@@ -86,8 +83,8 @@ def __init__(
         num_features: int,
         epsilon: float = 1e-5,
         momentum: float = 0.9,
-        weight_attr: ParamAttr | bool | None = None,
-        bias_attr: ParamAttr | bool | None = None,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
         data_format: Literal["NCHW"] = "NCHW",
         name: str | None = None,
     ) -> None:
@@ -702,9 +699,9 @@ def __init__(
         num_features: int,
         momentum: float = 0.9,
         epsilon: float = 1e-05,
-        weight_attr: ParamAttr | bool | None = None,
-        bias_attr: ParamAttr | bool | None = None,
-        data_format: DataLayout2D | str = 'NCHW',
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayoutND = 'NCHW',
         use_global_stats: bool | None = None,
         name: str | None = None,
     ) -> None:
@@ -946,8 +943,8 @@ def __init__(
         is_test: bool = False,
         momentum: float = 0.9,
         epsilon: float = 1e-05,
-        param_attr: ParamAttr | bool | None = None,
-        bias_attr: ParamAttr | bool | None = None,
+        param_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
         dtype: DTypeLike = 'float32',
         data_layout: DataLayout2D = 'NCHW',
         in_place: bool = False,
@@ -1233,8 +1230,8 @@ def __init__(
         num_features: int,
         momentum: float = 0.9,
         epsilon: float = 1e-05,
-        weight_attr: ParamAttr | bool | None = None,
-        bias_attr: ParamAttr | bool | None = None,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
         data_format: DataLayout1D = 'NCL',
         use_global_stats: bool | None = None,
         name: str | None = None,
@@ -1445,8 +1442,8 @@ def __init__(
         num_features: int,
         momentum: float = 0.9,
         epsilon: float = 1e-05,
-        weight_attr: ParamAttr | bool | None = None,
-        bias_attr: ParamAttr | bool | None = None,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
         data_format: DataLayout3D = 'NCDHW',
         use_global_stats: bool | None = None,
         name: str | None = None,
@@ -1579,10 +1576,10 @@ def __init__(
         num_features: int,
         momentum: float = 0.9,
         epsilon: float = 1e-05,
-        weight_attr: ParamAttr | bool | None = None,
-        bias_attr: ParamAttr | bool | None = None,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
         data_format: DataLayoutND = 'NCHW',
-        name=None,
+        name: str | None = None,
     ) -> None:
         super().__init__(
             num_features,

diff --git a/python/paddle/sparse/nn/layer/norm.py b/python/paddle/sparse/nn/layer/norm.py
@@ -12,14 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import warnings
+from typing import TYPE_CHECKING, Literal
 
 import paddle
 from paddle import _C_ops, in_dynamic_mode
 from paddle.base.layer_helper import LayerHelper
 from paddle.framework import no_grad
 from paddle.nn.layer.norm import _BatchNormBase
 
+if TYPE_CHECKING:
+    from paddle import Tensor
+    from paddle._typing import (
+        DataLayoutND,
+        ParamAttrLike,
+    )
+    from paddle.nn import Layer
+
 
 class BatchNorm(paddle.nn.BatchNorm1D):
     r"""
@@ -68,7 +79,7 @@ class BatchNorm(paddle.nn.BatchNorm1D):
             If it is set to None or one attribute of ParamAttr, batch_norm
             will create ParamAttr as bias_attr. If it is set to False, the bias is not learnable.
             If the Initializer of the bias_attr is not set, the bias is initialized to zero. Default: None.
-        data_format(str, optional): Specify the input data format, may be "NC", "NCL" or "NLC". Default "NCL".
+        data_format(str, optional): Specify the input data format, may be "NDHWC" or "NHWC". Default "NDHWC".
         use_global_stats(bool|None, optional): Whether to use global mean and variance. If set to False, use the statistics of one mini-batch, if set to True, use the global statistics, if set to None, use global statistics in the test phase and use the statistics of one mini-batch in the training phase. Default: None.
         name(str, optional): Name for the BatchNorm, default is None. For more information, please refer to :ref:`api_guide_Name`.
 
@@ -97,15 +108,15 @@ class BatchNorm(paddle.nn.BatchNorm1D):
 
     def __init__(
         self,
-        num_features,
-        momentum=0.9,
-        epsilon=1e-05,
-        weight_attr=None,
-        bias_attr=None,
-        data_format='NDHWC',
-        use_global_stats=None,
-        name=None,
-    ):
+        num_features: int,
+        momentum: float = 0.9,
+        epsilon: float = 1e-05,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: Literal["NDHWC", "NHWC"] = "NDHWC",
+        use_global_stats: bool | None = None,
+        name: str | None = None,
+    ) -> None:
         super().__init__(
             num_features,
             momentum=momentum,
@@ -117,13 +128,13 @@ def __init__(
             name=name,
         )
 
-    def _check_data_format(self, input):
+    def _check_data_format(self, input: Literal["NDHWC", "NHWC"]) -> None:
         if input not in ["NDHWC", "NHWC"]:
             raise ValueError(
                 'sparse BatchNorm only support layout of "NDHWC" and "NHWC"'
             )
 
-    def forward(self, input):
+    def forward(self, input: Tensor) -> Tensor:
         self._check_data_format(self._data_format)
 
         if self.training:
@@ -304,14 +315,14 @@ class SyncBatchNorm(paddle.nn.SyncBatchNorm):
 
     def __init__(
         self,
-        num_features,
-        momentum=0.9,
-        epsilon=1e-05,
-        weight_attr=None,
-        bias_attr=None,
-        data_format='NCHW',
-        name=None,
-    ):
+        num_features: int,
+        momentum: float = 0.9,
+        epsilon: float = 1e-05,
+        weight_attr: ParamAttrLike | None = None,
+        bias_attr: ParamAttrLike | None = None,
+        data_format: DataLayoutND = 'NCHW',
+        name: str | None = None,
+    ) -> None:
         super().__init__(
             num_features,
             momentum,
@@ -322,7 +333,7 @@ def __init__(
             name,
         )
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         self._check_data_format()
         sync_batch_norm_out, _, _, _, _, _ = _C_ops.sparse_sync_batch_norm_(
             x,
@@ -340,7 +351,7 @@ def forward(self, x):
         return sync_batch_norm_out
 
     @classmethod
-    def convert_sync_batchnorm(cls, layer):
+    def convert_sync_batchnorm(cls, layer: Layer) -> Layer:
         r"""
         Helper function to convert :class:`paddle.sparse.nn.BatchNorm` layers in the model to :class:`paddle.sparse.nn.SyncBatchNorm` layers.