21 changes: 10 additions & 11 deletions python/paddle/hapi/model_summary.py
@@ -22,7 +22,7 @@
from typing_extensions import TypedDict

import paddle
from paddle import nn
from paddle import Tensor, nn
from paddle.autograd import no_grad
from paddle.static import InputSpec

@@ -35,17 +35,16 @@ class ModelSummary(TypedDict):


def summary(
net: paddle.nn.Layer,
input_size: int
| tuple[int, ...]
| InputSpec
| list[tuple[int, ...] | InputSpec]
| None = None,
net: nn.Layer,
input_size: (
int
| tuple[int, ...]
| InputSpec
| list[tuple[int, ...] | InputSpec]
| None
) = None,
dtypes: str | Sequence[str] | None = None,
input: paddle.Tensor
| Sequence[paddle.Tensor]
| dict[str, paddle.Tensor]
| None = None,
input: Tensor | Sequence[Tensor] | dict[str, Tensor] | None = None,
) -> ModelSummary:
"""Prints a string summary of the network.
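The model_summary.py hunks import Tensor directly from paddle and wrap the long union annotations in parentheses; the behaviour of summary() is unchanged. As a quick sanity check, a minimal usage sketch consistent with the annotated types (toy network, not part of this PR):

import paddle
from paddle import nn

# Toy network; input_size may be a tuple, an InputSpec, or a list of either,
# matching the annotation above. The leading 1 is the batch dimension.
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))
info = paddle.summary(net, input_size=(1, 1, 28, 28))
print(info["total_params"], info["trainable_params"])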
44 changes: 27 additions & 17 deletions python/paddle/optimizer/adam.py
@@ -129,8 +129,10 @@ class Adam(Optimizer):
>>> inp = paddle.rand([10,10], dtype="float32")
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> adam = paddle.optimizer.Adam(learning_rate=0.1,
... parameters=linear.parameters())
>>> adam = paddle.optimizer.Adam(
... learning_rate=0.1,
... parameters=linear.parameters()
... )
>>> loss.backward()
>>> adam.step()
>>> adam.clear_grad()
@@ -147,11 +149,13 @@ class Adam(Optimizer):
>>> loss = paddle.mean(out)
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> adam = paddle.optimizer.Adam(learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01)
>>> adam = paddle.optimizer.Adam(
... learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
... )
>>> loss.backward()
>>> adam.step()
>>> adam.clear_grad()
@@ -174,12 +178,14 @@ class Adam(Optimizer):
... 'beta1': 0.8
... }],
... weight_decay=0.01,
... beta1=0.9)
... beta1=0.9
... )
>>> loss.backward()
>>> adam.step()
>>> adam.clear_grad()
"""

type: str
_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
@@ -192,9 +198,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_AdamParameterConfig] | None
) = None,
weight_decay: float | WeightDecayRegularizer | None = None,
grad_clip: GradientClipBase | None = None,
lazy_mode: bool = False,
@@ -265,9 +271,11 @@ def _add_moments_pows(self, p):
name=self._beta1_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1,
fill_value=(
0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -276,9 +284,11 @@ def _add_moments_pows(self, p):
name=self._beta2_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2,
fill_value=(
0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
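The recurring signature change in these optimizer files wraps the parameters union in parentheses so it can break across lines without backslashes. A self-contained sketch of the style with hypothetical names (not Paddle code):

from __future__ import annotations

from collections.abc import Sequence

def configure(
    # Parenthesizing the union lets a formatter split it over several lines
    # while keeping the "= None" default readable on the closing line.
    parameters: (
        Sequence[float] | Sequence[dict[str, float]] | None
    ) = None,
) -> None:
    print(parameters)

configure()                # None
configure([0.1, 0.2])      # flat sequence of values
configure([{"lr": 0.1}])   # sequence of per-group config dicts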
28 changes: 15 additions & 13 deletions python/paddle/optimizer/adamax.py
@@ -117,15 +117,16 @@ class Adamax(Optimizer):
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> adam = paddle.optimizer.Adamax(learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
>>> adamax = paddle.optimizer.Adamax(
... learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
... )
>>> out.backward()
>>> adam.step()
>>> adam.clear_grad()
>>> adamax.step()
>>> adamax.clear_grad()
>>> # Note that the learning_rate of linear_2 is 0.01.
@@ -135,7 +136,7 @@ class Adamax(Optimizer):
>>> out = linear_1(inp)
>>> out = linear_2(out)
>>> loss = paddle.mean(out)
>>> adam = paddle.optimizer.Adamax(
>>> adamax = paddle.optimizer.Adamax(
... learning_rate=0.1,
... parameters=[{ # type: ignore
... 'params': linear_1.parameters()
@@ -149,9 +150,10 @@ def __init__(
... beta1=0.9
... )
>>> out.backward()
>>> adam.step()
>>> adam.clear_grad()
>>> adamax.step()
>>> adamax.clear_grad()
"""

type: str
_moment_acc_str = "moment"
_inf_norm_acc_str = "inf_norm"
@@ -163,9 +165,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamaxParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_AdamaxParameterConfig] | None
) = None,
weight_decay: float | WeightDecayRegularizer | None = None,
grad_clip: GradientClipBase | None = None,
name: str | None = None,
41 changes: 24 additions & 17 deletions python/paddle/optimizer/adamw.py
@@ -121,11 +121,12 @@ class AdamW(Optimizer):
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> opt = paddle.optimizer.AdamW(learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
>>> opt = paddle.optimizer.AdamW(
... learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
... )
>>> loss.backward()
>>> opt.step()
@@ -171,9 +172,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_AdamParameterConfig] | None
) = None,
weight_decay: float | Tensor = 0.01,
lr_ratio: Callable[[Tensor], float] | None = None,
apply_decay_param_fun: Callable[[str], bool] | None = None,
@@ -383,9 +384,11 @@ def _add_moments_pows(self, p):
name=self._beta1_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1,
fill_value=(
0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -394,9 +397,11 @@ def _add_moments_pows(self, p):
name=self._beta2_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2,
fill_value=(
0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -538,9 +543,11 @@ def _append_optimize_op(self, block, param_and_grad):
"multi_precision": find_master,
"with_decay": with_decay,
"coeff": self._weight_decay,
"lr_ratio": 1.0
if self._lr_ratio is None
else self._lr_ratio(param_and_grad[0]),
"lr_ratio": (
1.0
if self._lr_ratio is None
else self._lr_ratio(param_and_grad[0])
),
}

if isinstance(self._beta1, Variable):
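The same parenthesized style is applied to conditional expressions passed as keyword arguments (fill_value, lr_ratio). A hypothetical helper illustrating the lr_ratio fallback shown above (names assumed, not Paddle internals):

from typing import Callable, Optional

def resolve_lr_ratio(
    lr_ratio: Optional[Callable[[str], float]], param_name: str
) -> float:
    # Fall back to 1.0 when no per-parameter ratio function is supplied,
    # otherwise call it for the given parameter.
    return (
        1.0
        if lr_ratio is None
        else lr_ratio(param_name)
    )

print(resolve_lr_ratio(None, "linear.weight"))              # 1.0
print(resolve_lr_ratio(lambda name: 0.5, "linear.weight"))  # 0.5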
7 changes: 6 additions & 1 deletion python/paddle/optimizer/asgd.py
@@ -94,7 +94,12 @@ class ASGD(Optimizer):
>>> inp = paddle.to_tensor(inp)
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> asgd = paddle.optimizer.ASGD(learning_rate=0.001, batch_num=10, parameters=linear.parameters(), weight_decay=0.01)
>>> asgd = paddle.optimizer.ASGD(
... learning_rate=0.001,
... batch_num=10,
... parameters=linear.parameters(),
... weight_decay=0.01
... )
>>> out.backward()
>>> asgd.step()
>>> asgd.clear_grad()
26 changes: 16 additions & 10 deletions python/paddle/optimizer/lamb.py
@@ -111,12 +111,18 @@ class Lamb(Optimizer):
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.85], dtype="float32")
>>> lamb = paddle.optimizer.Lamb(
... learning_rate=0.002, beta1=beta1, beta2=beta2, parameters=linear.parameters(), lamb_weight_decay=0.01)
... learning_rate=0.002,
... beta1=beta1,
... beta2=beta2,
... parameters=linear.parameters(),
... lamb_weight_decay=0.01
... )
>>> back = out.backward()
>>> lamb.step()
>>> lamb.clear_grad()
"""

_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
_beta1_pow_acc_str = "beta1_pow_acc"
@@ -129,9 +135,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-6,
parameters: Sequence[Tensor]
| Sequence[_LambParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_LambParameterConfig] | None
) = None,
grad_clip: GradientClipBase | None = None,
exclude_from_weight_decay_fn: Callable[[Tensor], bool] | None = None,
multi_precision: bool = False,
@@ -211,9 +217,9 @@ def _add_moments_pows(self, p):
name=self._beta1_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.9
if isinstance(self._beta1, Variable)
else self._beta1,
fill_value=(
0.9 if isinstance(self._beta1, Variable) else self._beta1
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -222,9 +228,9 @@ def _add_moments_pows(self, p):
name=self._beta2_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.999
if isinstance(self._beta2, Variable)
else self._beta2,
fill_value=(
0.999 if isinstance(self._beta2, Variable) else self._beta2
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
7 changes: 6 additions & 1 deletion python/paddle/optimizer/momentum.py
@@ -88,7 +88,11 @@ class Momentum(Optimizer):
>>> loss = paddle.mean(out)
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
>>> momentum = paddle.optimizer.Momentum(
... learning_rate=0.1,
... parameters=linear.parameters(),
... weight_decay=0.01
... )
>>> back = out.backward()
>>> momentum.step()
>>> momentum.clear_grad()
@@ -117,6 +121,7 @@ class Momentum(Optimizer):
>>> momentum.clear_grad()
"""

_velocity_acc_str = "velocity"

def __init__(
14 changes: 8 additions & 6 deletions python/paddle/optimizer/nadam.py
@@ -109,8 +109,10 @@ class NAdam(Optimizer):
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> nadam = paddle.optimizer.NAdam(learning_rate=0.1,
... parameters=linear.parameters())
>>> nadam = paddle.optimizer.NAdam(
... learning_rate=0.1,
... parameters=linear.parameters()
... )
>>> out.backward()
>>> nadam.step()
>>> nadam.clear_grad()
@@ -124,7 +126,7 @@
>>> loss = paddle.mean(out)
>>> opt = paddle.optimizer.NAdam(
... learning_rate=0.1,
... parameters=[{ # type: ignore
... parameters=[{ # type: ignore
... 'params': linear_1.parameters()
... }, {
... 'params': linear_2.parameters(),
@@ -154,9 +156,9 @@ def __init__(
beta2: float | Tensor = 0.999,
epsilon: float = 1.0e-8,
momentum_decay: float = 0.004,
parameters: Sequence[Tensor]
| Sequence[_NAdamParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_NAdamParameterConfig] | None
) = None,
weight_decay: float | Tensor | None = None,
grad_clip: GradientClipBase | None = None,
name: str | None = None,