
Commit e12caf4

DrRyanHuang authored and SigureMo committed
[Typing][A-48,A-49,A-50][debug] Add type annotations for 3 optimizers (RAdam, RMSProp, Rprop) (PaddlePaddle#65085)
--------- Co-authored-by: SigureMo <[email protected]>
1 parent bacbe49 commit e12caf4
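
All of the changes below apply the same typing pattern: optimizer constructor arguments get PEP 604 unions such as float | Tensor, long unions are wrapped in parentheses, and docstring examples are reflowed to one argument per line. A rough sketch of that style follows; the RAdam/RMSProp/Rprop signatures themselves are not reproduced in this excerpt, so the class and parameter names here are illustrative only, not the actual Paddle code.

from __future__ import annotations

from collections.abc import Sequence

from paddle import Tensor


class MyOptimizer:  # illustrative stand-in, not an actual Paddle class
    def __init__(
        self,
        learning_rate: float | Tensor = 0.001,
        epsilon: float = 1e-8,
        parameters: Sequence[Tensor] | None = None,
        name: str | None = None,
    ) -> None:
        # Store the (possibly Tensor-valued) hyperparameters.
        self._learning_rate = learning_rate
        self._parameters = parameters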

File tree

13 files changed: +241 -129 lines changed


python/paddle/hapi/model_summary.py

Lines changed: 10 additions & 11 deletions
@@ -22,7 +22,7 @@
 from typing_extensions import TypedDict

 import paddle
-from paddle import nn
+from paddle import Tensor, nn
 from paddle.autograd import no_grad
 from paddle.static import InputSpec

@@ -35,17 +35,16 @@ class ModelSummary(TypedDict):


 def summary(
-    net: paddle.nn.Layer,
-    input_size: int
-    | tuple[int, ...]
-    | InputSpec
-    | list[tuple[int, ...] | InputSpec]
-    | None = None,
+    net: nn.Layer,
+    input_size: (
+        int
+        | tuple[int, ...]
+        | InputSpec
+        | list[tuple[int, ...] | InputSpec]
+        | None
+    ) = None,
     dtypes: str | Sequence[str] | None = None,
-    input: paddle.Tensor
-    | Sequence[paddle.Tensor]
-    | dict[str, paddle.Tensor]
-    | None = None,
+    input: Tensor | Sequence[Tensor] | dict[str, Tensor] | None = None,
 ) -> ModelSummary:
     """Prints a string summary of the network.

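For context, a minimal usage sketch of the summary API whose signature is retyped above. The layer sizes are arbitrary, and paddle.summary is assumed to expose this function:

import paddle
from paddle import nn

# Two-layer toy model; input_size=(1, 10) matches the
# int | tuple[int, ...] | InputSpec union accepted above.
net = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 1))
info = paddle.summary(net, input_size=(1, 10))
print(info['total_params'], info['trainable_params'])
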
python/paddle/optimizer/adam.py

Lines changed: 27 additions & 17 deletions
@@ -129,8 +129,10 @@ class Adam(Optimizer):
 >>> inp = paddle.rand([10,10], dtype="float32")
 >>> out = linear(inp)
 >>> loss = paddle.mean(out)
->>> adam = paddle.optimizer.Adam(learning_rate=0.1,
-... parameters=linear.parameters())
+>>> adam = paddle.optimizer.Adam(
+... learning_rate=0.1,
+... parameters=linear.parameters()
+... )
 >>> loss.backward()
 >>> adam.step()
 >>> adam.clear_grad()
@@ -147,11 +149,13 @@ class Adam(Optimizer):
 >>> loss = paddle.mean(out)
 >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
 >>> beta2 = paddle.to_tensor([0.99], dtype="float32")
->>> adam = paddle.optimizer.Adam(learning_rate=0.1,
-... parameters=linear.parameters(),
-... beta1=beta1,
-... beta2=beta2,
-... weight_decay=0.01)
+>>> adam = paddle.optimizer.Adam(
+... learning_rate=0.1,
+... parameters=linear.parameters(),
+... beta1=beta1,
+... beta2=beta2,
+... weight_decay=0.01
+... )
 >>> loss.backward()
 >>> adam.step()
 >>> adam.clear_grad()
@@ -174,12 +178,14 @@ class Adam(Optimizer):
 ... 'beta1': 0.8
 ... }],
 ... weight_decay=0.01,
-... beta1=0.9)
+... beta1=0.9
+... )
 >>> loss.backward()
 >>> adam.step()
 >>> adam.clear_grad()

 """
+
 type: str
 _moment1_acc_str = "moment1"
 _moment2_acc_str = "moment2"
@@ -192,9 +198,9 @@ def __init__(
     beta1: float | Tensor = 0.9,
     beta2: float | Tensor = 0.999,
     epsilon: float | Tensor = 1e-8,
-    parameters: Sequence[Tensor]
-    | Sequence[_AdamParameterConfig]
-    | None = None,
+    parameters: (
+        Sequence[Tensor] | Sequence[_AdamParameterConfig] | None
+    ) = None,
     weight_decay: float | WeightDecayRegularizer | None = None,
     grad_clip: GradientClipBase | None = None,
     lazy_mode: bool = False,
@@ -265,9 +271,11 @@ def _add_moments_pows(self, p):
     name=self._beta1_pow_acc_str,
     param=p,
     dtype=acc_dtype,
-    fill_value=0.9
-    if isinstance(self._beta1, (Variable, Value))
-    else self._beta1,
+    fill_value=(
+        0.9
+        if isinstance(self._beta1, (Variable, Value))
+        else self._beta1
+    ),
     shape=[1],
     type=core.VarDesc.VarType.LOD_TENSOR,
     device='cpu',
@@ -276,9 +284,11 @@ def _add_moments_pows(self, p):
     name=self._beta2_pow_acc_str,
     param=p,
     dtype=acc_dtype,
-    fill_value=0.999
-    if isinstance(self._beta2, (Variable, Value))
-    else self._beta2,
+    fill_value=(
+        0.999
+        if isinstance(self._beta2, (Variable, Value))
+        else self._beta2
+    ),
     shape=[1],
     type=core.VarDesc.VarType.LOD_TENSOR,
     device='cpu',
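
The Sequence[_AdamParameterConfig] branch of the parameters annotation corresponds to the parameter-group form shown in the docstring above; a condensed sketch, with arbitrary layer shapes and per-group values:

import paddle

linear_1 = paddle.nn.Linear(10, 10)
linear_2 = paddle.nn.Linear(10, 10)
# Each dict is one parameter group; keys set here override the
# optimizer-wide defaults passed alongside.
adam = paddle.optimizer.Adam(
    learning_rate=0.1,
    parameters=[
        {'params': linear_1.parameters()},
        {'params': linear_2.parameters(), 'weight_decay': 0.001, 'beta1': 0.8},
    ],
    weight_decay=0.01,
    beta1=0.9,
)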

python/paddle/optimizer/adamax.py

Lines changed: 15 additions & 13 deletions
@@ -117,15 +117,16 @@ class Adamax(Optimizer):
 >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
 >>> beta2 = paddle.to_tensor([0.99], dtype="float32")

->>> adam = paddle.optimizer.Adamax(learning_rate=0.1,
-... parameters=linear.parameters(),
-... beta1=beta1,
-... beta2=beta2,
-... weight_decay=0.01
+>>> adamax = paddle.optimizer.Adamax(
+... learning_rate=0.1,
+... parameters=linear.parameters(),
+... beta1=beta1,
+... beta2=beta2,
+... weight_decay=0.01
 ... )
 >>> out.backward()
->>> adam.step()
->>> adam.clear_grad()
+>>> adamax.step()
+>>> adamax.clear_grad()


 >>> # Note that the learning_rate of linear_2 is 0.01.
@@ -135,7 +136,7 @@ class Adamax(Optimizer):
 >>> out = linear_1(inp)
 >>> out = linear_2(out)
 >>> loss = paddle.mean(out)
->>> adam = paddle.optimizer.Adamax(
+>>> adamax = paddle.optimizer.Adamax(
 ... learning_rate=0.1,
 ... parameters=[{ # type: ignore
 ... 'params': linear_1.parameters()
@@ -149,9 +150,10 @@ class Adamax(Optimizer):
 ... beta1=0.9
 ... )
 >>> out.backward()
->>> adam.step()
->>> adam.clear_grad()
+>>> adamax.step()
+>>> adamax.clear_grad()
 """
+
 type: str
 _moment_acc_str = "moment"
 _inf_norm_acc_str = "inf_norm"
@@ -163,9 +165,9 @@ def __init__(
     beta1: float | Tensor = 0.9,
     beta2: float | Tensor = 0.999,
     epsilon: float | Tensor = 1e-8,
-    parameters: Sequence[Tensor]
-    | Sequence[_AdamaxParameterConfig]
-    | None = None,
+    parameters: (
+        Sequence[Tensor] | Sequence[_AdamaxParameterConfig] | None
+    ) = None,
     weight_decay: float | WeightDecayRegularizer | None = None,
     grad_clip: GradientClipBase | None = None,
     name: str | None = None,

python/paddle/optimizer/adamw.py

Lines changed: 24 additions & 17 deletions
@@ -121,11 +121,12 @@ class AdamW(Optimizer):
 >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
 >>> beta2 = paddle.to_tensor([0.99], dtype="float32")

->>> opt = paddle.optimizer.AdamW(learning_rate=0.1,
-... parameters=linear.parameters(),
-... beta1=beta1,
-... beta2=beta2,
-... weight_decay=0.01
+>>> opt = paddle.optimizer.AdamW(
+... learning_rate=0.1,
+... parameters=linear.parameters(),
+... beta1=beta1,
+... beta2=beta2,
+... weight_decay=0.01
 ... )
 >>> loss.backward()
 >>> opt.step()
@@ -171,9 +172,9 @@ def __init__(
     beta1: float | Tensor = 0.9,
     beta2: float | Tensor = 0.999,
     epsilon: float | Tensor = 1e-8,
-    parameters: Sequence[Tensor]
-    | Sequence[_AdamParameterConfig]
-    | None = None,
+    parameters: (
+        Sequence[Tensor] | Sequence[_AdamParameterConfig] | None
+    ) = None,
     weight_decay: float | Tensor = 0.01,
     lr_ratio: Callable[[Tensor], float] | None = None,
     apply_decay_param_fun: Callable[[str], bool] | None = None,
@@ -383,9 +384,11 @@ def _add_moments_pows(self, p):
     name=self._beta1_pow_acc_str,
     param=p,
     dtype=acc_dtype,
-    fill_value=0.9
-    if isinstance(self._beta1, (Variable, Value))
-    else self._beta1,
+    fill_value=(
+        0.9
+        if isinstance(self._beta1, (Variable, Value))
+        else self._beta1
+    ),
     shape=[1],
     type=core.VarDesc.VarType.LOD_TENSOR,
     device='cpu',
@@ -394,9 +397,11 @@ def _add_moments_pows(self, p):
     name=self._beta2_pow_acc_str,
     param=p,
     dtype=acc_dtype,
-    fill_value=0.999
-    if isinstance(self._beta2, (Variable, Value))
-    else self._beta2,
+    fill_value=(
+        0.999
+        if isinstance(self._beta2, (Variable, Value))
+        else self._beta2
+    ),
     shape=[1],
     type=core.VarDesc.VarType.LOD_TENSOR,
     device='cpu',
@@ -538,9 +543,11 @@ def _append_optimize_op(self, block, param_and_grad):
     "multi_precision": find_master,
     "with_decay": with_decay,
     "coeff": self._weight_decay,
-    "lr_ratio": 1.0
-    if self._lr_ratio is None
-    else self._lr_ratio(param_and_grad[0]),
+    "lr_ratio": (
+        1.0
+        if self._lr_ratio is None
+        else self._lr_ratio(param_and_grad[0])
+    ),
 }

 if isinstance(self._beta1, Variable):
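
The apply_decay_param_fun argument is annotated Callable[[str], bool] | None above: it receives a parameter name and returns whether decoupled weight decay should be applied to that parameter. A sketch; the '.b_0' bias-name suffix used in the filter is an assumption about default parameter naming:

import paddle

linear = paddle.nn.Linear(10, 10)
loss = paddle.mean(linear(paddle.rand([10, 10], dtype="float32")))

# Apply weight decay only to parameters whose names do not end in '.b_0'
# (assumed bias naming convention; adjust the filter for your own model).
opt = paddle.optimizer.AdamW(
    learning_rate=0.1,
    parameters=linear.parameters(),
    weight_decay=0.01,
    apply_decay_param_fun=lambda name: not name.endswith('.b_0'),
)
loss.backward()
opt.step()
opt.clear_grad()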

python/paddle/optimizer/asgd.py

Lines changed: 6 additions & 1 deletion
@@ -94,7 +94,12 @@ class ASGD(Optimizer):
 >>> inp = paddle.to_tensor(inp)
 >>> out = linear(inp)
 >>> loss = paddle.mean(out)
->>> asgd = paddle.optimizer.ASGD(learning_rate=0.001, batch_num=10, parameters=linear.parameters(), weight_decay=0.01)
+>>> asgd = paddle.optimizer.ASGD(
+... learning_rate=0.001,
+... batch_num=10,
+... parameters=linear.parameters(),
+... weight_decay=0.01
+... )
 >>> out.backward()
 >>> asgd.step()
 >>> asgd.clear_grad()

python/paddle/optimizer/lamb.py

Lines changed: 16 additions & 10 deletions
@@ -111,12 +111,18 @@ class Lamb(Optimizer):
 >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
 >>> beta2 = paddle.to_tensor([0.85], dtype="float32")
 >>> lamb = paddle.optimizer.Lamb(
-... learning_rate=0.002, beta1=beta1, beta2=beta2, parameters=linear.parameters(), lamb_weight_decay=0.01)
+... learning_rate=0.002,
+... beta1=beta1,
+... beta2=beta2,
+... parameters=linear.parameters(),
+... lamb_weight_decay=0.01
+... )
 >>> back = out.backward()
 >>> lamb.step()
 >>> lamb.clear_grad()

 """
+
 _moment1_acc_str = "moment1"
 _moment2_acc_str = "moment2"
 _beta1_pow_acc_str = "beta1_pow_acc"
@@ -129,9 +135,9 @@ def __init__(
     beta1: float | Tensor = 0.9,
     beta2: float | Tensor = 0.999,
     epsilon: float | Tensor = 1e-6,
-    parameters: Sequence[Tensor]
-    | Sequence[_LambParameterConfig]
-    | None = None,
+    parameters: (
+        Sequence[Tensor] | Sequence[_LambParameterConfig] | None
+    ) = None,
     grad_clip: GradientClipBase | None = None,
     exclude_from_weight_decay_fn: Callable[[Tensor], bool] | None = None,
     multi_precision: bool = False,
@@ -211,9 +217,9 @@ def _add_moments_pows(self, p):
     name=self._beta1_pow_acc_str,
     param=p,
     dtype=acc_dtype,
-    fill_value=0.9
-    if isinstance(self._beta1, Variable)
-    else self._beta1,
+    fill_value=(
+        0.9 if isinstance(self._beta1, Variable) else self._beta1
+    ),
     shape=[1],
     type=core.VarDesc.VarType.LOD_TENSOR,
     device='cpu',
@@ -222,9 +228,9 @@ def _add_moments_pows(self, p):
     name=self._beta2_pow_acc_str,
     param=p,
     dtype=acc_dtype,
-    fill_value=0.999
-    if isinstance(self._beta2, Variable)
-    else self._beta2,
+    fill_value=(
+        0.999 if isinstance(self._beta2, Variable) else self._beta2
+    ),
     shape=[1],
     type=core.VarDesc.VarType.LOD_TENSOR,
     device='cpu',
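
Similarly, exclude_from_weight_decay_fn is annotated Callable[[Tensor], bool] | None, so it is handed the parameter tensor itself rather than its name; returning True excludes that parameter from LAMB weight decay. A sketch, where skipping all 1-D parameters (biases) is just one possible policy:

import paddle

linear = paddle.nn.Linear(10, 10)
out = linear(paddle.rand([10, 10], dtype="float32"))

# Exclude every 1-D parameter (e.g. biases) from LAMB weight decay.
lamb = paddle.optimizer.Lamb(
    learning_rate=0.002,
    parameters=linear.parameters(),
    lamb_weight_decay=0.01,
    exclude_from_weight_decay_fn=lambda p: p.ndim == 1,
)
out.backward()
lamb.step()
lamb.clear_grad()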

python/paddle/optimizer/momentum.py

Lines changed: 6 additions & 1 deletion
@@ -88,7 +88,11 @@ class Momentum(Optimizer):
 >>> loss = paddle.mean(out)
 >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
 >>> beta2 = paddle.to_tensor([0.99], dtype="float32")
->>> momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
+>>> momentum = paddle.optimizer.Momentum(
+... learning_rate=0.1,
+... parameters=linear.parameters(),
+... weight_decay=0.01
+... )
 >>> back = out.backward()
 >>> momentum.step()
 >>> momentum.clear_grad()
@@ -117,6 +121,7 @@ class Momentum(Optimizer):
 >>> momentum.clear_grad()

 """
+
 _velocity_acc_str = "velocity"

 def __init__(

python/paddle/optimizer/nadam.py

Lines changed: 8 additions & 6 deletions
@@ -109,8 +109,10 @@ class NAdam(Optimizer):
 >>> out = linear(inp)
 >>> loss = paddle.mean(out)

->>> nadam = paddle.optimizer.NAdam(learning_rate=0.1,
-... parameters=linear.parameters())
+>>> nadam = paddle.optimizer.NAdam(
+... learning_rate=0.1,
+... parameters=linear.parameters()
+... )
 >>> out.backward()
 >>> nadam.step()
 >>> nadam.clear_grad()
@@ -124,7 +126,7 @@ class NAdam(Optimizer):
 >>> loss = paddle.mean(out)
 >>> opt = paddle.optimizer.NAdam(
 ... learning_rate=0.1,
-... parameters=[{ # type: ignore
+... parameters=[{  # type: ignore
 ... 'params': linear_1.parameters()
 ... }, {
 ... 'params': linear_2.parameters(),
@@ -154,9 +156,9 @@ def __init__(
     beta2: float | Tensor = 0.999,
     epsilon: float = 1.0e-8,
     momentum_decay: float = 0.004,
-    parameters: Sequence[Tensor]
-    | Sequence[_NAdamParameterConfig]
-    | None = None,
+    parameters: (
+        Sequence[Tensor] | Sequence[_NAdamParameterConfig] | None
+    ) = None,
     weight_decay: float | Tensor | None = None,
     grad_clip: GradientClipBase | None = None,
     name: str | None = None,
