3 changes: 3 additions & 0 deletions python/paddle/optimizer/adadelta.py
@@ -145,8 +145,11 @@ def _create_accumulators(self, block, parameters):
             parameters = parameters.get('params')
 
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(self._avg_squared_grad_acc_str, p)
             self._add_accumulator(self._avg_squared_update_acc_str, p)
+            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         if isinstance(param_and_grad, dict):

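Note: the same guard recurs in every file below. As a minimal standalone sketch of the pattern (a hypothetical ToyOptimizer, not Paddle's API), a set of parameter names makes repeated calls to _create_accumulators idempotent, for example when the optimizer is driven once per parameter group:

class ToyOptimizer:
    def __init__(self):
        self._accumulators = {}                   # (kind, param name) -> state stand-in
        self._already_create_accumulater = set()  # parameter names already handled

    def _add_accumulator(self, kind, p):
        # Re-creating an accumulator would silently reset optimizer state.
        self._accumulators[(kind, p.name)] = 0.0

    def _create_accumulators(self, parameters):
        for p in parameters:
            if p.name in self._already_create_accumulater:
                continue  # accumulators already exist for this parameter
            self._add_accumulator("avg_squared_grad", p)
            self._add_accumulator("avg_squared_update", p)
            self._already_create_accumulater.add(p.name)
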
3 changes: 3 additions & 0 deletions python/paddle/optimizer/adagrad.py
@@ -139,11 +139,14 @@ def _create_accumulators(self, block, parameters):
             parameters = self._update_param_group(parameters)
 
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(
                 self._moment_acc_str,
                 p,
                 fill_value=self.initial_accumulator_value,
             )
+            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

4 changes: 4 additions & 0 deletions python/paddle/optimizer/adam.py
@@ -317,9 +317,12 @@ def _create_accumulators(self, block, parameters):
 
         # Create accumulator tensors for first and second moments
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -330,6 +333,7 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
+            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

3 changes: 3 additions & 0 deletions python/paddle/optimizer/adamax.py
@@ -176,6 +176,8 @@ def _create_accumulators(self, block, parameters):
 
         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
             self._add_accumulator(
@@ -184,6 +186,7 @@ def _create_accumulators(self, block, parameters):
                 fill_value=self._beta1,
                 shape=[1],
             )
+            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

5 changes: 5 additions & 0 deletions python/paddle/optimizer/adamw.py
@@ -281,6 +281,7 @@ def __init__(
         self._use_multi_tensor = None
         self.regularization = None
         self._auxiliary_vars = {}
+        self._already_create_accumulater = set()
 
     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val
@@ -422,9 +423,12 @@ def _create_accumulators(self, block, parameters):
 
         # Create accumulator tensors for first and second moments
        for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and self._is_dtype_fp16_or_bf16(p.dtype):
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 self._is_dtype_fp16_or_bf16(p.dtype)
@@ -435,6 +439,7 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Adam optimizer."
                 )
             self._add_moments_pows(p)
+            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

4 changes: 4 additions & 0 deletions python/paddle/optimizer/lamb.py
@@ -190,11 +190,15 @@ def _create_accumulators(self, block, parameters):
 
         # Create accumulator tensors for first and second moments
        for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_moments_pows(master_p)
+                self._already_create_accumulater.add(p.name)
             else:
                 self._add_moments_pows(p)
+                self._already_create_accumulater.add(p.name)
 
     def _get_accumulator(self, name, param):
         """Utility function to fetch an accumulator for a parameter

4 changes: 4 additions & 0 deletions python/paddle/optimizer/momentum.py
@@ -270,9 +270,12 @@ def _create_accumulators(self, block, parameters):
             parameters = self._update_param_group(parameters)
 
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
                 self._add_accumulator(self._velocity_acc_str, master_p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16
@@ -283,6 +286,7 @@ def _create_accumulators(self, block, parameters):
                     "Consider using multi_precision=True option of the Momentum optimizer."
                 )
             self._add_accumulator(self._velocity_acc_str, p)
+            self._already_create_accumulater.add(p.name)
 
     def _create_regularization_of_grad(self, param, grad, regularization=None):
         """Create and add backward regularization Operators

1 change: 1 addition & 0 deletions python/paddle/optimizer/optimizer.py
@@ -276,6 +276,7 @@ def __init__(
 
         self._param_dict = self._create_multi_tensor_dict()
         self._auxiliary_vars = {}
+        self._already_create_accumulater = set()
 
     def _set_auxiliary_var(self, key, val):
         self._auxiliary_vars[key] = val

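With the set initialized once in the base Optimizer.__init__, every subclass shares the same guard. A hedged usage sketch against the hypothetical ToyOptimizer above (not Paddle code) shows the intended behavior: a second pass over the same parameters creates nothing new.

from types import SimpleNamespace

params = [SimpleNamespace(name="w"), SimpleNamespace(name="b")]
opt = ToyOptimizer()
opt._create_accumulators(params)    # creates 2 kinds x 2 params = 4 accumulators
before = dict(opt._accumulators)
opt._create_accumulators(params)    # every name is already in the set, so each is skipped
assert opt._accumulators == before  # state was not re-created or reset
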
3 changes: 3 additions & 0 deletions python/paddle/optimizer/rmsprop.py
@@ -199,9 +199,12 @@ def _create_accumulators(self, block, parameters):
             parameters = parameters.get('params')
 
         for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             self._add_accumulator(self._momentum_acc_str, p)
             self._add_accumulator(self._mean_square_acc_str, p)
             self._add_accumulator(self._mean_grad_acc_str, p)
+            self._already_create_accumulater.add(p.name)
 
     def _append_optimize_op(self, block, param_and_grad):
         if not isinstance(block, framework.Block):

3 changes: 3 additions & 0 deletions python/paddle/optimizer/sgd.py
@@ -129,8 +129,11 @@ def _create_accumulators(self, block, parameters):
 
         # Create accumulator tensors for first and second moments
        for p in parameters:
+            if p.name in self._already_create_accumulater:
+                continue
             if self._multi_precision and p.dtype == core.VarDesc.VarType.FP16:
                 master_p = self._create_master_weight(p)
+                self._already_create_accumulater.add(p.name)
                 continue
             if (
                 p.dtype == core.VarDesc.VarType.FP16