PaddlePaddle · kangguangli · Apr 7, 2024 · Mar 25, 2024 · Mar 26, 2024 · Mar 27, 2024
diff --git a/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py b/paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -142,6 +142,7 @@
     'decayed_adagrad',
     'distributed_push_sparse',
     'distributed_lookup_table',
+    'dgc_momentum',
     'dpsgd',
     'embedding_grad_sparse',
     'ftrl',

diff --git a/paddle/fluid/pir/dialect/operator/ir/ops.yaml b/paddle/fluid/pir/dialect/operator/ir/ops.yaml
@@ -470,6 +470,16 @@
   optional : in_accum, in_state, out_scale, out_accum, out_state
   inplace : (scale -> out_scale, in_accum -> out_accum, in_state -> out_state)
 
+- op : dgc_momentum
+  args : (Tensor param, Tensor grad, Tensor velocity, Tensor learning_rate, Tensor master_param, Tensor current_step_tensor, Tensor nranks_tensor, float mu, bool use_nesterov=false, str regularization_method="", float regularization_coeff=0.0f, bool multi_precision=false, float rescale_grad=1.0f, float rampup_begin_step=-1.0f)
+  output : Tensor(param_out), Tensor(velocity_out), Tensor(master_param_out), Tensor(grad_out)
+  infer_meta :
+    func : DGCMomentumInferMeta
+  kernel :
+    func : dgc_momentum
-    func : dgc_momentum
+    func : dgc_momentum
+    param: [same parameter order as the kernel]
-    func : dgc_momentum
+    func : dgc_momentum
+    param: [same parameter order as the kernel]
+    data_type : param
+  optional : master_param, master_param_out
+
 - op : disable_check_model_nan_inf
   args: (Tensor x, int flag = 0)
   output: Tensor(out)

diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
@@ -818,6 +818,12 @@
   outputs :
     out : Out
 
+- op : dgc_momentum
+  inputs :
+    {param : Param, grad : Grad, velocity : Velocity, learning_rate : LearningRate, master_param : MasterParam, current_step_tensor : current_step, nranks_tensor : nranks}
+  outputs :
+    {param_out : ParamOut, velocity_out : VelocityOut, master_param_out : MasterParamOut, grad_out : Grad_out}
+
 - op : diag (diag_v2)
   backward : diag_grad (diag_v2_grad)
   inputs :

diff --git a/test/ir/pir/translator/CMakeLists.txt b/test/ir/pir/translator/CMakeLists.txt
@@ -19,6 +19,7 @@ list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST
 list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST
      test_distributed_push_sparse_translator)
 list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST test_distributed_fused_lamb_init)
+list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST test_dgc_momentum_translator)
 list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST test_nop_translator)
 list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST test_partial_allgather_translator)
 list(APPEND DISTRIBUTED_OP_TRANSLATOR_TEST test_partial_send_translator)

diff --git a/test/ir/pir/translator/test_dgc_momentum_translator.py b/test/ir/pir/translator/test_dgc_momentum_translator.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import test_op_translator
+
+import paddle
+from paddle.base.layer_helper import LayerHelper
+
+
+class TestDgcMomemtumOpTranslator(test_op_translator.TestOpTranslator):  # test case built around the `dgc_momentum` op
+    def append_op(self):  # appends one `dgc_momentum` op through LayerHelper
+        self.op_type = "dgc_momentum"
+
+        grad = paddle.ones(shape=(123, 321), dtype='float32')
+        param = paddle.ones(shape=(123, 321), dtype='float32')
+        velocity = paddle.zeros(shape=(123, 321), dtype='float32')
+        learning_rate = paddle.to_tensor([0.001], dtype='float32')
+        current_step = paddle.to_tensor([1], dtype='float32')
+        nranks = paddle.to_tensor([1, 1], dtype='float32')  # NOTE(review): two elements, unlike the one-element tensors above -- confirm intended
+
+        param_out = paddle.ones(shape=(123, 321), dtype='float32')  # output tensors use the same (123, 321) float32 shape as param/grad
+        velocity_out = paddle.ones(shape=(123, 321), dtype='float32')
+        grad_out = paddle.ones(shape=(123, 321), dtype='float32')
+
+        attrs = {  # only these three attributes are set explicitly
+            'mu': 0.0001,
+            'use_nesterov': False,
+            'rampup_begin_step': 10.0,
+        }
+        helper = LayerHelper(self.op_type)
+        helper.append_op(
+            type=self.op_type,
+            inputs={  # no "MasterParam" entry; master_param is listed as optional in this change's ops.yaml
+                "Param": param,
+                "Grad": grad,
+                "Velocity": velocity,
+                "LearningRate": learning_rate,
+                "current_step": current_step,  # lower-case slot name, matching the op_compat.yaml mapping
+                "nranks": nranks,  # lower-case slot name, matching the op_compat.yaml mapping
+            },
+            outputs={  # no "MasterParamOut" entry; master_param_out is listed as optional in this change's ops.yaml
+                "ParamOut": param_out,
+                "VelocityOut": velocity_out,
+                "Grad_out": grad_out,
+            },
+            attrs=attrs,
+        )
+
+    def test_translator(self):
+        self.check()  # check() is not defined in this class; presumably provided by TestOpTranslator -- verify there
+
+
+if __name__ == "__main__":
+    unittest.main()