Commit bf6ddb8

revert shard_dataloader

1 parent 40c1df6 commit bf6ddb8
File tree

6 files changed: +4 -88 lines changed

paddle/fluid/pybind/dist_api.cc

Lines changed: 0 additions & 7 deletions
@@ -81,13 +81,6 @@ void BindTensorDistAttribute(py::module *m) {
           [](TensorDistAttribute &self) {
             return self.process_mesh_attr().process_mesh();
           })
-      .def_property_readonly(
-          "process_mesh_attr",
-          [](TensorDistAttribute &self) { return self.process_mesh_attr(); })
-      .def_property_readonly("process_mesh_name",
-                             [](TensorDistAttribute &self) {
-                               return self.process_mesh_attr().dim_names();
-                             })
       .def_property_readonly(
           "dims_mapping",
           [](TensorDistAttribute &self) { return self.dims_mapping(); })

paddle/fluid/pybind/pir.cc

Lines changed: 0 additions & 4 deletions
@@ -1532,10 +1532,6 @@ void BindAttribute(py::module *m) {
             }
             return py::cast<py::none>(Py_None);
           })
-      .def("as_int64",
-           [](Attribute &self) {
-             return reinterpret_cast<int64_t>(static_cast<const void *>(self));
-           })
       .def("as_array_attr", [](Attribute &self) -> py::object {
         if (auto array_attr = self.dyn_cast<ArrayAttribute>()) {
           return py::cast(array_attr);

python/paddle/distributed/auto_parallel/api.py

Lines changed: 2 additions & 68 deletions
@@ -14,7 +14,6 @@
 from __future__ import annotations
 
 import copy
-import typing
 from types import MethodType
 from typing import TYPE_CHECKING, Any, Literal, TypedDict
 
@@ -340,11 +339,9 @@ def forward(
         if local_tensor.is_dist():
             local_mesh = local_tensor.process_mesh
             local_val = local_tensor._local_value()
-            # local_placement = local_tensor.placements[0]
         else:
             local_val = local_tensor
             local_mesh = None
-            # local_placement = dist.Replicate()
 
         ctx.global_mesh = copy.deepcopy(mesh)
         ctx.placements = placements
@@ -2766,15 +2763,7 @@ def __init__(
         dataloader: paddle.io.DataLoader,
         meshes: ProcessMesh | list[ProcessMesh] | tuple[ProcessMesh],
         input_keys: list[str] | tuple[str] | None = None,
-        shard_dims: (
-            list
-            | tuple
-            | str
-            | int
-            | list[dist.Placement]
-            | list[list[dist.Placement]]
-            | None
-        ) = None,
+        shard_dims: list | tuple | str | int | None = None,
         is_dataset_splitted: bool = False,
     ):
         # do some check
@@ -2850,7 +2839,6 @@ def __init__(
         self._dataloader.pin_memory = False
 
     def _process_shard_dims(self, shard_dims):
-        shard_dims = self._convert_shard_dim_type(shard_dims)
         if isinstance(shard_dims, (int, str)) or shard_dims is None:
             res = []
             for i in range(len(self._meshes)):
@@ -2866,52 +2854,6 @@ def _process_shard_dims(self, shard_dims):
             )
         return shard_dims
 
-    def _convert_placements_to_mesh_dim(self, placements):
-        mesh_dim = None
-        for i, placement in enumerate(placements):
-            if placement.is_shard():
-                shard_dim = typing.cast(dist.Shard, placement).get_dim()
-                assert (
-                    shard_dim == 0
-                ), "Only the 0th dim of the input can be sharded."
-                assert (
-                    mesh_dim is None
-                ), "The input placements can only contain one Shard(0)."
-                mesh_dim = i
-            else:
-                assert (
-                    placement.is_replicate()
-                ), "The input placement must be Replicate or Shard(0)."
-        assert (
-            mesh_dim is not None
-        ), "Failed to convert placements to a mesh_dim."
-        return mesh_dim
-
-    def _convert_shard_dim_type(self, shard_dims):
-        if not isinstance(shard_dims, list) or not isinstance(
-            shard_dims[0], dist.Placement
-        ):
-            # if the input shard_dims is not Placement type,
-            # no need to convert it
-            return shard_dims
-        if isinstance(shard_dims[0], dist.Placement):
-            # if the input shard_dims is a list of Placement,
-            # convert it to a mesh_dim value
-            mesh_dim = self._convert_placements_to_mesh_dim(shard_dims)
-            return mesh_dim
-        elif isinstance(shard_dims[0], list):
-            # if the input shard_dims is a list of List(Placement),
-            # convert each placements to a mesh_dim value
-            res = []
-            for shard_dim in shard_dims:
-                mesh_dim = self._convert_placements_to_mesh_dim(shard_dim)
-                res.append(mesh_dim)
-            return res
-        else:
-            raise TypeError(
-                f"shard_dims must be Placements or list/tuple of Placements, but got {type(shard_dims)}"
-            )
-
     def _get_mesh_and_shard_dim(self, process_id):
         for i in range(len(self._meshes)):
             if isinstance(self._meshes[i], (list, tuple)):
@@ -3075,15 +3017,7 @@ def shard_dataloader(
     dataloader: paddle.io.DataLoader,
     meshes: ProcessMesh | list[ProcessMesh] | tuple[ProcessMesh],
     input_keys: list[str] | tuple[str] | None = None,
-    shard_dims: (
-        list
-        | tuple
-        | str
-        | int
-        | list[dist.Placement]
-        | list[list[dist.Placement]]
-        | None
-    ) = None,
+    shard_dims: list | tuple | str | int | None = None,
     is_dataset_splitted: bool = False,
 ) -> ShardDataloader:
     """

python/paddle/distributed/auto_parallel/static/pir_pass.py

Lines changed: 0 additions & 2 deletions
@@ -91,8 +91,6 @@ def reshard_combine_value(program, op, operand, attr):
 
 def apply_partition_pass(program):
     for op in program.global_block().ops:
-        # if op.name() == "pd_op.matmul_grad":
-        #     breakpoint()
         if op.name() in partition_skip_op_list:
             continue
 
test/auto_parallel/pir/semi_auto_parallel_simple_net_ep.py

Lines changed: 2 additions & 5 deletions
@@ -31,11 +31,8 @@ def __init__(self):
         self.hidden_size = 16
         self.class_num = 10
         self.run_ep = False
-        # self.mesh = dist.ProcessMesh([0, 1], dim_names=["x"])
         self.mesh = dist.ProcessMesh([0, 1])
         self.expert_mesh_list = []
-        # self.expert_mesh_list.append(dist.ProcessMesh([0], dim_names=["x"]))
-        # self.expert_mesh_list.append(dist.ProcessMesh([1], dim_names=["x"]))
         self.expert_mesh_list.append(dist.ProcessMesh([0]))
         self.expert_mesh_list.append(dist.ProcessMesh([1]))
 
@@ -204,7 +201,7 @@ def run_ep(self):
         model, train_dataloader, criterion, optimizer = self.build(config)
 
         dist_dataloader = dist.shard_dataloader(
-            train_dataloader, config.mesh, shard_dims=[dist.Shard(0)]
+            train_dataloader, config.mesh, shard_dims=0
         )
         loss = self.train(config, model, dist_dataloader, criterion, optimizer)
 
@@ -226,7 +223,7 @@ def run_dy2st(self):
         model, train_dataloader, criterion, optimizer = self.build(config)
 
         dist_dataloader = dist.shard_dataloader(
-            train_dataloader, config.mesh, shard_dims="d0"
+            train_dataloader, config.mesh, shard_dims=0
         )
 
         mode = "train"

test/auto_parallel/pir/test_semi_auto_parallel_simple_net_ep.py

Lines changed: 0 additions & 2 deletions
@@ -11,11 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import sys
 import tempfile
 import unittest
 
-sys.path.append("..")
 import collective.test_communication_api_base as test_base
 
 