4 changes: 2 additions & 2 deletions .clang-tidy
@@ -24,7 +24,7 @@ bugprone-misplaced-widening-cast,
 -bugprone-not-null-terminated-result,
 -bugprone-parent-virtual-call,
 -bugprone-posix-return,
--bugprone-signed-char-misuse,
+bugprone-signed-char-misuse,
 -bugprone-sizeof-container,
 -bugprone-sizeof-expression,
 -bugprone-string-constructor,
@@ -91,7 +91,7 @@ clang-analyzer-cplusplus.InnerPointer,
 -clang-analyzer-optin.osx.cocoa.localizability.NonLocalizedStringChecker,
 -clang-analyzer-optin.performance.GCDAntipattern,
 -clang-analyzer-optin.performance.Padding,
--clang-analyzer-optin.portability.UnixAPI,
+clang-analyzer-optin.portability.UnixAPI,
 -clang-analyzer-osx.API,
 -clang-analyzer-osx.MIG,
 -clang-analyzer-osx.NSOrCFErrorDerefChecker,
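Both hunks above flip a check from disabled to enabled: in this .clang-tidy list a leading `-` excludes a check, so deleting it turns on `bugprone-signed-char-misuse` and `clang-analyzer-optin.portability.UnixAPI`. Most of the `// NOLINT` suppressions below exist to keep the first of these quiet. As a rough sketch (not code from this PR), the bug class it targets is sign extension when a `signed char` value widens to a larger integer:

```cpp
#include <cstdio>

// Illustrative only: the pattern bugprone-signed-char-misuse reports.
int main() {
  signed char c = static_cast<signed char>(0xD0);  // stored as -48
  int i = c;        // sign-extends, so i == -48 rather than 208
  if (i == 0xD0) {  // never true here; a classic signed-char misuse
    std::printf("matched\n");
  }
  std::printf("i = %d\n", i);  // prints "i = -48"
  return 0;
}
```

The second check, `clang-analyzer-optin.portability.UnixAPI`, reports implementation-defined uses of Unix allocation calls such as zero-size `malloc`; see the note at the `Array2Poly` hunk near the end of this diff.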
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -321,7 +321,7 @@ void AllReduceOpHandle::NCCLAllReduceFunc(
 void AllReduceOpHandle::SyncNCCLAllReduce() {
   if (FLAGS_sync_nccl_allreduce) {
     for (auto &p : places_) {
-      int dev_id = p.device;
+      int dev_id = p.device; // NOLINT
       auto *nccl_ctxs =
           nccl_ctxs_->GetRunEnvNCCLCtx(run_order_, use_hierarchical_allreduce_);
       auto &nccl_ctx = nccl_ctxs->at(dev_id);
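This hunk shows the pattern repeated across most files below: `Place::device` is stored as a narrow signed integer (`int8_t` in `phi::Place`), so with `bugprone-signed-char-misuse` enabled, every widening read like `int dev_id = p.device;` is reported, and the PR suppresses each site with `// NOLINT` instead of changing the device-ID types. A minimal sketch of the shape, with a hypothetical stand-in for the real `Place`:

```cpp
#include <cstdint>

// Hypothetical stand-in, assuming the device id is stored as int8_t as in
// phi::Place; this is not Paddle's actual class.
struct Place {
  int8_t device;
};

void SetDevice(const Place& p) {
  // Widening the signed 8-bit field to int trips bugprone-signed-char-misuse.
  // The conversion is intentional (device ids are small and non-negative),
  // so the warning is suppressed rather than the field type changed.
  int dev_id = p.device;  // NOLINT
  (void)dev_id;
}
```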
4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -90,7 +90,7 @@ void BroadcastOpHandle::BroadcastOneVar(
   } else if (platform::is_gpu_place(in_tensor.place())) {
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
     VarHandle *out_handle = nullptr;
-    int root_id = in_tensor.place().device;
+    int root_id = in_tensor.place().device; // NOLINT
     std::vector<std::function<void()>> broadcast_calls;

     int type = platform::ToNCCLDataType(
@@ -101,7 +101,7 @@ void BroadcastOpHandle::BroadcastOneVar(
       Variable *out_var = var_scopes.at(out_var_handle->scope_idx())
                               ->FindVar(out_var_handle->name());

-      int dst_id = out_var_handle->place().device;
+      int dst_id = out_var_handle->place().device; // NOLINT

       auto &nccl_ctx = nccl_ctxs_->at(dst_id);
6 changes: 3 additions & 3 deletions paddle/fluid/framework/details/op_handle_base.cc
@@ -47,7 +47,7 @@ OpHandleBase::~OpHandleBase() PADDLE_MAY_THROW { // NOLINT
 void OpHandleBase::InitCUDA() {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   for (auto &p : dev_ctxes_) {
-    int dev_id = p.first.device;
+    int dev_id = p.first.device; // NOLINT
     platform::SetDeviceId(dev_id);
 #ifdef PADDLE_WITH_HIP
     PADDLE_ENFORCE_GPU_SUCCESS(
@@ -61,7 +61,7 @@ void OpHandleBase::InitCUDA() {
     for (auto &out_var : outputs_) {
       auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
       if (out_var_handle) {
-        int dev_id = out_var_handle->place().device;
+        int dev_id = out_var_handle->place().device; // NOLINT
         out_var_handle->SetGenerateEvent(events_.at(dev_id));
       }
     }
@@ -74,7 +74,7 @@ void OpHandleBase::InitCUDA() {
                           Name(),
                           dev_ctxes_.size()));
     auto &place = dev_ctxes_.begin()->first;
-    int dev_id = place.device;
+    int dev_id = place.device; // NOLINT
     for (auto &out_var : outputs_) {
       auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
       if (out_var_handle) {
@@ -45,7 +45,7 @@ static std::vector<std::unique_ptr<ir::Graph>> SeparateMultiDevicesGraph(
   for (auto &op : op_handles) {
     auto &dev_ctx = op->DeviceContext();
     auto &p = dev_ctx.begin()->first;
-    int dev_id = p.device;
+    int dev_id = p.device; // NOLINT
     auto &dev_dummys = graphs[dev_id]->Get<GraphDepVars>(kGraphDepVars);
     graphs[dev_id]->AddNode(graph->RemoveNode(op->Node()).release());

4 changes: 2 additions & 2 deletions paddle/fluid/framework/details/reduce_op_handle.cc
@@ -189,13 +189,13 @@ void ReduceOpHandle::RunImpl() {
         out_var_handle->place(), pre_in.dtype());

     auto out_p = out_var_handle->place();
-    int root_id = out_p.device;
+    int root_id = out_p.device; // NOLINT
     std::vector<std::function<void()>> all_reduce_calls;
     for (size_t i = 0; i < var_scopes.size(); ++i) {
       auto &p = in_places[i];
       auto &lod_tensor = *lod_tensors[i];

-      int dev_id = p.device;
+      int dev_id = p.device; // NOLINT
       auto &nccl_ctx = nccl_ctxs_->at(dev_id);

       void *buffer = const_cast<void *>(lod_tensor.data());
2 changes: 1 addition & 1 deletion paddle/fluid/framework/dlpack_tensor.cc
@@ -99,7 +99,7 @@ struct DLDeviceVisitor {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     ::DLDevice device;
     device.device_type = kDLGPU;
-    device.device_id = place.device;
+    device.device_id = place.device; // NOLINT
     return device;
 #else
     PADDLE_THROW(platform::errors::Unavailable(
4 changes: 2 additions & 2 deletions paddle/fluid/imperative/nccl_context.cc
@@ -80,7 +80,7 @@ void NCCLParallelContext::Init() {
   }
   BcastNCCLId(nccl_ids, 0, server_fd);

-  int gpu_id = place_.device;
+  int gpu_id = place_.device; // NOLINT
   for (int ring_id = 0; ring_id < strategy_.nrings_; ring_id++) {
     VLOG(0) << "init nccl context nranks: " << strategy_.nranks_
             << " local rank: " << strategy_.local_rank_ << " gpu id: " << gpu_id
@@ -115,7 +115,7 @@ void NCCLParallelContext::InitWithRingID(int ring_id) {
   }
   BcastNCCLId(nccl_ids, 0, server_fd);

-  int gpu_id = place_.device;
+  int gpu_id = place_.device; // NOLINT
   VLOG(0) << "init nccl context nranks: " << strategy_.nranks_
           << " local rank: " << strategy_.local_rank_ << " gpu id: " << gpu_id
           << " ring id: " << ring_id;
4 changes: 2 additions & 2 deletions paddle/fluid/imperative/xccl_context.cc
@@ -99,7 +99,7 @@ void XCCLParallelContext::Init() {
   }
   BcastXCCLId(xccl_ids, 0, server_fd);

-  int dev_id = place_.device;
+  int dev_id = place_.device; // NOLINT
   for (int ring_id = 0; ring_id < strategy_.nrings_; ring_id++) {
     VLOG(0) << "init nccl context nranks: " << strategy_.nranks_
             << " local rank: " << strategy_.local_rank_ << " dev id: " << dev_id
@@ -136,7 +136,7 @@ void XCCLParallelContext::InitWithRingID(int ring_id) {
   }
   BcastXCCLId(xccl_ids, 0, server_fd);

-  int dev_id = place_.device;
+  int dev_id = place_.device; // NOLINT
   VLOG(0) << "init xccl context nranks: " << strategy_.nranks_
           << " local rank: " << strategy_.local_rank_ << " dev id: " << dev_id
           << " ring id: " << ring_id;
2 changes: 1 addition & 1 deletion paddle/fluid/memory/allocation/cuda_managed_allocator.cc
@@ -48,7 +48,7 @@ void CUDAManagedAllocator::FreeImpl(phi::Allocation* allocation) {
 phi::Allocation* CUDAManagedAllocator::AllocateImpl(size_t size) {
   std::call_once(once_flag_, [this] { platform::SetDeviceId(place_.device); });

-  int dev_id = place_.device;
+  int dev_id = place_.device; // NOLINT
   void* ptr;
   auto result = platform::RecordedGpuMalloc(&ptr,
                                             size,
@@ -45,7 +45,7 @@ CUDAVirtualMemAllocator::CUDAVirtualMemAllocator(
   // case, the allocation will be pinnded device memory local to a given device.
   prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
   prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
-  prop.location.id = place.device;
+  prop.location.id = place.device; // NOLINT
   prop_ = prop;

   // Prepare the access descriptor array indicating where and how the backings
2 changes: 1 addition & 1 deletion paddle/fluid/operators/reader/buffered_reader.cc
@@ -50,7 +50,7 @@ BufferedReader::BufferedReader(
   VLOG(1) << "BufferedReader";
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (platform::is_gpu_place(place_) && !pin_memory) {
-    int dev_idx = place_.device;
+    int dev_idx = place_.device; // NOLINT
     compute_stream_ =
         ((phi::GPUContext *)(platform::DeviceContextPool::Instance().Get(
             place_)))
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device_event_custom_device.cc
@@ -27,7 +27,7 @@ struct CustomDeviceEventWrapper {
         platform::errors::PreconditionNotMet(
             "Required device shall be CustomPlace, but received %d. ", place));

-    device_id_ = place.device;
+    device_id_ = place.device; // NOLINT
     PADDLE_ENFORCE_GT(
         device_id_,
         -1,
2 changes: 1 addition & 1 deletion paddle/fluid/platform/device_event_gpu.cc
@@ -27,7 +27,7 @@ struct CUDADeviceEventWrapper {
         platform::errors::PreconditionNotMet(
             "Required device shall be CUDAPlace, but received %d. ", place));

-    device_id_ = place.device;
+    device_id_ = place.device; // NOLINT
     PADDLE_ENFORCE_GT(
         device_id_,
         -1,
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/pybind.cc
@@ -420,7 +420,7 @@ struct iinfo {
       dtype = "int64";
       break;
     case framework::proto::VarType::INT8:
-      min = std::numeric_limits<int8_t>::min();
+      min = std::numeric_limits<int8_t>::min(); // NOLINT
       max = std::numeric_limits<int8_t>::max();
       bits = 8;
       dtype = "int8";
2 changes: 1 addition & 1 deletion paddle/phi/backends/custom/custom_device.cc
@@ -935,7 +935,7 @@ class CustomDevice : public DeviceInterface {

  private:
   inline int PlaceToIdNoCheck(const Place& place) {
-    int dev_id = place.GetDeviceId();
+    int dev_id = place.GetDeviceId(); // NOLINT
     return dev_id;
   }

4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/cross_entropy_grad_kernel.cc
@@ -92,7 +92,7 @@ void CrossEntropyWithSoftmaxGradCPUKernel(const CPUContext& dev_ctx,
       for (int j = 0; j < remain; j++) {  // for each sample_other_dims
         int idx = i * remain + j;  // this sample's label_idx. for 1d case,
                                    // remain=1 and j=0, so, idx = i
-        auto lbl = static_cast<int64_t>(label_data[idx]);
+        auto lbl = static_cast<int64_t>(label_data[idx]); // NOLINT
         if (lbl == ignore_index) {
           for (int k = 0; k < axis_dim; ++k) {  // for each class id's label
             logit_grad_data[i * d + k * remain + j] = 0;
@@ -144,7 +144,7 @@ void CrossEntropyWithSoftmaxGradCPUKernel(const CPUContext& dev_ctx,
       for (int j = 0; j < remain; j++) {  // for each sample_other_dims
         int idx = i * remain + j;  // this sample's label_idx. for 1d case,
                                    // remain=1 and j=0, so, idx = i
-        auto lbl = static_cast<int64_t>(label_data[idx]);
+        auto lbl = static_cast<int64_t>(label_data[idx]); // NOLINT
         if (lbl == ignore_index) {
           for (int k = 0; k < axis_dim; ++k) {  // for each class id's label
             logit_grad_data[i * d + k * remain + j] = 0;
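Note that these two hunks suppress the check on an explicit cast, not an implicit conversion: when the kernel is instantiated with a signed-char-like label type such as `int8_t`, `static_cast<int64_t>(label_data[idx])` is still a signed-char-to-integer conversion, and the added `// NOLINT` suggests the check reports it all the same. A hedged sketch of that shape:

```cpp
#include <cstdint>

// Sketch: even an explicit widening cast from a signed-char-like type
// appears to be reported by bugprone-signed-char-misuse, hence the NOLINT.
int64_t WidenLabel(int8_t label) {
  return static_cast<int64_t>(label);  // NOLINT
}
```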
4 changes: 2 additions & 2 deletions paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc
@@ -66,8 +66,8 @@ void Array2Poly(const T* box,
   (*poly).contour =
       (phi::funcs::gpc_vertex_list*)malloc(sizeof(phi::funcs::gpc_vertex_list));
   (*poly).contour->num_vertices = static_cast<int>(pts_num);
-  (*poly).contour->vertex =
-      (phi::funcs::gpc_vertex*)malloc(sizeof(phi::funcs::gpc_vertex) * pts_num);
+  (*poly).contour->vertex = (phi::funcs::gpc_vertex*)malloc(
+      sizeof(phi::funcs::gpc_vertex) * pts_num); // NOLINT
   for (size_t i = 0; i < pts_num; ++i) {
     (*poly).contour->vertex[i].x = box[2 * i];
     (*poly).contour->vertex[i].y = box[2 * i + 1];
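This hunk looks tied to the other newly enabled check, `clang-analyzer-optin.portability.UnixAPI` (an inference, not stated in the PR): if `pts_num` is zero, the call becomes a zero-size `malloc`, whose result is implementation-defined, so the statement is rewrapped to put the allocation size and the `// NOLINT` on the same line. A sketch of what that checker reports:

```cpp
#include <cstdlib>

// Illustrative only: zero-size allocations are implementation-defined (they
// may return a null pointer or a unique, freeable pointer), which is what
// clang-analyzer-optin.portability.UnixAPI warns about.
void Demo() {
  void* p = std::malloc(0);  // flagged: malloc with size 0
  std::free(p);              // freeing either result is safe
}
```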
2 changes: 1 addition & 1 deletion paddle/phi/kernels/funcs/cross_entropy.cc
@@ -51,7 +51,7 @@ struct HardLabelCrossEntropyCPUFunctorImpl {
     const auto* label_data = labels_->template data<U>();
     for (int i = 0; i < batch_size; ++i) {
       for (int j = 0; j < num_remain; j++) {
-        int lbl = static_cast<int>(label_data[i * num_remain + j]);
+        int lbl = static_cast<int>(label_data[i * num_remain + j]); // NOLINT
         if (lbl != ignore_index_) {
           PADDLE_ENFORCE_GE(
               lbl,