Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ namespace cub = hipcub;

#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/detection/bbox_util.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"

namespace phi {
Expand All @@ -46,6 +45,13 @@ static inline int NumBlocks(const int N) {
kNumMaxinumNumBlocks);
}

// Functor that fills an int buffer with an arithmetic sequence: invoking it
// with index i stores start + i * delta into out[i].
struct RangeInitFunctor {
  int start;  // value stored for index 0
  int delta;  // step between the values of consecutive indices
  int* out;   // destination buffer; indexed directly by the argument

  // Writes the sequence element for position idx. The sum is evaluated in
  // size_t and then narrowed to int, which is what the usual arithmetic
  // conversions do for the mixed int/size_t expression start + idx * delta.
  HOSTDEVICE void operator()(size_t idx) {
    const size_t element =
        static_cast<size_t>(start) + idx * static_cast<size_t>(delta);
    out[idx] = static_cast<int>(element);
  }
};

template <class T>
__global__ void GPUDistFpnProposalsHelper(const int nthreads,
const T* rois,
Expand All @@ -62,7 +68,18 @@ __global__ void GPUDistFpnProposalsHelper(const int nthreads,
const T* offset_roi = rois + i * BBoxSize;
int roi_batch_ind = roi_batch_id_data[i];
// get the target level of current rois
T roi_area = paddle::operators::RoIArea(offset_roi, pixel_offset);
T roi_area;
if (offset_roi[2] < offset_roi[0] || offset_roi[3] < offset_roi[1]) {
roi_area = static_cast<T>(0.);
} else {
const T w = offset_roi[2] - offset_roi[0];
const T h = offset_roi[3] - offset_roi[1];
if (pixel_offset) {
roi_area = (w + 1) * (h + 1);
} else {
roi_area = w * h;
}
}
T roi_scale = sqrt(roi_area);
int tgt_lvl = floor(
log2(roi_scale / static_cast<T>(refer_scale) + (T)1e-8) + refer_level);
Expand Down Expand Up @@ -155,7 +172,7 @@ void DistributeFpnProposalsKernel(
index_in_t.Resize({roi_num});
int* idx_in = dev_ctx.template Alloc<int>(&index_in_t);
funcs::ForRange<phi::GPUContext> for_range(dev_ctx, roi_num);
for_range(paddle::operators::RangeInitFunctor{0, 1, idx_in});
for_range(RangeInitFunctor{0, 1, idx_in});
Copy link
Contributor Author

@Patrick-Star125 Patrick-Star125 Dec 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@YuanRisheng
函数RangeInitFunctor在同目录下的generate_proposals_v2_kernel.cu中有相同实现,但是我不太熟悉cuda的编程模型,不清楚如何直接引用该实现。
可以将RangeInitFunctor实现位置从generate_proposals_v2_kernel.cu更改到generate_proposals_v2_kernel.h,但是感觉不太符合规范。

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

在phi/kernels/funcs/detection目录下创建bbox_util.h,然后把RangeInitFunctor放到这里面,同时可以把generate_proposals_v2_kernel.cu里重复的代码删掉,都改为引用bbox_util.h头文件

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


DenseTensor keys_out_t;
keys_out_t.Resize({roi_num});
Expand Down