Skip to content

Commit 626c1ed

Browse files
Fix the yolo_box bug that prevented it from running on Jetson Nano and TX2 (#33422)
1 parent a6b3328 commit 626c1ed

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

paddle/fluid/operators/detection/yolo_box_op.cu

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,14 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel<T> {
120120
platform::GpuLaunchConfig config =
121121
platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), n * box_num);
122122

123-
KeYoloBoxFw<T><<<config.block_per_grid, config.thread_per_block, 0,
123+
dim3 thread_num = config.thread_per_block;
124+
#ifdef WITH_NV_JETSON
125+
if (config.compute_capability == 53 || config.compute_capability == 62) {
126+
thread_num = 512;
127+
}
128+
#endif
129+
130+
KeYoloBoxFw<T><<<config.block_per_grid, thread_num, 0,
124131
ctx.cuda_device_context().stream()>>>(
125132
input_data, imgsize_data, boxes_data, scores_data, conf_thresh,
126133
anchors_data, n, h, w, an_num, class_num, box_num, input_size_h,

paddle/fluid/platform/gpu_launch_config.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ struct GpuLaunchConfig {
3737
dim3 theory_thread_count = dim3(1, 1, 1);
3838
dim3 thread_per_block = dim3(1, 1, 1);
3939
dim3 block_per_grid = dim3(1, 1, 1);
40+
int compute_capability = 0;
4041
};
4142

4243
inline GpuLaunchConfig GetGpuLaunchConfig1D(
@@ -67,11 +68,14 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D(
6768
std::min(max_threads, context.GetMaxThreadsPerBlock());
6869
const int block_count =
6970
std::min(DivUp(physical_thread_count, thread_per_block), sm);
71+
// Get compute_capability
72+
const int capability = context.GetComputeCapability();
7073

7174
GpuLaunchConfig config;
7275
config.theory_thread_count.x = theory_thread_count;
7376
config.thread_per_block.x = thread_per_block;
7477
config.block_per_grid.x = block_count;
78+
config.compute_capability = capability;
7579
return config;
7680
}
7781

0 commit comments

Comments
 (0)