Skip to content

Commit f42fccd

Browse files
authored
Merge branch 'master' into gemm-large
2 parents 2a11ceb + fe509e9 commit f42fccd

19 files changed

+1553
-777
lines changed

.github/workflows/test-coverage.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ jobs:
5959
token: ${{ secrets.CODECOV_TOKEN }}
6060
disable_search: true
6161
plugins: noop
62+
binary: /data/action/.local/bin/codecov
6263
files: build/lcov.info
6364

6465
linux-gcc-x64:

src/layer/vulkan/deconvolution_vulkan.cpp

Lines changed: 246 additions & 121 deletions
Large diffs are not rendered by default.

src/layer/vulkan/deconvolution_vulkan.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,17 @@ class Deconvolution_vulkan : public Deconvolution
3737

3838
Pipeline* pipeline_deconvolution_gemm;
3939
Pipeline* pipeline_deconvolution_col2im;
40+
41+
// cooperative matrix
42+
bool use_cooperative_matrix;
43+
int coopmat_M;
44+
int coopmat_N;
45+
int coopmat_K;
46+
int UNROLL_SG_M;
47+
int UNROLL_SG_N;
48+
int UNROLL_SG_K;
49+
int UNROLL_WG_M;
50+
int UNROLL_WG_N;
4051
};
4152

4253
} // namespace ncnn

src/layer/vulkan/shader/deconvolution_col2im.comp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,14 +16,15 @@ layout (constant_id = 6) const int bias_term = 0;
1616
layout (constant_id = 7) const int activation_type = 0;
1717
layout (constant_id = 8) const float activation_param_0 = 0;
1818
layout (constant_id = 9) const float activation_param_1 = 0;
19+
layout (constant_id = 10) const int outc = 0;
1920

20-
#define shape_constant_id_offset 10
21+
#define shape_constant_id_offset 11
2122
layout (constant_id = shape_constant_id_offset + 0) const int w = 0;
2223
layout (constant_id = shape_constant_id_offset + 1) const int h = 0;
24+
layout (constant_id = shape_constant_id_offset + 2) const int cstep = 0;
2325

24-
layout (constant_id = shape_constant_id_offset + 2) const int outw = 0;
25-
layout (constant_id = shape_constant_id_offset + 3) const int outh = 0;
26-
layout (constant_id = shape_constant_id_offset + 4) const int outc = 0;
26+
layout (constant_id = shape_constant_id_offset + 3) const int outw = 0;
27+
layout (constant_id = shape_constant_id_offset + 4) const int outh = 0;
2728
layout (constant_id = shape_constant_id_offset + 5) const int outcstep = 0;
2829

2930
layout (binding = 0) readonly buffer col_blob { sfp col_blob_data[]; };
@@ -34,10 +35,10 @@ layout (push_constant) uniform parameter
3435
{
3536
int w;
3637
int h;
38+
int cstep;
3739

3840
int outw;
3941
int outh;
40-
int outc;
4142
int outcstep;
4243
} p;
4344

@@ -47,7 +48,7 @@ void main()
4748
int gy = int(gl_GlobalInvocationID.y);
4849
int gz = int(gl_GlobalInvocationID.z);
4950

50-
if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc))
51+
if (gx >= psc(outw) || gy >= psc(outh) || gz >= outc)
5152
return;
5253

5354
afp sum;
@@ -84,7 +85,7 @@ void main()
8485
h_k /= dilation_h;
8586
w_k /= dilation_w;
8687

87-
const int gi = (gz * maxk + h_k * kernel_w + w_k) * psc(w) * psc(h) + sy * psc(w) + sx;
88+
const int gi = (gz * maxk + h_k * kernel_w + w_k) * psc(cstep) + sy * psc(w) + sx;
8889

8990
sum += buffer_ld1(col_blob_data, gi);
9091
}

src/layer/vulkan/shader/deconvolution_gemm.comp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,8 @@ layout (constant_id = shape_constant_id_offset + 1) const int h = 0;
1313
layout (constant_id = shape_constant_id_offset + 2) const int c = 0;
1414
layout (constant_id = shape_constant_id_offset + 3) const int cstep = 0;
1515

16-
layout (constant_id = shape_constant_id_offset + 4) const int outw = 0;
17-
layout (constant_id = shape_constant_id_offset + 5) const int outh = 0;
16+
layout (constant_id = shape_constant_id_offset + 4) const int outcstep = 0;
17+
layout (constant_id = shape_constant_id_offset + 5) const int outc = 0;
1818

1919
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
2020
layout (binding = 1) writeonly buffer col_blob { sfp col_blob_data[]; };
@@ -27,8 +27,8 @@ layout (push_constant) uniform parameter
2727
int c;
2828
int cstep;
2929

30-
int outw;
31-
int outh;
30+
int outcstep;
31+
int outc;
3232
} p;
3333

3434
#if NCNN_shader_local_memory
@@ -42,7 +42,7 @@ void main()
4242
int gy = int(gl_GlobalInvocationID.y);
4343

4444
#if !NCNN_shader_local_memory
45-
if (gx >= psc(outw) || gy >= psc(outh))
45+
if (gx >= psc(outcstep) || gy >= psc(outc))
4646
return;
4747
#endif
4848

@@ -158,14 +158,14 @@ void main()
158158
#endif
159159

160160
#if NCNN_shader_local_memory
161-
if (gx >= psc(outw) || gy >= psc(outh))
161+
if (gx >= psc(outcstep) || gy >= psc(outc))
162162
return;
163163
#endif
164164

165-
const int gi = gy * psc(outw) + gx;
165+
const int gi = gy * psc(outcstep) + gx;
166166

167167
buffer_st1(col_blob_data, gi, sum0);
168-
if (gx + 1 < psc(outw)) buffer_st1(col_blob_data, gi + 1, sum1);
169-
if (gx + 2 < psc(outw)) buffer_st1(col_blob_data, gi + 2, sum2);
170-
if (gx + 3 < psc(outw)) buffer_st1(col_blob_data, gi + 3, sum3);
168+
if (gx + 1 < psc(outcstep)) buffer_st1(col_blob_data, gi + 1, sum1);
169+
if (gx + 2 < psc(outcstep)) buffer_st1(col_blob_data, gi + 2, sum2);
170+
if (gx + 3 < psc(outcstep)) buffer_st1(col_blob_data, gi + 3, sum3);
171171
}

0 commit comments

Comments
 (0)