Skip to content

Commit 205ca50

Browse files
authored
fix shader compile failure on nvidia (#6029)
1 parent b8602a6 commit 205ca50

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

src/layer/vulkan/shader/padding_pack1to4.comp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -175,10 +175,10 @@ void main()
175175
afpvec4 v;
176176
#if ncnn_vendorID == 4318
177177
// out of index load cause stall on nvidia
178-
v.r = mask.r ? buffer_ld1(bottom_blob_data, v_offset.r) : value;
179-
v.g = mask.g ? buffer_ld1(bottom_blob_data, v_offset.g) : value;
180-
v.b = mask.b ? buffer_ld1(bottom_blob_data, v_offset.b) : value;
181-
v.a = mask.a ? buffer_ld1(bottom_blob_data, v_offset.a) : value;
178+
v.r = mask.r ? buffer_ld1(bottom_blob_data, v_offset.r) : afp(value);
179+
v.g = mask.g ? buffer_ld1(bottom_blob_data, v_offset.g) : afp(value);
180+
v.b = mask.b ? buffer_ld1(bottom_blob_data, v_offset.b) : afp(value);
181+
v.a = mask.a ? buffer_ld1(bottom_blob_data, v_offset.a) : afp(value);
182182
#else
183183
v.r = buffer_ld1(bottom_blob_data, v_offset.r);
184184
v.g = buffer_ld1(bottom_blob_data, v_offset.g);

src/layer/vulkan/shader/padding_pack1to8.comp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -211,14 +211,14 @@ void main()
211211
afpvec8 v;
212212
#if ncnn_vendorID == 4318
213213
// out of index load cause stall on nvidia
214-
v[0].r = mask.r ? buffer_ld1(bottom_blob_data, v_offset.r) : value;
215-
v[0].g = mask.g ? buffer_ld1(bottom_blob_data, v_offset.g) : value;
216-
v[0].b = mask.b ? buffer_ld1(bottom_blob_data, v_offset.b) : value;
217-
v[0].a = mask.a ? buffer_ld1(bottom_blob_data, v_offset.a) : value;
218-
v[1].r = mask2.r ? buffer_ld1(bottom_blob_data, v_offset2.r) : value;
219-
v[1].g = mask2.g ? buffer_ld1(bottom_blob_data, v_offset2.g) : value;
220-
v[1].b = mask2.b ? buffer_ld1(bottom_blob_data, v_offset2.b) : value;
221-
v[1].a = mask2.a ? buffer_ld1(bottom_blob_data, v_offset2.a) : value;
214+
v[0].r = mask.r ? buffer_ld1(bottom_blob_data, v_offset.r) : afp(value);
215+
v[0].g = mask.g ? buffer_ld1(bottom_blob_data, v_offset.g) : afp(value);
216+
v[0].b = mask.b ? buffer_ld1(bottom_blob_data, v_offset.b) : afp(value);
217+
v[0].a = mask.a ? buffer_ld1(bottom_blob_data, v_offset.a) : afp(value);
218+
v[1].r = mask2.r ? buffer_ld1(bottom_blob_data, v_offset2.r) : afp(value);
219+
v[1].g = mask2.g ? buffer_ld1(bottom_blob_data, v_offset2.g) : afp(value);
220+
v[1].b = mask2.b ? buffer_ld1(bottom_blob_data, v_offset2.b) : afp(value);
221+
v[1].a = mask2.a ? buffer_ld1(bottom_blob_data, v_offset2.a) : afp(value);
222222
#else
223223
v[0].r = buffer_ld1(bottom_blob_data, v_offset.r);
224224
v[0].g = buffer_ld1(bottom_blob_data, v_offset.g);

0 commit comments

Comments
 (0)