
Commit 91cd2e0

Syonyk authored and mr-c committed
Change generation of 2^n values for fixed-point conversions.
As demonstrated by the test code in #1260, pow() does not produce exact results in rounding modes other than round-to-nearest. This causes behavior to diverge from ARMv8 hardware when round-to-nearest is not in use. The updated forms match hardware properly across a range of values. The tests are not updated to exercise rounding modes, since doing that in a cross-platform way is not trivial; however, all existing test vectors still pass, and in more detailed testing these changes track hardware more closely.
1 parent 4936149 commit 91cd2e0
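
The root cause is easy to demonstrate in isolation. Below is a minimal standalone sketch (not part of this commit; whether pow() is affected depends on the platform's libm) comparing pow(2, n) against the exact shift-derived value under a non-default rounding mode, mirroring the substitution made in the diff below:

/* Hypothetical check: under FE_UPWARD (or any rounding mode other than
 * round-to-nearest) some libm pow() implementations do not return exactly
 * 2^n, while converting UINT64_C(1) << n to double is always exact because
 * the value has a single set bit. Strictly conforming code would also want
 * "#pragma STDC FENV_ACCESS ON"; it is omitted here for brevity. */
#include <fenv.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  fesetround(FE_UPWARD);                            /* any non-default mode */
  for (int n = 1; n <= 63; n++) {
    double via_pow   = pow(2.0, n);                 /* may be slightly off */
    double via_shift = (double) (UINT64_C(1) << n); /* exactly 2^n */
    if (via_pow != via_shift)
      printf("n = %2d: pow() -> %.17g, shift -> %.17g\n", n, via_pow, via_shift);
  }
  return 0;
}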

1 file changed (+37 / -33 lines)

simde/arm/neon/cvt_n.h

Lines changed: 37 additions & 33 deletions
@@ -22,6 +22,10 @@
  *
  * Copyright:
  * 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
+ *
+ * Note: pow(2, n) does not generate proper (exact) results with rounding
+ * modes other than round-to-nearest.
+ * See https://github.com/simd-everywhere/simde/issues/1260
  */
 
 #if !defined(SIMDE_ARM_NEON_CVT_N_H)
@@ -40,7 +44,7 @@ simde_vcvth_n_u16_f16(simde_float16_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
 return simde_vcvth_u16_f16(
 simde_float16_from_float32(
-simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
+simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
 #define simde_vcvth_n_u16_f16(a, n) vcvth_n_u16_f16(a, n)
@@ -57,7 +61,7 @@ simde_vcvth_n_f16_s16(int16_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
 return simde_float16_from_float32(
 HEDLEY_STATIC_CAST(simde_float32_t,
-HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)));
+HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n)));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
 #define simde_vcvth_n_f16_s16(a, n) vcvth_n_f16_s16(a, n)
@@ -74,7 +78,7 @@ simde_vcvth_n_f16_u16(uint16_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) {
 return simde_float16_from_float32(
 HEDLEY_STATIC_CAST(simde_float32_t,
-HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n)));
+HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n)));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
 #define simde_vcvth_n_f16_u16(a, n) vcvth_n_f16_u16(a, n)
@@ -89,7 +93,7 @@ SIMDE_FUNCTION_ATTRIBUTES
 int32_t
 simde_vcvts_n_s32_f32(simde_float32_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
-return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
+return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvts_n_s32_f32(a, n) vcvts_n_s32_f32(a, n)
@@ -103,7 +107,7 @@ SIMDE_FUNCTION_ATTRIBUTES
 uint32_t
 simde_vcvts_n_u32_f32(simde_float32_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
-return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
+return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvts_n_u32_f32(a, n) vcvts_n_u32_f32(a, n)
@@ -118,7 +122,7 @@ simde_float32_t
 simde_vcvts_n_f32_s32(int32_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
 return HEDLEY_STATIC_CAST(simde_float32_t,
-HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n));
+HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvts_n_f32_s32(a, n) vcvts_n_f32_s32(a, n)
@@ -133,7 +137,7 @@ simde_float32_t
 simde_vcvts_n_f32_u32(uint32_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) {
 return HEDLEY_STATIC_CAST(simde_float32_t,
-HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n));
+HEDLEY_STATIC_CAST(simde_float64_t, a) / (UINT64_C(1) << n));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvts_n_f32_u32(a, n) vcvts_n_f32_u32(a, n)
@@ -147,7 +151,7 @@ SIMDE_FUNCTION_ATTRIBUTES
 int64_t
 simde_vcvtd_n_s64_f64(simde_float64_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
-return simde_vcvtd_s64_f64(a * simde_math_pow(2, n));
+return simde_vcvtd_s64_f64(a * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvtd_n_s64_f64(a, n) vcvtd_n_s64_f64(a, n)
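
Background note, not part of the commit text: the (n == 64) branches in the 64-bit conversions keep simde_math_pow(2, n) presumably because UINT64_C(1) << 64 would shift a 64-bit value by its full width, which is undefined behaviour in C; every n from 1 to 63 takes the exact shift path. A hypothetical alternative for that single case, shown only as a sketch (the simde_example_* names are illustrative, not from the codebase), is an exact constant or ldexp():

/* Sketch only, not what this commit does: two exact ways to obtain 2^64 as a
 * double without calling pow() and without an out-of-range shift. */
#include <math.h>
static const double simde_example_two_pow_64 = 18446744073709551616.0;      /* 2^64 exactly */
static double simde_example_two_pow_64_fn(void) { return ldexp(1.0, 64); }  /* also exact */

Either form yields an exact power of two regardless of the current rounding mode.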
@@ -161,7 +165,7 @@ SIMDE_FUNCTION_ATTRIBUTES
 uint64_t
 simde_vcvtd_n_u64_f64(simde_float64_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
-return simde_vcvtd_u64_f64(a * simde_math_pow(2, n));
+return simde_vcvtd_u64_f64(a * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvtd_n_u64_f64(a, n) vcvtd_n_u64_f64(a, n)
@@ -175,7 +179,7 @@ SIMDE_FUNCTION_ATTRIBUTES
 simde_float64_t
 simde_vcvtd_n_f64_s64(int64_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
-return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n);
+return HEDLEY_STATIC_CAST(simde_float64_t, a) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n);
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvtd_n_f64_s64(a, n) vcvtd_n_f64_s64(a, n)
@@ -189,7 +193,7 @@ SIMDE_FUNCTION_ATTRIBUTES
 simde_float64_t
 simde_vcvtd_n_f64_u64(uint64_t a, const int n)
 SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) {
-return HEDLEY_STATIC_CAST(simde_float64_t, a) / simde_math_pow(2, n);
+return HEDLEY_STATIC_CAST(simde_float64_t, a) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n);
 }
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
 #define simde_vcvtd_n_f64_u64(a, n) vcvtd_n_f64_u64(a, n)
@@ -208,7 +212,7 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
+r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
 }
 
 return simde_int32x2_from_private(r_);
@@ -230,7 +234,7 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n));
+r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_int64x1_from_private(r_);
@@ -254,7 +258,7 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n)
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
 r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32(
 simde_float16_to_float32(a_.values[i]) *
-HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
+HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
 }
 
 return simde_uint16x4_from_private(r_);
@@ -277,7 +281,7 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
+r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
 }
 
 return simde_uint32x2_from_private(r_);
@@ -299,7 +303,7 @@ simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n));
+r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_uint64x1_from_private(r_);
@@ -322,7 +326,7 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
+r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
 }
 
 return simde_int32x4_from_private(r_);
@@ -344,7 +348,7 @@ simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * simde_math_pow(2, n));
+r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_int64x2_from_private(r_);
@@ -368,7 +372,7 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n)
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
 r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32(
 simde_float16_to_float32(a_.values[i]) *
-HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n))));
+HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n))));
 }
 
 return simde_uint16x8_from_private(r_);
@@ -391,7 +395,7 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, simde_math_pow(2, n)));
+r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, (UINT64_C(1) << n)));
 }
 
 return simde_uint32x4_from_private(r_);
@@ -414,7 +418,7 @@ simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * simde_math_pow(2, n));
+r_.values[i] = simde_vcvtd_u64_f64(a_.values[i] * ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_uint64x2_from_private(r_);
@@ -437,7 +441,7 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
+r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
 }
 
 return simde_float16x4_from_private(r_);
@@ -460,7 +464,7 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
+r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
 }
 
 return simde_float16x4_from_private(r_);
@@ -483,7 +487,7 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n)));
+r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
 }
 
 return simde_float16x8_from_private(r_);
@@ -506,7 +510,7 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / simde_math_pow(2, n))));
+r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n)));
 }
 
 return simde_float16x8_from_private(r_);
@@ -529,7 +533,7 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
 }
 
 return simde_float32x2_from_private(r_);
@@ -551,7 +555,7 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
 }
 
 return simde_float32x2_from_private(r_);
@@ -573,7 +577,7 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_float64x1_from_private(r_);
@@ -595,7 +599,7 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_float64x2_from_private(r_);
@@ -617,7 +621,7 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_float64x1_from_private(r_);
@@ -639,7 +643,7 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / ((n == 64) ? simde_math_pow(2, n) : UINT64_C(1) << n));
 }
 
 return simde_float64x2_from_private(r_);
@@ -661,7 +665,7 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
 }
 
 return simde_float32x4_from_private(r_);
@@ -683,7 +687,7 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n)
 
 SIMDE_VECTORIZE
 for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
-r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / simde_math_pow(2, n));
+r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / (UINT64_C(1) << n));
 }
 
 return simde_float32x4_from_private(r_);

0 commit comments
