Skip to content

Commit c6d94b5

Browse files
committed
Fix extract_bits() implementation on non-x86
1 parent f3cb489 commit c6d94b5

File tree

1 file changed

+15
-10
lines changed

1 file changed

+15
-10
lines changed

simdpp/detail/insn/extract_bits.h

Lines changed: 15 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ SIMDPP_INL uint8_t i_extract_bits_any(const uint32<8>& ca)
209209
SIMDPP_INL uint8_t i_extract_bits_any(const uint64<2>& ca)
210210
{
211211
uint64<2> a = ca;
212-
#if SIMDPP_USE_NULL
212+
#if SIMDPP_USE_NULL || !SIMDPP_HAS_INT64_SIMD
213213
uint8_t r = 0;
214214
for (unsigned i = 0; i < a.length; i++) {
215215
uint8_t x = ca.el(i);
@@ -222,13 +222,13 @@ SIMDPP_INL uint8_t i_extract_bits_any(const uint64<2>& ca)
222222
#elif SIMDPP_USE_NEON
223223
uint64<2> mask = make_uint(0x1, 0x2);
224224
a = bit_and(a, mask);
225-
uint64x1_t r = vadd_u64(vget_low_u64(r2), vget_high_u64(r2));
225+
uint64x1_t r = vadd_u64(vget_low_u64(a.native()), vget_high_u64(a.native()));
226226
return vget_lane_u8(vreinterpret_u8_u64(r), 0);
227-
#elif SIMDPP_USE_ALTIVEC
227+
#elif SIMDPP_USE_ALTIVEC && SIMDPP_HAS_INT64_SIMD
228228
uint32<4> mask = make_uint(0x1, 0x0, 0x2, 0x0);
229-
a = bit_and(a, mask);
229+
uint32<4> m = bit_and(uint32<4>(a), mask);
230230
uint32<4> zero = make_zero();
231-
uint32<4> s = (int32x4)vec_sums((__vector int32_t)a.native(),
231+
uint32<4> s = (int32x4)vec_sums((__vector int32_t)m.native(),
232232
(__vector int32_t)zero.native());
233233
#if SIMDPP_BIG_ENDIAN
234234
return extract<7>(uint16x8(s));
@@ -237,16 +237,21 @@ SIMDPP_INL uint8_t i_extract_bits_any(const uint64<2>& ca)
237237
#endif
238238
#elif SIMDPP_USE_MSA
239239
uint32<4> mask = make_uint(0x1, 0x0, 0x2, 0x0);
240-
a = bit_and(a, mask);
241-
a = (v4u32) __msa_hadd_u_d(a.native(), a.native());
242-
a = bit_or(a, move4_l<2>(a));
243-
return extract<0>(a);
240+
uint32<4> b = bit_and(uint32<4>(a), mask);
241+
b = (v4u32) __msa_hadd_u_d(b.native(), b.native());
242+
b = bit_or(b, move4_l<2>(b));
243+
return extract<0>(b);
244244
#endif
245245
}
246246

247247
SIMDPP_INL uint8_t i_extract_bits_any(const uint64<4>& ca)
248248
{
249-
#if SIMDPP_USE_AVX512DQ
249+
#if SIMDPP_USE_NULL || !SIMDPP_HAS_INT64_SIMD
250+
return (ca.template vec<0>().el(0) & 1) |
251+
(ca.template vec<0>().el(1) & 2) |
252+
(ca.template vec<1>().el(0) & 4) |
253+
(ca.template vec<1>().el(1) & 8);
254+
#elif SIMDPP_USE_AVX512DQ
250255
return _mm256_movepi64_mask(ca.native());
251256
#elif SIMDPP_USE_AVX2
252257
return _mm256_movemask_pd(_mm256_castsi256_pd(ca.native()));

0 commit comments

Comments
 (0)