3838#include < cstring>
3939#include < climits>
4040
// Feature switch for the NEON code paths in this file: NODE_HAS_SIMD_NEON is
// defined to 1 when either __aarch64__ or _M_ARM64 is defined. On other
// targets it is left undefined, so `#if NODE_HAS_SIMD_NEON` evaluates to 0.
41+ #if defined(__aarch64__) || defined(_M_ARM64)
42+ #define NODE_HAS_SIMD_NEON 1
43+ #endif
44+
45+ #if NODE_HAS_SIMD_NEON
46+ #include < arm_neon.h>
47+ #endif
48+
// Two-argument shorthand that forwards `env` and `obj` to
// THROW_AND_RETURN_IF_NOT_BUFFER, supplying a fixed string literal as the
// third argument.
4149#define THROW_AND_RETURN_UNLESS_BUFFER (env, obj ) \
4250 THROW_AND_RETURN_IF_NOT_BUFFER (env, obj, " argument" ) \
4351
@@ -741,6 +749,46 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
741749 args.GetReturnValue ().Set (args[0 ].As <String>()->Utf8Length (env->isolate ()));
742750}
743751
752+ #if NODE_HAS_SIMD_NEON
753+ uint32_t FastByteLengthUtf8 (Local<Value> receiver,
754+ const v8::FastOneByteString& source) {
755+ uint8_t * data =
756+ const_cast <uint8_t *>(reinterpret_cast <const uint8_t *>(source.data ));
757+ auto length = source.length ;
758+
759+ uint32_t result{0 };
760+ const int lanes = sizeof (uint8x16_t );
761+ uint8_t remaining = length % lanes;
762+ const auto * simd_end = data + (length / lanes) * lanes;
763+ const auto threshold = vdupq_n_u8 (0x80 );
764+
765+ for (; data < simd_end; data += lanes) {
766+ // load 16 bits
767+ uint8x16_t input = vld1q_u8 (data);
768+
769+ // compare to threshold (0x80)
770+ uint8x16_t with_highbit = vcgeq_u8 (input, threshold);
771+
772+ // shift and narrow
773+ uint8x8_t highbits = vshrn_n_u16 (vreinterpretq_u16_u8 (with_highbit), 4 );
774+
775+ // we have 0, 4 or 8 bits per byte
776+ uint8x8_t bitsperbyte = vcnt_u8 (highbits);
777+
778+ // sum the bytes vertically to uint32_t
779+ result += vaddlv_u8 (bitsperbyte);
780+ }
781+
782+ // we overcounted by a factor of 4
783+ result /= 4 ;
784+
785+ for (uint8_t j = 0 ; j < remaining; j++) {
786+ result += (simd_end[j] >> 7 );
787+ }
788+
789+ return result + length;
790+ }
791+ #else
744792uint32_t FastByteLengthUtf8 (Local<Value> receiver,
745793 const v8::FastOneByteString& source) {
746794 uint32_t result = 0 ;
@@ -752,6 +800,7 @@ uint32_t FastByteLengthUtf8(Local<Value> receiver,
752800 result += length;
753801 return result;
754802}
803+ #endif
755804
// File-local v8::CFunction built from FastByteLengthUtf8 via
// v8::CFunction::Make. Presumably handed to V8 where the byteLengthUtf8
// binding is registered elsewhere in this file — verify at the use site.
756805static v8::CFunction fast_byte_length_utf8 (
757806 v8::CFunction::Make (FastByteLengthUtf8));
0 commit comments