// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
// This code was adapted from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.h
// -----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
7
#include "MurmurHash.h"
#include <stdlib.h>
#include <string.h>
9
+
10
// Platform helpers used by the hash routines below:
//   FORCE_INLINE    - compiler-specific "always inline" function specifier.
//   ROTL64(x, y)    - rotate the 64-bit value x left by y bits.
//   BIG_CONSTANT(x) - spell a 64-bit unsigned integer literal.
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline
#define ROTL64(x, y) _rotl64(x, y)
#define BIG_CONSTANT(x) (x)

#else // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))

// Rotates x left by r bits. Both shift counts are reduced modulo 64 so that
// r == 0 does not produce a shift by 64, which would be undefined behaviour.
inline uint64_t rotl64(uint64_t x, int8_t r) {
  return (x << (r & 63)) | (x >> ((64 - r) & 63));
}
#define ROTL64(x, y) rotl64(x, y)
#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)
26
+
27
+ using namespace std ;
28
+
29
+ FORCE_INLINE uint64_t fmix64 (uint64_t k) {
30
+ k ^= k >> 33 ;
31
+ k *= BIG_CONSTANT (0xff51afd7ed558ccd );
32
+ k ^= k >> 33 ;
33
+ k *= BIG_CONSTANT (0xc4ceb9fe1a85ec53 );
34
+ k ^= k >> 33 ;
35
+
36
+ return k;
37
+ }
38
+
39
// Returns true when none of the eight bytes packed into k has its most
// significant bit set, i.e. every byte value is in the range 0..127.
bool isAscii(uint64_t k) {
  const uint64_t highBitOfEachByte = 0x8080808080808080;
  return !(k & highBitOfEachByte);
}
42
+
43
+ bool MurmurHash3_x64_128 (const void *key, const int len, const uint32_t seed, void *out) {
44
+ const uint8_t *data = (const uint8_t *)key;
45
+ const int nblocks = len / 16 ;
46
+
47
+ uint64_t h1 = seed;
48
+ uint64_t h2 = seed;
49
+
50
+ const uint64_t c1 = BIG_CONSTANT (0x87c37b91114253d5 );
51
+ const uint64_t c2 = BIG_CONSTANT (0x4cf5ad432745937f );
52
+
53
+ // ----------
54
+ // body
55
+
56
+ const uint64_t *blocks = (const uint64_t *)(data);
57
+
58
+ bool isAsciiString{true };
59
+ for (int i = 0 ; i < nblocks; i++) {
60
+ uint64_t k1 = blocks[i * 2 + 0 ];
61
+ uint64_t k2 = blocks[i * 2 + 1 ];
62
+
63
+ isAsciiString &= isAscii (k1) && isAscii (k2);
64
+
65
+ k1 *= c1;
66
+ k1 = ROTL64 (k1, 31 );
67
+ k1 *= c2;
68
+ h1 ^= k1;
69
+
70
+ h1 = ROTL64 (h1, 27 );
71
+ h1 += h2;
72
+ h1 = h1 * 5 + 0x52dce729 ;
73
+
74
+ k2 *= c2;
75
+ k2 = ROTL64 (k2, 33 );
76
+ k2 *= c1;
77
+ h2 ^= k2;
78
+
79
+ h2 = ROTL64 (h2, 31 );
80
+ h2 += h1;
81
+ h2 = h2 * 5 + 0x38495ab5 ;
82
+ }
83
+
84
+ // ----------
85
+ // tail
86
+
87
+ const uint8_t *tail = (const uint8_t *)(data + nblocks * 16 );
88
+
89
+ for (auto i = 0 ; i < len % 16 ; i++) {
90
+ if (tail[i] > 127 ) {
91
+ isAsciiString = false ;
92
+ break ;
93
+ }
94
+ }
95
+
96
+ uint64_t k1 = 0 ;
97
+ uint64_t k2 = 0 ;
98
+
99
+ switch (len & 15 ) {
100
+ case 15 :
101
+ k2 ^= ((uint64_t )tail[14 ]) << 48 ;
102
+ case 14 :
103
+ k2 ^= ((uint64_t )tail[13 ]) << 40 ;
104
+ case 13 :
105
+ k2 ^= ((uint64_t )tail[12 ]) << 32 ;
106
+ case 12 :
107
+ k2 ^= ((uint64_t )tail[11 ]) << 24 ;
108
+ case 11 :
109
+ k2 ^= ((uint64_t )tail[10 ]) << 16 ;
110
+ case 10 :
111
+ k2 ^= ((uint64_t )tail[9 ]) << 8 ;
112
+ case 9 :
113
+ k2 ^= ((uint64_t )tail[8 ]) << 0 ;
114
+ k2 *= c2;
115
+ k2 = ROTL64 (k2, 33 );
116
+ k2 *= c1;
117
+ h2 ^= k2;
118
+
119
+ case 8 :
120
+ k1 ^= ((uint64_t )tail[7 ]) << 56 ;
121
+ case 7 :
122
+ k1 ^= ((uint64_t )tail[6 ]) << 48 ;
123
+ case 6 :
124
+ k1 ^= ((uint64_t )tail[5 ]) << 40 ;
125
+ case 5 :
126
+ k1 ^= ((uint64_t )tail[4 ]) << 32 ;
127
+ case 4 :
128
+ k1 ^= ((uint64_t )tail[3 ]) << 24 ;
129
+ case 3 :
130
+ k1 ^= ((uint64_t )tail[2 ]) << 16 ;
131
+ case 2 :
132
+ k1 ^= ((uint64_t )tail[1 ]) << 8 ;
133
+ case 1 :
134
+ k1 ^= ((uint64_t )tail[0 ]) << 0 ;
135
+ k1 *= c1;
136
+ k1 = ROTL64 (k1, 31 );
137
+ k1 *= c2;
138
+ h1 ^= k1;
139
+ };
140
+
141
+ // ----------
142
+ // finalization
143
+
144
+ h1 ^= len;
145
+ h2 ^= len;
146
+
147
+ h1 += h2;
148
+ h2 += h1;
149
+
150
+ h1 = fmix64 (h1);
151
+ h2 = fmix64 (h2);
152
+
153
+ h1 += h2;
154
+ h2 += h1;
155
+
156
+ ((uint64_t *)out)[0 ] = h1;
157
+ ((uint64_t *)out)[1 ] = h2;
158
+
159
+ return isAsciiString;
160
+ }
161
+
162
+ bool murmurhash (const uint8_t *key, size_t length, uint64_t &hash) {
163
+ uint64_t hashes[2 ];
164
+
165
+ bool isAscii = MurmurHash3_x64_128 (key, length, 31 , &hashes);
166
+
167
+ hash = hashes[0 ];
168
+
169
+ return isAscii;
170
+ }
0 commit comments