| 
1 |  | -#include <bignum.c>  | 
 | 1 | +#include <ruby.h>  | 
2 | 2 | 
 
  | 
 | 3 | +// extract bignum to array of unsigned ints  | 
 | 4 | +static unsigned int * idhash_bignum_to_buf(VALUE a, size_t *num) {  | 
 | 5 | +    size_t word_numbits = sizeof(unsigned int) * CHAR_BIT;  | 
 | 6 | +    size_t nlz_bits = 0;  | 
 | 7 | +    *num = rb_absint_numwords(a, word_numbits, &nlz_bits);  | 
 | 8 | + | 
 | 9 | +    if (*num == (size_t)-1) {  | 
 | 10 | +        rb_raise(rb_eRuntimeError, "Number too large to represent and overflow occured");  | 
 | 11 | +    }  | 
 | 12 | + | 
 | 13 | +    unsigned int *buf = ALLOC_N(unsigned int, *num);  | 
 | 14 | + | 
 | 15 | +    rb_integer_pack(a, buf, *num, sizeof(unsigned int), 0,  | 
 | 16 | +                    INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|  | 
 | 17 | +                    INTEGER_PACK_2COMP);  | 
 | 18 | + | 
 | 19 | +    return buf;  | 
 | 20 | +}  | 
 | 21 | + | 
 | 22 | +// does ((a ^ b) & (a | b) >> 128)  | 
3 | 23 | static VALUE idhash_distance(VALUE self, VALUE a, VALUE b){  | 
4 |  | -    BDIGIT* tempd;  | 
5 |  | -    long i, an = BIGNUM_LEN(a), bn = BIGNUM_LEN(b), templ, acc = 0;  | 
6 |  | -    BDIGIT* as = BDIGITS(a);  | 
7 |  | -    BDIGIT* bs = BDIGITS(b);  | 
8 |  | -    while (0 < an && as[an-1] == 0) an--; // for (i = an; --i;) printf("%u\n", as[i]);  | 
9 |  | -    while (0 < bn && bs[bn-1] == 0) bn--; // for (i = bn; --i;) printf("%u\n", bs[i]);  | 
10 |  | -    // printf("%lu %lu\n", an, bn);  | 
 | 24 | +    size_t an, bn;  | 
 | 25 | +    unsigned int *as = idhash_bignum_to_buf(a, &an);  | 
 | 26 | +    unsigned int *bs = idhash_bignum_to_buf(b, &bn);  | 
 | 27 | + | 
 | 28 | +    while (an > 0 && as[an-1] == 0) an--;  | 
 | 29 | +    while (bn > 0 && bs[bn-1] == 0) bn--;  | 
 | 30 | + | 
11 | 31 |     if (an < bn) {  | 
 | 32 | +      unsigned int *tempd; size_t templ;  | 
12 | 33 |       tempd = as; as = bs; bs = tempd;  | 
13 | 34 |       templ = an; an = bn; bn = templ;  | 
14 | 35 |     }  | 
15 |  | -    for (i = an; i-- > 4;) {  | 
16 |  | -      // printf("%ld : (%u | %u) & (%u ^ %u)\n", i, as[i], (i >= bn ? 0 : bs[i]), as[i-4], bs[i-4]);  | 
17 |  | -      acc += __builtin_popcountl((as[i] | (i >= bn ? 0 : bs[i])) & (as[i-4] ^ bs[i-4]));  | 
18 |  | -      // printf("%ld : %ld\n", i, acc);  | 
 | 36 | + | 
 | 37 | +    size_t i;  | 
 | 38 | +    long acc = 0;  | 
 | 39 | +    // to count >> 128  | 
 | 40 | +    size_t cycles = 128 / (sizeof(unsigned int) * CHAR_BIT);  | 
 | 41 | + | 
 | 42 | +    for (i = an; i-- > cycles;) {  | 
 | 43 | +      acc += __builtin_popcountl((as[i] | (i >= bn ? 0 : bs[i])) & (as[i-cycles] ^ (i-cycles >= bn ? 0 : bs[i-cycles])));  | 
19 | 44 |     }  | 
 | 45 | + | 
20 | 46 |     RB_GC_GUARD(a);  | 
21 | 47 |     RB_GC_GUARD(b);  | 
 | 48 | +    xfree(as);  | 
 | 49 | +    xfree(bs);  | 
 | 50 | + | 
22 | 51 |     return INT2FIX(acc);  | 
23 | 52 | }  | 
24 | 53 | 
 
  | 
25 | 54 | void Init_idhash() {  | 
26 |  | -  VALUE m = rb_define_module("DHashVips");  | 
27 |  | -  VALUE mm = rb_define_module_under(m, "IDHash");  | 
28 |  | -  rb_define_module_function(mm, "distance3_c", idhash_distance, 2);  | 
 | 55 | +    VALUE m = rb_define_module("DHashVips");  | 
 | 56 | +    VALUE mm = rb_define_module_under(m, "IDHash");  | 
 | 57 | +    rb_define_module_function(mm, "distance3_c", idhash_distance, 2);  | 
29 | 58 | }  | 
0 commit comments