diff --git a/moo/kernel/test-001.moo b/moo/kernel/test-001.moo index 594a639..25f7fd6 100644 --- a/moo/kernel/test-001.moo +++ b/moo/kernel/test-001.moo @@ -119,12 +119,13 @@ extend MyObject ^(a value: 5) * (b value: 6). ## (12 * 5 + 22) * (99 * 6 + 4) => 49036 } - method(#class) testBigintDiv: divd_base divisor: divr_base + method(#class) testBigintDiv: divd_base divisor: divr_base count: count { - | i q r divd divr | + | i q r divd divr ubound | i := 1. - while (i < 2000) + ubound := i + count. + while (i < ubound) { divd := divd_base * i. divr := divr_base * i. @@ -375,8 +376,13 @@ extend MyObject ## 150-154 [ (8113063330913503995887611892379812731289731289312898971231 div: 8113063330913503995887611892379812731289731289312898971230) = 1 ], [ (8113063330913503995887611892379812731289731289312898971231 rem: 8113063330913503995887611892379812731289731289312898971230) = 1 ], - [ self testBigintDiv: 100919283908998345873248972389472389791283789123712899089034258903482398198123912831 divisor: 129323482374892374238974238974238947328972389128387312892713891728391278 ], - [ self testBigintDiv: 234897230919283908998345873248972389472389791283789123712899089034258903482398198123912831 divisor: 129323482374892374238974238974238947328972389128387312892713891728391278 ], + [ self testBigintDiv: 100919283908998345873248972389472389791283789123712899089034258903482398198123912831 divisor: 129323482374892374238974238974238947328972389128387312892713891728391278 count: 2000 ], + [ self testBigintDiv: 234897230919283908998345873248972389472389791283789123712899089034258903482398198123912831 divisor: 12932348237489237423897423897423894732897238912838731289271389172839127 count: 2000 ], + [ self testBigintDiv: 16r1234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 divisor: 16r12345678123456781234567812345678123456781234567812345678 count: 2000 ], + [ self testBigintDiv: 16r000089ab0000456700000123 divisor: 1 count: 2000 ], + [ self testBigintDiv: 16r000000030000000000008000 divisor: 16r000000010000000000002000 count: 2000 ], + [ self testBigintDiv: 16rfffffffe0000000080000000 divisor: 16r0000ffff0000000080000000 count: 12345 ], + [ self testBigintDiv: 16rfffffffffffffffe divisor: 16rffffffff count: 2000 ], ## ========================= diff --git a/moo/lib/bigint.c b/moo/lib/bigint.c index e4a8449..18bc364 100644 --- a/moo/lib/bigint.c +++ b/moo/lib/bigint.c @@ -134,6 +134,41 @@ static const moo_uint8_t debruijn_64[64] = # define LOG2_FOR_POW2_64(x) (debruijn_64[(moo_uint64_t)((moo_uint64_t)(x) * 0x022fdd63cc95386d) >> 58]) #endif +static MOO_INLINE int get_num_leading_zero_bits (moo_oow_t x) +{ +#if defined(MOO_HAVE_BUILTIN_CLZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG) + return __builtin_clzll(x); /* count the number of leading zeros */ +#elif defined(MOO_HAVE_BUILTIN_CLZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG) + return __builtin_clzl(x); /* count the number leading zeros */ +#elif defined(MOO_HAVE_BUILTIN_CLZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT) + return __builtin_clz(x); /* count the number of leading zeros */ +#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386)) + /* bit scan reverse. not all x86 CPUs have LZCNT. */ + moo_oow_t pos; + __asm__ volatile ( + "bsr %1,%0\n\t" + : "=r"(pos) /* output */ + : "r"(x) /* input */ + ); + return (int)((MOO_SIZEOF(pos) * 8) - pos); +#elif defined(__GNUC__) && (defined(__arm__) + moo_oow_t count; + __asm__ volatile ( + "clz %0,%1\n\t" + : "=r"(count) /* output */ + : "r"(x) /* input */ + ); + return (int)count; +#else + int count = 0; + while (x >= 0) + { + x <<= 1; + count++; + } +#endif +} + static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x) { /* the caller must ensure that x is power of 2. if x happens to be zero, @@ -1802,20 +1837,17 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs for (i = 0; i < ys; i++) r[i] = y[i]; /* copy y to r */ y1 = r[ys - 1]; /* highest divisor word */ - d = (y1 == MOO_TYPE_MAX(moo_liw_t)? ((moo_liw_t)1): ((moo_liw_t)(((moo_lidw_t)1 << MOO_LIW_BITS) / (y1 + 1)))); + + + //d = (y1 == MOO_TYPE_MAX(moo_liw_t)? ((moo_liw_t)1): ((moo_liw_t)(((moo_lidw_t)1 << MOO_LIW_BITS) / (y1 + 1)))); + d = (moo_liw_t)(((moo_lidw_t)1 << MOO_LIW_BITS) / ((moo_lidw_t)y1 + 1)); if (d > 1) { moo_lidw_t dw; moo_liw_t carry; - /* q = q * d */ - for (carry = 0, i = 0; i < xs; i++) - { - dw = ((moo_lidw_t)q[i] * d) + carry; - carry = (moo_liw_t)(dw >> MOO_LIW_BITS); - q[i] = (moo_liw_t)dw; - } - q[xs] = carry; + /* shift the divisor such that its high-order bit is on. + * shift the dividend the same amount as the previous step */ /* r = r * d */ for (carry = 0, i = 0; i < ys; i++) @@ -1825,6 +1857,15 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs r[i] = (moo_liw_t)dw; } MOO_ASSERT (moo, carry == 0); + + /* q = q * d */ + for (carry = 0, i = 0; i < xs; i++) + { + dw = ((moo_lidw_t)q[i] * d) + carry; + carry = (moo_liw_t)(dw >> MOO_LIW_BITS); + q[i] = (moo_liw_t)dw; + } + q[xs] = carry; } y1 = r[ys - 1]; @@ -1842,6 +1883,7 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs xhi = q[i]; xlo = q[i - 1]; +//printf ("AAAAAAAAAAAaa %llx %llx\n", (unsigned long long)xhi, (unsigned long long)y1); if (xhi == y1) { quo = MOO_TYPE_MAX(moo_liw_t); @@ -1850,60 +1892,57 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs { dw = ((moo_lidw_t)xhi << MOO_LIW_BITS) + xlo; /* TODO: optimize it with ASM - no seperate / and % */ - quo = (moo_liw_t)(dw / y1); - q[i] = (moo_liw_t)(dw % y1); + quo = (moo_liw_t)(dw / y1); /* qhat */ + q[i] = (moo_liw_t)(dw % y1); /* rhat */ + +MOO_ASSERT (moo, (dw / y1) == quo); } /* ---------------------------------------------------------- */ /*quo = adjust_for_over_estimate(y1, y2, quo, q[i], q[i - 2]);*/ - { - moo_liw_t c, c2, y2q; +//printf ("qhat == %llx rhat == %llx dw = %llx r[ys-1] = %llx /// %llx %llx\n", (unsigned long long)qhat, (unsigned long long)rhat, (unsigned long long)dw, (unsigned long long)r[ys-1], (unsigned long long)qq[j + ys], (unsigned long long)qq[j + ys - 1]); +#if 0 +{ +moo_lidw_t rhat = q[i]; + adjust_quotient: + if (quo > MOO_TYPE_MAX(moo_liw_t) || ((moo_lidw_t)quo * y2) > (((moo_lidw_t)rhat << MOO_LIW_BITS) + q[i - 2])) + { + quo = quo - 1; + rhat = rhat + y1; + if (rhat <= MOO_TYPE_MAX(moo_liw_t)) goto adjust_quotient; + } +} +#else + { + moo_liw_t c, c2, y2q, rhat; + +//printf ("XXXXXXXXXXXXXXXXXXXXXx %d %d\n", (int)xs, (int)i); +rhat = q[i]; +adjust_quotient: dw = ((moo_lidw_t)quo * y2); c = (moo_liw_t)(dw >> MOO_LIW_BITS); y2q = (moo_liw_t)dw; - -/* -x1 => c -x2 => y2q -ujn2 => q[i - 2] -rhat => q[i] - -for greaterThan(x1, x2, rhat, ujn2) - -func greaterThan(x1, x2, y1, y2 Word) bool { - return x1 > y1 || x1 == y1 && x2 > y2 -}*/ - -//printf ("XXXXXXXXXXXXXXXXXXXXXx %d %d\n", (int)xs, (int)i); - if (c > q[i] || (c == q[i] && y2q > q[i - 2])) + if (c > rhat || (c == rhat && y2q > q[i - 2])) { //printf ("ADJUST...............%llu %llu %llu %llu\n", (unsigned long long)c, (unsigned long long)q[i], (unsigned long long)y2q, (unsigned long long)q[i - 2]); - --quo; /* too large by 1 */ + --quo; /* too large */ - /*xpy(xi, y1, c2); add back divisor */ - c2 = (moo_liw_t)(((moo_lidw_t)q[i] + y1) >> MOO_LIW_BITS); - if (c2 == 0) + dw = (moo_lidw_t)q[i] + y1; /* add back the divisor */ + if (dw <= MOO_TYPE_MAX(moo_liw_t)) /* if ((dw >> MOO_LIW_BITS) == 0) */ { - /*y2q = axpy(y2, q, 0, c);*/ - dw = ((moo_lidw_t)quo * y2); - c = (moo_liw_t)(dw >> MOO_LIW_BITS); - y2q = (moo_liw_t)dw; - if (c > q[i] || (c == q[i] && y2q > q[i - 2])) - { -//printf ("ADJUST2...............%llu %llu %llu %llu\n", (unsigned long long)c, (unsigned long long)q[i], (unsigned long long)y2q, (unsigned long long)q[i - 2]); - --quo; /* too large by 2 */ - } + rhat = (moo_liw_t)dw; + goto adjust_quotient; } } } - +#endif /* ---------------------------------------------------------- */ b = calculate_remainder(moo, q, r, quo, i - ys, ys); - b = (moo_liw_t)((((moo_lidw_t)xhi - b) >> MOO_LIW_BITS) & 1); + b = (moo_liw_t)((((moo_lidw_t)xhi - b) >> MOO_LIW_BITS) & 1); /* is the sign bit set? */ if (b) { - /* underflow */ + /* yes. underflow */ b = adjust_for_underflow(q, r, i - ys, ys); MOO_ASSERT (moo, b == 1); q[i] = quo - 1; @@ -1938,28 +1977,12 @@ func greaterThan(x1, x2, y1, y2 Word) bool { } -static moo_oow_t nlz (moo_liw_t x) -{ - moo_oow_t n; - - if (x == 0) return(32); - - n = 0; - if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} - if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} - if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} - if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} - if (x <= 0x7FFFFFFF) {n = n + 1;} - - return n; -} - static void divide_unsigned_array3 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs, const moo_liw_t* y, moo_oow_t ys, moo_liw_t* q, moo_liw_t* r) { - moo_oow_t i, j, s; + moo_oow_t s, i, j, g; moo_lidw_t dw, qhat, rhat; moo_lidi_t t, k; - moo_liw_t* qq; + moo_liw_t* qq, y1, y2; /* the caller must ensure: * - q can hold 'xs + 1' words and r can hold 'ys' words. @@ -2007,7 +2030,7 @@ for (i = 0; i < ys; i++) printf ("0x%llx, ", (unsigned long long)y[i]); printf ("};\n"); */ //for (i = 0; i <= xs; i++) printf ("un %llx\n", (unsigned long long)q[i]); - s = nlz(y[ys - 1]); + s = get_num_leading_zero_bits(y[ys - 1]); for (i = ys; i > 1; ) { --i; @@ -2028,79 +2051,62 @@ printf ("};\n"); */ //for (i = 0; i < ys; i++) printf ("vn %llx\n", (unsigned long long)r[i]); //for (i = 0; i <= xs; i++) printf ("un %llx\n", (unsigned long long)qq[i]); + y1 = r[ys - 1]; + y2 = r[ys - 2]; + +/* for (j = xs - ys + 1; j > 0; ) { --j; +*/ + + + for (j = xs; j >= ys; --j) + { + g = j - ys; /* position where remainder begins in qq */ /* estimate */ - dw = ((moo_lidw_t)qq[j + ys] << MOO_LIW_BITS) + qq[j + ys - 1]; - qhat = dw / r[ys - 1]; - rhat = dw - (qhat * r[ys - 1]); + dw = ((moo_lidw_t)qq[j] << MOO_LIW_BITS) + qq[j - 1]; + qhat = dw / y1; + rhat = dw - (qhat * y1); //printf ("qhat == %llx rhat == %llx dw = %llx r[ys-1] = %llx /// %llx %llx\n", (unsigned long long)qhat, (unsigned long long)rhat, (unsigned long long)dw, (unsigned long long)r[ys-1], (unsigned long long)qq[j + ys], (unsigned long long)qq[j + ys - 1]); adjust_quotient: - if (qhat > MOO_TYPE_MAX(moo_liw_t) || (qhat * r[ys - 2]) > ((rhat << MOO_LIW_BITS) + qq[j + ys - 2])) + if (qhat > MOO_TYPE_MAX(moo_liw_t) || (qhat * y2) > ((rhat << MOO_LIW_BITS) + qq[j - 2])) { qhat = qhat - 1; - rhat = rhat + r[ys - 1]; + rhat = rhat + y1; if (rhat <= MOO_TYPE_MAX(moo_liw_t)) goto adjust_quotient; } //printf ("qhat == %llx rhat == %llx\n", (unsigned long long)qhat, (unsigned long long)rhat); -#if 1 /* multiply and subtract */ for (k = 0, i = 0; i < ys; i++) { dw = qhat * r[i]; - t = qq[j + i] - k - (dw & MOO_TYPE_MAX(moo_liw_t)); - qq[j + i] = t; + t = qq[g + i] - k - (dw & MOO_TYPE_MAX(moo_liw_t)); + qq[g + i] = t; k = (dw >> MOO_LIW_BITS) - (t >> MOO_LIW_BITS); } - t = qq[j + ys] - k; - qq[j + ys] = t; + MOO_ASSERT (moo, j == g + ys); + t = qq[j] - k; + qq[j] = t; /* test remainder */ - q[j] = qhat; + q[g] = qhat; /* store quotient */ if (t < 0) { - q[j] = q[j] - 1; + q[g] = q[g] - 1; /* add back */ + for (k = 0, i = 0; i < ys; i++) { - t = (moo_lidw_t)qq[j + i] + r[i] + k; - qq[j + i] = (moo_liw_t)t; + t = (moo_lidw_t)qq[g + i] + r[i] + k; + qq[g + i] = (moo_liw_t)t; k = t >> MOO_LIW_BITS; } - qq[j + ys] += k; + MOO_ASSERT (moo, j == g + ys); + qq[j] += k; } -#else - - for (k = 0, i = 0; i <= n; i++) - { - p = qhat * (i == n ? 0 : v[i]); - k2 = (p >> 16); - - d = u[j + i] - (uint16_t)p; - k2 += (d > u[j + i]); - - u[j + i] = d - k; - k2 += (u[j + i] > d); - - k = k2; - } - - /* Test remainder. */ - q[j] = qhat; - if (k != 0) { - /* Add back. */ - q[j]--; - k = 0; - for (i = 0; i < n; i++) { - t = u[j + i] + v[i] + k; - u[j + i] = (uint16_t)t; - k = t >> 16; - } - -#endif } for (i = 0; i < ys - 1; i++) @@ -2270,7 +2276,7 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y, if (!rr) return MOO_NULL; #if defined(USE_DIVIDE_UNSIGNED_ARRAY2) - divide_unsigned_array3 (moo, + divide_unsigned_array2 (moo, #else divide_unsigned_array (moo, #endif @@ -2897,7 +2903,6 @@ oops_einval: return MOO_NULL; } - moo_oop_t moo_negateint (moo_t* moo, moo_oop_t x) { if (MOO_OOP_IS_SMOOI(x)) diff --git a/moo/lib/moo-cmn.h b/moo/lib/moo-cmn.h index 5b1121a..82a7e0f 100644 --- a/moo/lib/moo-cmn.h +++ b/moo/lib/moo-cmn.h @@ -853,6 +853,16 @@ typedef struct moo_t moo_t; #define MOO_HAVE_BUILTIN_CTZLL #endif + #if __has_builtin(__builtin_clz) + #define MOO_HAVE_BUILTIN_CLZ + #endif + #if __has_builtin(__builtin_clzl) + #define MOO_HAVE_BUILTIN_CLZL + #endif + #if __has_builtin(__builtin_clzll) + #define MOO_HAVE_BUILTIN_CLZLL + #endif + #if __has_builtin(__builtin_uadd_overflow) #define MOO_HAVE_BUILTIN_UADD_OVERFLOW #endif