diff --git a/moo/lib/bigint.c b/moo/lib/bigint.c index 18bc364..887c193 100644 --- a/moo/lib/bigint.c +++ b/moo/lib/bigint.c @@ -134,42 +134,7 @@ static const moo_uint8_t debruijn_64[64] = # define LOG2_FOR_POW2_64(x) (debruijn_64[(moo_uint64_t)((moo_uint64_t)(x) * 0x022fdd63cc95386d) >> 58]) #endif -static MOO_INLINE int get_num_leading_zero_bits (moo_oow_t x) -{ -#if defined(MOO_HAVE_BUILTIN_CLZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG) - return __builtin_clzll(x); /* count the number of leading zeros */ -#elif defined(MOO_HAVE_BUILTIN_CLZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG) - return __builtin_clzl(x); /* count the number leading zeros */ -#elif defined(MOO_HAVE_BUILTIN_CLZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT) - return __builtin_clz(x); /* count the number of leading zeros */ -#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386)) - /* bit scan reverse. not all x86 CPUs have LZCNT. */ - moo_oow_t pos; - __asm__ volatile ( - "bsr %1,%0\n\t" - : "=r"(pos) /* output */ - : "r"(x) /* input */ - ); - return (int)((MOO_SIZEOF(pos) * 8) - pos); -#elif defined(__GNUC__) && (defined(__arm__) - moo_oow_t count; - __asm__ volatile ( - "clz %0,%1\n\t" - : "=r"(count) /* output */ - : "r"(x) /* input */ - ); - return (int)count; -#else - int count = 0; - while (x >= 0) - { - x <<= 1; - count++; - } -#endif -} - -static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x) +static MOO_INLINE int get_pos_of_msb_set_pow2 (moo_oow_t x) { /* the caller must ensure that x is power of 2. if x happens to be zero, * the return value is undefined as each method used may give different result. 
*/ @@ -196,22 +161,53 @@ static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x) ); #endif return (int)pos; -#elif defined(USE_UGLY_CODE) && defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__)) - moo_oow_t pos; - +#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__)) + moo_oow_t n; /* CLZ is available in ARMv5T and above. there is no instruction to * count trailing zeros or something similar. using RBIT with CLZ * would be good in ARMv6T2 and above to avoid further calculation * afte CLZ */ __asm__ volatile ( "clz %0,%1\n\t" + : "=r"(n) /* output */ + : "r"(x) /* input */ + ); + return (int)(MOO_OOW_BITS - n - 1); + /* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */ +#else + int pos = 0; + while (x >>= 1) pos++; + return pos; +#endif +} + +static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x) +{ + /* return the zero-based position of the most significant set bit in x. x doesn't have to be power of 2. if x is zero, the result is undefined */ +#if defined(MOO_HAVE_BUILTIN_CLZLL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG_LONG) + return MOO_OOW_BITS - __builtin_clzll(x) - 1; /* count the number of leading zeros */ +#elif defined(MOO_HAVE_BUILTIN_CLZL) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_LONG) + return MOO_OOW_BITS - __builtin_clzl(x) - 1; /* count the number of leading zeros */ +#elif defined(MOO_HAVE_BUILTIN_CLZ) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_INT) + return MOO_OOW_BITS - __builtin_clz(x) - 1; /* count the number of leading zeros */ +#elif defined(__GNUC__) && (defined(__x86_64) || defined(__amd64) || defined(__i386) || defined(i386)) + /* gcc-style extended asm is used below, so this branch requires __GNUC__. bit scan reverse. not all x86 CPUs have LZCNT. 
*/ + moo_oow_t pos; + __asm__ volatile ( + "bsr %1,%0\n\t" : "=r"(pos) /* output */ : "r"(x) /* input */ ); - return (int)((MOO_SIZEOF(pos) * 8) - pos - 1); - + return (int)pos; +#elif defined(__GNUC__) && defined(__arm__) && (defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__)) + moo_oow_t n; + __asm__ volatile ( + "clz %0,%1\n\t" + : "=r"(n) /* output */ + : "r"(x) /* input */ + ); + return (int)(MOO_OOW_BITS - n - 1); /* TODO: PPC - use cntlz, cntlzw, cntlzd, SPARC - use lzcnt, MIPS clz */ - #else int pos = 0; while (x >>= 1) pos++; @@ -224,7 +220,7 @@ static MOO_INLINE int get_pos_of_msb_set (moo_oow_t x) #elif defined(MOO_HAVE_UINT64_T) && (MOO_SIZEOF_OOW_T == MOO_SIZEOF_UINT64_T) # define LOG2_FOR_POW2(x) LOG2_FOR_POW2_64(x) #else -# define LOG2_FOR_POW2(x) get_pos_of_msb_set(x) +# define LOG2_FOR_POW2(x) get_pos_of_msb_set_pow2(x) #endif @@ -1337,7 +1333,7 @@ static MOO_INLINE moo_oow_t multiply_unsigned_array_karatsuba (moo_t* moo, const } z[i] = carry; - return; + return count_effective(z, xs + 1); } /* calculate value of nshifts, that is 2^(MOO_LIW_BITS*nshifts) */ @@ -1787,25 +1783,6 @@ static moo_liw_t calculate_remainder (moo_t* moo, moo_liw_t* qr, moo_liw_t* y, m return b; } -static moo_liw_t multiply_unsigned_array_in_place_and_get_carry (moo_liw_t* x, moo_oow_t xs, moo_liw_t y) -{ - /* multiply unsigned array with a single word and put the result - * back to the array. 
return the last remaining carry */ - - moo_lidw_t dw; - moo_liw_t carry = 0; - moo_oow_t i; - - for (i = 0; i < xs; i++) - { - dw = ((moo_lidw_t)x[i] * y) + carry; - carry = (moo_liw_t)(dw >> MOO_LIW_BITS); - x[i] = (moo_liw_t)dw; - } - - return carry; -} - static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs, const moo_liw_t* y, moo_oow_t ys, moo_liw_t* q, moo_liw_t* r) { moo_oow_t i; @@ -1838,8 +1815,7 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs y1 = r[ys - 1]; /* highest divisor word */ - - //d = (y1 == MOO_TYPE_MAX(moo_liw_t)? ((moo_liw_t)1): ((moo_liw_t)(((moo_lidw_t)1 << MOO_LIW_BITS) / (y1 + 1)))); + /*d = (y1 == MOO_TYPE_MAX(moo_liw_t)? ((moo_liw_t)1): ((moo_liw_t)(((moo_lidw_t)1 << MOO_LIW_BITS) / (y1 + 1))));*/ d = (moo_liw_t)(((moo_lidw_t)1 << MOO_LIW_BITS) / ((moo_lidw_t)y1 + 1)); if (d > 1) { @@ -1874,7 +1850,7 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs for (i = xs; i >= ys; --i) { moo_lidw_t dw; - moo_liw_t quo, b, xhi, xlo; + moo_liw_t quo, b, xhi, xlo, rem; /* ---------------------------------------------------------- */ /* estimate the quotient. 
@@ -1883,59 +1859,49 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs xhi = q[i]; xlo = q[i - 1]; -//printf ("AAAAAAAAAAAaa %llx %llx\n", (unsigned long long)xhi, (unsigned long long)y1); +/* if (xhi == y1) { quo = MOO_TYPE_MAX(moo_liw_t); + rem = 0; } else { +*/ dw = ((moo_lidw_t)xhi << MOO_LIW_BITS) + xlo; /* TODO: optimize it with ASM - no seperate / and % */ - quo = (moo_liw_t)(dw / y1); /* qhat */ - q[i] = (moo_liw_t)(dw % y1); /* rhat */ + quo = (moo_liw_t)(dw / y1); + //q[i] = (moo_liw_t)(dw % y1); + rem = (moo_liw_t)(dw % y1); -MOO_ASSERT (moo, (dw / y1) == quo); - } + MOO_ASSERT (moo, (dw / y1) == quo); +/* + }*/ - /* ---------------------------------------------------------- */ - /*quo = adjust_for_over_estimate(y1, y2, quo, q[i], q[i - 2]);*/ - -//printf ("qhat == %llx rhat == %llx dw = %llx r[ys-1] = %llx /// %llx %llx\n", (unsigned long long)qhat, (unsigned long long)rhat, (unsigned long long)dw, (unsigned long long)r[ys-1], (unsigned long long)qq[j + ys], (unsigned long long)qq[j + ys - 1]); + /* adjust the quotient if over-estimated */ #if 0 -{ -moo_lidw_t rhat = q[i]; + dw = (moo_lidw_t)rem; adjust_quotient: - if (quo > MOO_TYPE_MAX(moo_liw_t) || ((moo_lidw_t)quo * y2) > (((moo_lidw_t)rhat << MOO_LIW_BITS) + q[i - 2])) + if (quo > MOO_TYPE_MAX(moo_liw_t) || ((moo_lidw_t)quo * y2) > ((dw << MOO_LIW_BITS) + q[i - 2])) { - quo = quo - 1; - rhat = rhat + y1; - if (rhat <= MOO_TYPE_MAX(moo_liw_t)) goto adjust_quotient; + --quo; + dw += y1; + if (dw <= MOO_TYPE_MAX(moo_liw_t)) goto adjust_quotient; } -} #else - { - moo_liw_t c, c2, y2q, rhat; - -//printf ("XXXXXXXXXXXXXXXXXXXXXx %d %d\n", (int)xs, (int)i); -rhat = q[i]; -adjust_quotient: + adjust_quotient: dw = ((moo_lidw_t)quo * y2); - c = (moo_liw_t)(dw >> MOO_LIW_BITS); - y2q = (moo_liw_t)dw; - if (c > rhat || (c == rhat && y2q > q[i - 2])) + b = (moo_liw_t)(dw >> MOO_LIW_BITS); + if (b > rem || (b == rem && (moo_liw_t)dw > q[i - 2])) { -//printf 
("ADJUST...............%llu %llu %llu %llu\n", (unsigned long long)c, (unsigned long long)q[i], (unsigned long long)y2q, (unsigned long long)q[i - 2]); --quo; /* too large */ - - dw = (moo_lidw_t)q[i] + y1; /* add back the divisor */ + dw = (moo_lidw_t)rem + y1; /* add back the divisor */ if (dw <= MOO_TYPE_MAX(moo_liw_t)) /* if ((dw >> MOO_LIW_BITS) == 0) */ { - rhat = (moo_liw_t)dw; + rem = (moo_liw_t)dw; goto adjust_quotient; } } - } #endif /* ---------------------------------------------------------- */ b = calculate_remainder(moo, q, r, quo, i - ys, ys); @@ -1976,7 +1942,6 @@ adjust_quotient: } - static void divide_unsigned_array3 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs, const moo_liw_t* y, moo_oow_t ys, moo_liw_t* q, moo_liw_t* r) { moo_oow_t s, i, j, g; @@ -2017,20 +1982,12 @@ static void divide_unsigned_array3 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs moo->inttostr.t.capa = xs + 1; moo->inttostr.t.ptr = t; } - qq = moo->inttostr.t.ptr; -/* -printf ("------------------\n"); -printf ("unsigned u[] = {"); -for (i = 0; i < xs; i++ ) printf ("0x%llx, ", (unsigned long long)x[i]); -printf ("};\n"); -printf ("unsigned v[] = {"); -for (i = 0; i < ys; i++) printf ("0x%llx, ", (unsigned long long)y[i]); -printf ("};\n"); */ -//for (i = 0; i <= xs; i++) printf ("un %llx\n", (unsigned long long)q[i]); - - s = get_num_leading_zero_bits(y[ys - 1]); + y1 = y[ys - 1]; + /*s = MOO_LIW_BITS - ((y1 == 0)? 
-1: get_pos_of_msb_set(y1)) - 1;*/ + MOO_ASSERT (moo, y1 > 0); /* the highest word can't be zero in the context where this function is called */ + s = MOO_LIW_BITS - get_pos_of_msb_set(y1) - 1; for (i = ys; i > 1; ) { --i; @@ -2038,7 +1995,6 @@ printf ("};\n"); */ } r[0] = y[0] << s; - qq[xs] = (moo_lidw_t)x[xs - 1] >> (MOO_LIW_BITS - s); for (i = xs; i > 1; ) { @@ -2047,20 +2003,9 @@ printf ("};\n"); */ } qq[0] = x[0] << s; - -//for (i = 0; i < ys; i++) printf ("vn %llx\n", (unsigned long long)r[i]); -//for (i = 0; i <= xs; i++) printf ("un %llx\n", (unsigned long long)qq[i]); - y1 = r[ys - 1]; y2 = r[ys - 2]; -/* - for (j = xs - ys + 1; j > 0; ) - { - --j; -*/ - - for (j = xs; j >= ys; --j) { g = j - ys; /* position where remainder begins in qq */ @@ -2070,7 +2015,6 @@ printf ("};\n"); */ qhat = dw / y1; rhat = dw - (qhat * y1); - //printf ("qhat == %llx rhat == %llx dw = %llx r[ys-1] = %llx /// %llx %llx\n", (unsigned long long)qhat, (unsigned long long)rhat, (unsigned long long)dw, (unsigned long long)r[ys-1], (unsigned long long)qq[j + ys], (unsigned long long)qq[j + ys - 1]); adjust_quotient: if (qhat > MOO_TYPE_MAX(moo_liw_t) || (qhat * y2) > ((rhat << MOO_LIW_BITS) + qq[j - 2])) { @@ -2079,7 +2023,6 @@ printf ("};\n"); */ if (rhat <= MOO_TYPE_MAX(moo_liw_t)) goto adjust_quotient; } -//printf ("qhat == %llx rhat == %llx\n", (unsigned long long)qhat, (unsigned long long)rhat); /* multiply and subtract */ for (k = 0, i = 0; i < ys; i++) { @@ -2114,16 +2057,6 @@ printf ("};\n"); */ r[i] = (qq[i] >> s) | ((moo_lidw_t)qq[i + 1] << (MOO_LIW_BITS - s)); } r[i] = qq[i] >> s; - - //for (i = (xs == ys)? 
1: (xs - ys + 1); i < xs; i++) q[i] = 0; - -/* - printf ("q => "); - for (i = xs ; i > 0; ) printf ("%08x ", q[--i]); - printf ("\n"); - printf ("r => "); - for (i = ys ; i > 0; ) printf ("%08x ", r[--i]); - printf ("\n"); */ } #endif @@ -2255,7 +2188,9 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y, moo_pushvolat (moo, &x); moo_pushvolat (moo, &y); #define USE_DIVIDE_UNSIGNED_ARRAY2 -#if defined(USE_DIVIDE_UNSIGNED_ARRAY2) +//#define USE_DIVIDE_UNSIGNED_ARRAY3 + +#if defined(USE_DIVIDE_UNSIGNED_ARRAY2) || defined(USE_DIVIDE_UNSIGNED_ARRAY3) qq = moo_instantiate(moo, moo->_large_positive_integer, MOO_NULL, MOO_OBJ_GET_SIZE(x) + 1); #else qq = moo_instantiate(moo, moo->_large_positive_integer, MOO_NULL, MOO_OBJ_GET_SIZE(x)); @@ -2267,7 +2202,7 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y, } moo_pushvolat (moo, &qq); -#if defined(USE_DIVIDE_UNSIGNED_ARRAY2) +#if defined(USE_DIVIDE_UNSIGNED_ARRAY2) || defined(USE_DIVIDE_UNSIGNED_ARRAY3) rr = moo_instantiate(moo, moo->_large_positive_integer, MOO_NULL, MOO_OBJ_GET_SIZE(y)); #else rr = moo_instantiate(moo, moo->_large_positive_integer, MOO_NULL, MOO_OBJ_GET_SIZE(y) + 1); @@ -2277,12 +2212,15 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y, #if defined(USE_DIVIDE_UNSIGNED_ARRAY2) divide_unsigned_array2 (moo, +#elif defined(USE_DIVIDE_UNSIGNED_ARRAY3) + divide_unsigned_array3 (moo, #else divide_unsigned_array (moo, #endif MOO_OBJ_GET_LIWORD_SLOT(x), MOO_OBJ_GET_SIZE(x), MOO_OBJ_GET_LIWORD_SLOT(y), MOO_OBJ_GET_SIZE(y), - MOO_OBJ_GET_LIWORD_SLOT(qq), MOO_OBJ_GET_LIWORD_SLOT(rr)); + MOO_OBJ_GET_LIWORD_SLOT(qq), MOO_OBJ_GET_LIWORD_SLOT(rr) + ); *r = rr; return qq;