implemented karatsuba multiplication.

added stix_lidi_t and stix_lii_t
This commit is contained in:
hyunghwan.chung 2016-11-09 15:50:18 +00:00
parent bec8ee7254
commit bb45bdc480
6 changed files with 559 additions and 92 deletions

View File

@ -96,7 +96,7 @@
{
| v1 v2 |
v2 := 'have fun'.
" v2 := 'have fun'.
v2 at: 0 put: $H.
@ -109,7 +109,18 @@
v1 write: S'하하하하하하하하 (^o^) ほのかちゃん \n'.
v1 close.
self main2.
self main2."
System logNl: (9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
* 8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888) asString.
System logNl: (9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
- 8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888) asString.
System logNl: (8888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
- 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999) asString.
System logNl:(820186817651640487320479808367534510238779540102526006236474836166734016865205999870833760242352512045225158774173869894826877890589130978987229877889333678492731896878236182891224254464936050871086340438798130266913122427332418216677813151305680453358955006355665628938266331979307689540884269372365762883678113227136498054422414501840232090872158915536978847443767922315217311444711397048331496139248250188991402851129033493732164230227458717486395514436574417275149404197774547389507462779807727615
* 765507696474864454832447821143032209556194237429024272487376513755618415740858933212778176226195677908876814855895611901838419364549855580388081219363378099926549770419687104031809304167273647479680584409544921582452247598843590335565958941218635089801691339265287920342381909847353843571491984747541378691432905678660731517460920201717549951480681654501180257614183394160869490681730637245109396396631700176391975994387097927483353281545628136320635813474136122790139443917922910896873631927820545774) asString.
System logNl:(-820186817651640487320479808367534510238779540102526006236474836166734016865205999870833760242352512045225158774173869894826877890589130978987229877889333678492731896878236182891224254464936050871086340438798130266913122427332418216677813151305680453358955006355665628938266331979307689540884269372365762883678113227136498054422414501840232090872158915536978847443767922315217311444711397048331496139248250188991402851129033493732164230227458717486395514436574417275149404197774547389507462779807727615
* 765507696474864454832447821143032209556194237429024272487376513755618415740858933212778176226195677908876814855895611901838419364549855580388081219363378099926549770419687104031809304167273647479680584409544921582452247598843590335565958941218635089801691339265287920342381909847353843571491984747541378691432905678660731517460920201717549951480681654501180257614183394160869490681730637245109396396631700176391975994387097927483353281545628136320635813474136122790139443917922910896873631927820545774) asString.
System logNl: S'\0\0\0END OF MAIN\0AB\0\0\0C\0\0\0'.
}

View File

@ -24,9 +24,46 @@
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 2002 by The XFree86 Project, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Except as contained in this notice, the name of the XFree86 Project shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in this Software without prior written authorization from the
* XFree86 Project.
*
* Author: Paulo César Pereira de Andrade
*/
#include "stix-prv.h"
#define ENABLE_KARATSUBA
#if defined(STIX_DEBUG_BIGINT)
# define KARATSUBA_CUTOFF 3
#else
# define KARATSUBA_CUTOFF 32
#endif
#if (STIX_LIW_BITS == STIX_OOW_BITS)
/* nothing special */
#elif (STIX_LIW_BITS == STIX_OOHW_BITS)
@ -192,7 +229,7 @@ static int is_normalized_integer (stix_t* stix, stix_oop_t oop)
return 0;
}
STIX_INLINE int is_bigint (stix_t* stix, stix_oop_t x)
STIX_INLINE static int is_bigint (stix_t* stix, stix_oop_t x)
{
stix_oop_t c;
@ -529,15 +566,13 @@ static STIX_INLINE stix_oop_t clone_bigint_to_positive (stix_t* stix, stix_oop_t
static STIX_INLINE stix_oow_t count_effective (stix_liw_t* x, stix_oow_t xs)
{
stix_oow_t i;
for (i = xs; i > 1; )
{
--i;
if (x[i] != 0) return i + 1;
}
#if 0
while (xs > 1 && x[xs - 1] == 0) xs--;
return xs;
#else
while (xs > 1) { if (x[--xs]) return xs + 1; }
return 1;
#endif
}
static STIX_INLINE stix_oow_t count_effective_digits (stix_oop_t oop)
@ -547,7 +582,7 @@ static STIX_INLINE stix_oow_t count_effective_digits (stix_oop_t oop)
for (i = STIX_OBJ_GET_SIZE(oop); i > 1; )
{
--i;
if (((stix_oop_liword_t)oop)->slot[i] != 0) return i + 1;
if (((stix_oop_liword_t)oop)->slot[i]) return i + 1;
}
return 1;
@ -619,7 +654,6 @@ static stix_oop_t normalize_bigint (stix_t* stix, stix_oop_t oop)
return clone_bigint (stix, oop, count);
}
static STIX_INLINE int is_less_unsigned_array (const stix_liw_t* x, stix_oow_t xs, const stix_liw_t* y, stix_oow_t ys)
{
stix_oow_t i;
@ -733,11 +767,63 @@ static void complement2_unsigned_array (const stix_liw_t* x, stix_oow_t xs, stix
static STIX_INLINE stix_oow_t add_unsigned_array (const stix_liw_t* x, stix_oow_t xs, const stix_liw_t* y, stix_oow_t ys, stix_liw_t* z)
{
stix_oow_t i;
#if 1
register stix_oow_t i;
stix_lidw_t w;
stix_lidw_t carry = 0;
STIX_ASSERT (xs >= ys);
if (xs < ys)
{
/* swap x and y */
i = xs;
xs = ys;
ys = i;
i = (stix_oow_t)x;
x = y;
y = (stix_liw_t*)i;
}
w = 0;
i = 0;
while (i < ys)
{
w += (stix_lidw_t)x[i] + (stix_lidw_t)y[i];
z[i++] = w & STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS);
w >>= STIX_LIW_BITS;
}
while (w && i < xs)
{
w += x[i];
z[i++] = w & STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS);
w >>= STIX_LIW_BITS;
}
while (i < xs)
{
z[i] = x[i];
i++;
}
if (w) z[i++] = w & STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS);
return i;
#else
register stix_oow_t i;
stix_lidw_t w;
stix_liw_t carry = 0;
if (xs < ys)
{
/* swap x and y */
i = xs;
xs = ys;
ys = i;
i = (stix_oow_t)x;
x = y;
y = (stix_liw_t*)i;
}
for (i = 0; i < ys; i++)
{
@ -746,26 +832,82 @@ static STIX_INLINE stix_oow_t add_unsigned_array (const stix_liw_t* x, stix_oow_
z[i] = w /*& STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS) */;
}
if (x == z)
{
for (; carry && i < xs; i++)
{
w = (stix_lidw_t)x[i] + carry;
carry = w >> STIX_LIW_BITS;
z[i] = w /*& STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS) */;
}
i = xs;
}
else
{
for (; i < xs; i++)
{
w = (stix_lidw_t)x[i] + carry;
carry = w >> STIX_LIW_BITS;
z[i] = w /*& STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS)*/;
}
}
if (i > 1 && carry == 0) return i - 1;
z[i] = carry;
return i;
if (carry) z[i++] = carry;
return i; /* the number of effective digits in the result */
#endif
}
static STIX_INLINE stix_oow_t subtract_unsigned_array (const stix_liw_t* x, stix_oow_t xs, const stix_liw_t* y, stix_oow_t ys, stix_liw_t* z)
{
#if 1
stix_oow_t i;
stix_lidi_t w = 0;
if (x == y)
{
STIX_ASSERT (xs == ys);
z[0] = 0;
return 1;
}
STIX_ASSERT (!is_less_unsigned_array(x, xs, y, ys));
for (i = 0; i < ys; i++)
{
w += (stix_lidi_t)x[i] - (stix_lidi_t)y[i];
z[i] = w & STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS);
w >>= STIX_LIW_BITS;
}
while (w && i < xs)
{
w += x[i];
z[i++] = w & STIX_LBMASK(stix_lidw_t, STIX_LIW_BITS);
w >>= STIX_LIW_BITS;
}
while (i < xs)
{
z[i] = x[i];
i++;
}
while (i > 1 && z[i - 1] == 0) i--;
return i;
#else
stix_oow_t i;
stix_lidw_t w;
stix_lidw_t borrow = 0;
stix_lidw_t borrowed_word;
if (x == y)
{
STIX_ASSERT (xs == ys);
z[0] = 0;
return 1;
}
STIX_ASSERT (!is_less_unsigned_array(x, xs, y, ys));
borrowed_word = (stix_lidw_t)1 << STIX_LIW_BITS;
@ -799,7 +941,10 @@ static STIX_INLINE stix_oow_t subtract_unsigned_array (const stix_liw_t* x, stix
}
STIX_ASSERT (borrow == 0);
return i;
while (i > 1 && z[i - 1] == 0) i--;
return i; /* the number of effective digits in the result */
#endif
}
static STIX_INLINE void multiply_unsigned_array (const stix_liw_t* x, stix_oow_t xs, const stix_liw_t* y, stix_oow_t ys, stix_liw_t* z)
@ -807,9 +952,21 @@ static STIX_INLINE void multiply_unsigned_array (const stix_liw_t* x, stix_oow_t
stix_lidw_t v;
stix_oow_t pa;
/* TODO: implement Karatsuba or Toom-Cook 3-way algorithm when the input length is long */
if (xs < ys)
{
stix_oow_t i;
pa = (xs < ys)? xs: ys;
/* swap x and y */
i = xs;
xs = ys;
ys = i;
i = (stix_oow_t)x;
x = y;
y = (stix_liw_t*)i;
}
pa = xs;
if (pa <= ((stix_oow_t)1 << (STIX_LIDW_BITS - (STIX_LIW_BITS * 2))))
{
/* Comba(column-array) multiplication */
@ -840,58 +997,341 @@ static STIX_INLINE void multiply_unsigned_array (const stix_liw_t* x, stix_oow_t
}
else
{
#if 1
stix_oow_t i, j;
stix_liw_t carry;
for (i = 0; i < ys; i++)
for (i = 0; i < xs; i++)
{
if (y[i] == 0)
if (x[i] == 0)
{
z[xs + i] = 0;
z[i + ys] = 0;
}
else
{
carry = 0;
for (j = 0; j < xs; j++)
for (j = 0; j < ys; j++)
{
v = (stix_lidw_t)x[j] * (stix_lidw_t)y[i] + (stix_lidw_t)carry + (stix_lidw_t)z[j + i];
z[j + i] = (stix_liw_t)v;
v = (stix_lidw_t)x[i] * (stix_lidw_t)y[j] + (stix_lidw_t)carry + (stix_lidw_t)z[i + j];
z[i + j] = (stix_liw_t)v;
carry = (stix_liw_t)(v >> STIX_LIW_BITS);
}
z[xs + i] = carry;
z[i + j] = carry;
}
}
}
}
/* KARATSUBA MULTIPLICATION
*
* c = |a| * |b|
*
* Let B represent the radix(2^DIGIT_BITS)
* Let n represent half the number of digits
*
* a = a1 * B^n + a0
* b = b1 * B^n + b0
* a * b => a1b1 * B^2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B^n + a0b0
*
* --------------------------------------------------------------------
* For example, for 2 number 0xFAC2 and 0xABCD => A848635A
* DIGIT_BITS = 8 (1 byte, each digit is 1 byte long)
* B = 2^8 = 0x100
* n = 1 (half the digits of 2 digit numbers)
* B^n = 0x100 ^ 1 = 0x100
* B^2n = 0x100 ^ 2 = 0x10000
* 0xFAC2 = 0xFA * 0x100 + 0xC2
* 0xABCD = 0xAB * 0x100 + 0xCD
* a1 = 0xFA, a0 = 0xC2
* b1 = 0xAB, b0 = 0xCD
* a1b1 = 0xFA * 0xAB = 0xA6FE
* a0b0 = 0xC2 * 0xCD = 0x9B5A
* a1 + a0 = 0xFA + 0xC2 = 0x1BC
* b1 + b0 = 0xAB + 0xCD = 0x178
* --------------------------------------------------------------------
* (A6FE * 10000) + (((1BC * 178) - (985A + A6FE)) * 100) + 9B5A =
* (A6FE << (8 * 2)) + (((1BC * 178) - (985A + A6FE)) << (8 * 1)) =
* A6FE0000 + 14CC800 + 9B5A = 9848635A
* --------------------------------------------------------------------
*
* 0xABCD9876 * 0xEFEFABAB => 0xA105C97C9755A8D2
* B = 2^8 = 0x100
* n = 2
* B^n = 0x100 ^ 2 = 0x10000
* B^2n = 0x100 ^ 4 = 0x100000000
* 0xABCD9876 = 0xABCD * 0x10000 + 0x9876
* 0xEFEFABAB = 0xEFEF * 0x10000 + 0xABAB
* a1 = 0xABCD, a0 = 0x9876
* b1 - 0xEFEF, b0 = 0xABAB
* a1b1 = 0xA104C763
* a0b0 = 0x663CA8D2
* a1 + a0 = 0x14443
* b1 + b0 = 0x19B9A
* --------------------------------------------------------------------
* (A104C763 * 100000000) + (((14443 * 19B9A) - (663CA8D2 + A104C763)) * 10000) + 663CA8D2 =
* (A104C763 << (8 * 4)) + (((14443 * 19B9A) - (663CA8D2 + A104C763)) << (8 * 2)) + 663CA8D2 = A105C97C9755A8D2
* --------------------------------------------------------------------
*
* Multiplying by B is t same as shifting by DIGIT_BITS.
* DIGIT_BITS in this implementation is STIX_LIW_BITS
* B => 2^STIX_LIW_BITS
* X * B^n => X << (STIX_LIW_BITS * n)
* X * B^2n => X << (STIX_LIW_BITS * n * 2)
* --------------------------------------------------------------------
*/
#define CANNOT_KARATSUBA(xs, ys) \
((xs) < KARATSUBA_CUTOFF || (ys) < KARATSUBA_CUTOFF || \
((xs) > (ys) && (ys) <= (((xs) + 1) / 2)) || \
((xs) < (ys) && (xs) <= (((ys) + 1) / 2)))
static STIX_INLINE stix_oow_t multiply_unsigned_array_karatsuba (stix_t* stix, const stix_liw_t* x, stix_oow_t xs, const stix_liw_t* y, stix_oow_t ys, stix_liw_t* z)
{
#if 1
stix_lidw_t nshifts;
stix_lidw_t ndigits_xh, ndigits_xl;
stix_lidw_t ndigits_yh, ndigits_yl;
stix_liw_t* tmp[2] = { STIX_NULL, STIX_NULL};
stix_liw_t* zsp;
stix_oow_t tmplen[2];
stix_oow_t xlen, zcapa;
zcapa = xs + ys; /* the caller ensures this capacity for z at the minimum*/
if (xs < ys)
{
stix_oow_t i;
/* swap x and y */
i = xs;
xs = ys;
ys = i;
i = (stix_oow_t)x;
x = y;
y = (stix_liw_t*)i;
}
/* calculate value of nshifts, that is 2^(STIX_LIW_BITS*nshifts) */
nshifts = (xs + 1) / 2;
ndigits_xl = nshifts; /* ndigits of lower part of x */
ndigits_xh = xs - nshifts; /* ndigits of upper part of x */
ndigits_yl = nshifts; /* ndigits of lower part of y */
ndigits_yh = ys - nshifts; /* ndigits of uppoer part of y */
STIX_ASSERT (ndigits_xl >= ndigits_xh);
STIX_ASSERT (ndigits_yl >= ndigits_yh);
/* make a temporary buffer for (b0 + b1) and (a1 * b1) */
tmplen[0] = ndigits_xh + ndigits_yh;
tmplen[1] = ndigits_yl + ndigits_yh + 1;
if (tmplen[1] < tmplen[0]) tmplen[1] = tmplen[0];
tmp[1] = stix_callocmem (stix, STIX_SIZEOF(stix_liw_t) * tmplen[1]); /* TODO: should i use the object memory? */
if (!tmp[1]) goto oops;
/* make a temporary for (a0 + a1) and (a0 * b0) */
tmplen[0] = ndigits_xl + ndigits_yl + 1;
tmp[0] = stix_callocmem (stix, STIX_SIZEOF(stix_liw_t) * tmplen[0]);
if (!tmp[0]) goto oops;
/* tmp[0] = a0 + a1 */
tmplen[0] = add_unsigned_array (x, ndigits_xl, x + nshifts, ndigits_xh, tmp[0]);
/* tmp[1] = b0 + b1 */
tmplen[1] = add_unsigned_array (y, ndigits_yl, y + nshifts, ndigits_yh, tmp[1]);
/*STIX_DEBUG6 (stix, "karatsuba t %p u %p ndigits_xl %d ndigits_xh %d ndigits_yl %d ndigits_yh %d\n", tmp[0], tmp[1], (int)ndigits_xl, (int)ndigits_xh, (int)ndigits_yl, (int)ndigits_yh);*/
/*STIX_DEBUG5 (stix, "zcapa %d, tmplen[0] %d tmplen[1] %d nshifts %d total %d\n", (int)zcapa, (int)tmplen[0], (int)tmplen[1], (int)nshifts, (int)(tmplen[0] + tmplen[1] + nshifts));*/
/* place (a0 + a1) * (b0 + b1) at the shifted position */
zsp = z + nshifts;
if (CANNOT_KARATSUBA(tmplen[0], tmplen[1]))
{
multiply_unsigned_array (tmp[0], tmplen[0], tmp[1], tmplen[1], zsp);
xlen = count_effective (zsp, tmplen[0] + tmplen[1]);
}
else
{
xlen = multiply_unsigned_array_karatsuba(stix, tmp[0], tmplen[0], tmp[1], tmplen[1], zsp);
if (xlen == 0) goto oops;
}
/* tmp[0] = a0 * b0 */
tmplen[0] = ndigits_xl + ndigits_yl;
STIX_MEMSET (tmp[0], 0, sizeof(stix_liw_t) * tmplen[0]);
if (CANNOT_KARATSUBA(ndigits_xl, ndigits_yl))
{
multiply_unsigned_array (x, ndigits_xl, y, ndigits_yl, tmp[0]);
tmplen[0] = count_effective(tmp[0], tmplen[0]);
}
else
{
tmplen[0] = multiply_unsigned_array_karatsuba (stix, x, ndigits_xl, y, ndigits_yl, tmp[0]);
if (tmplen[0] <= 0) goto oops;
}
/* tmp[1] = a1 * b1 */
tmplen[1] = ndigits_xh + ndigits_yh;
STIX_MEMSET (tmp[1], 0, sizeof(stix_liw_t) * tmplen[1]);
if (CANNOT_KARATSUBA(ndigits_xh, ndigits_yh))
{
multiply_unsigned_array (x + nshifts, ndigits_xh, y + nshifts, ndigits_yh, tmp[1]);
tmplen[1] = count_effective (tmp[1], tmplen[1]);
}
else
{
tmplen[1] = multiply_unsigned_array_karatsuba (stix, x + nshifts, ndigits_xh, y + nshifts, ndigits_yh, tmp[1]);
if (tmplen[1] <= 0) goto oops;
}
/* (a0+a1)*(b0+b1) -(a0*b0) */
xlen = subtract_unsigned_array(zsp, xlen, tmp[0], tmplen[0], zsp);
/* (a0+a1)*(b0+b1) - (a0*b0) - (a1*b1) */
xlen = subtract_unsigned_array(zsp, xlen, tmp[1], tmplen[1], zsp);
/* a1b1 is in tmp[1]. add (a1b1 * B^2n) to the high part of 'z' */
zsp = z + (nshifts * 2); /* emulate shifting for "* B^2n". */
xlen = zcapa - (nshifts * 2);
xlen = add_unsigned_array (zsp, xlen, tmp[1], tmplen[1], zsp);
/* z = z + a0b0. a0b0 is in tmp[0] */
xlen = add_unsigned_array(z, zcapa, tmp[0], tmplen[0], z);
stix_freemem (stix, tmp[1]);
stix_freemem (stix, tmp[0]);
return count_effective (z, xlen);
oops:
if (tmp[1]) stix_freemem (stix, tmp[1]);
if (tmp[0]) stix_freemem (stix, tmp[0]);
return 0;
#else
stix_oow_t i, j, idx;
stix_liw_t carry;
stix_lidw_t nshifts;
stix_lidw_t ndigits_xh, ndigits_xl;
stix_lidw_t ndigits_yh, ndigits_yl;
stix_liw_t* tmp[3] = { STIX_NULL, STIX_NULL, STIX_NULL };
stix_liw_t* zsp;
stix_oow_t tmplen[3];
stix_oow_t xlen, zcapa;
for (i = 0; i < ys; i++)
{
idx = i;
zcapa = xs + ys; /* the caller ensures this capacity for z at the minimum*/
for (j = 0; j < xs; j++)
if (xs < ys)
{
v = (stix_lidw_t)x[j] * (stix_lidw_t)y[i] + (stix_lidw_t)carry + (stix_lidw_t)z[idx];
z[idx] = (stix_liw_t)v;
carry = (stix_liw_t)(v >> STIX_LIW_BITS);
idx++;
stix_oow_t i;
/* swap x and y */
i = xs;
xs = ys;
ys = i;
i = (stix_oow_t)x;
x = y;
y = (stix_liw_t*)i;
}
while (carry > 0)
/* calculate value of nshifts, that is 2^(STIX_LIW_BITS*nshifts) */
nshifts = (xs + 1) / 2;
ndigits_xl = nshifts; /* ndigits of lower part of x */
ndigits_xh = xs - nshifts; /* ndigits of upper part of x */
ndigits_yl = nshifts; /* ndigits of lower part of y */
ndigits_yh = ys - nshifts; /* ndigits of uppoer part of y */
STIX_ASSERT (ndigits_xl >= ndigits_xh);
STIX_ASSERT (ndigits_yl >= ndigits_yh);
/* make a temporary buffer for (b0 + b1) and (a1 * b1) */
tmplen[0] = ndigits_yl + ndigits_yh + 1;
tmplen[1] = ndigits_xh + ndigits_yh;
if (tmplen[1] < tmplen[0]) tmplen[1] = tmplen[0];
tmp[1] = stix_callocmem (stix, STIX_SIZEOF(stix_liw_t) * tmplen[1]);
if (!tmp[1]) goto oops;
/* make a temporary for (a0 + a1) and (a0 * b0) */
tmplen[0] = ndigits_xl + ndigits_yl;
tmp[0] = stix_callocmem (stix, STIX_SIZEOF(stix_liw_t) * tmplen[0]);
if (!tmp[0]) goto oops;
/* tmp[0] = a0 + a1 */
tmplen[0] = add_unsigned_array (x, ndigits_xl, x + nshifts, ndigits_xh, tmp[0]);
/* tmp[1] = b0 + b1 */
tmplen[1] = add_unsigned_array (y, ndigits_yl, y + nshifts, ndigits_yh, tmp[1]);
/* tmp[2] = (a0 + a1) * (b0 + b1) */
tmplen[2] = tmplen[0] + tmplen[1];
tmp[2] = stix_callocmem (stix, STIX_SIZEOF(stix_liw_t) * tmplen[2]);
if (!tmp[2]) goto oops;
if (CANNOT_KARATSUBA(tmplen[0], tmplen[1]))
{
v = (stix_lidw_t)z[idx] + (stix_lidw_t)carry;
z[idx] = (stix_liw_t)v;
carry = (stix_liw_t)(v >> STIX_LIW_BITS);
idx++;
multiply_unsigned_array (tmp[0], tmplen[0], tmp[1], tmplen[1], tmp[2]);
xlen = count_effective (tmp[2], tmplen[2]);
}
else
{
xlen = multiply_unsigned_array_karatsuba(stix, tmp[0], tmplen[0], tmp[1], tmplen[1], tmp[2]);
if (xlen == 0) goto oops;
}
/* tmp[0] = a0 * b0 */
tmplen[0] = ndigits_xl + ndigits_yl;
STIX_MEMSET (tmp[0], 0, sizeof(stix_liw_t) * tmplen[0]);
if (CANNOT_KARATSUBA(ndigits_xl, ndigits_yl))
{
multiply_unsigned_array (x, ndigits_xl, y, ndigits_yl, tmp[0]);
tmplen[0] = count_effective(tmp[0], tmplen[0]);
}
else
{
tmplen[0] = multiply_unsigned_array_karatsuba (stix, x, ndigits_xl, y, ndigits_yl, tmp[0]);
if (tmplen[0] <= 0) goto oops;
}
/* tmp[1] = a1 * b1 */
tmplen[1] = ndigits_xh + ndigits_yh;
STIX_MEMSET (tmp[1], 0, sizeof(stix_liw_t) * tmplen[1]);
if (CANNOT_KARATSUBA(ndigits_xh, ndigits_yh))
{
multiply_unsigned_array (x + nshifts, ndigits_xh, y + nshifts, ndigits_yh, tmp[1]);
tmplen[1] = count_effective (tmp[1], tmplen[1]);
}
else
{
tmplen[1] = multiply_unsigned_array_karatsuba (stix, x + nshifts, ndigits_xh, y + nshifts, ndigits_yh, tmp[1]);
if (tmplen[1] <= 0) goto oops;
}
/* w = w - tmp[0] */
xlen = subtract_unsigned_array(tmp[2], xlen, tmp[0], tmplen[0], tmp[2]);
/* r = w - tmp[1] */
zsp = z + nshifts; /* emulate shifting for "* B^n" */
xlen = subtract_unsigned_array(tmp[2], xlen, tmp[1], tmplen[1], zsp);
/* a1b1 is in tmp[1]. add (a1b1 * B^2n) to the high part of 'z' */
zsp = z + (nshifts * 2); /* emulate shifting for "* B^2n". */
xlen = zcapa - (nshifts * 2);
xlen = add_unsigned_array (zsp, xlen, tmp[1], tmplen[1], zsp);
/* z = z + a0b0. a0b0 is in tmp[0] */
xlen = add_unsigned_array(z, zcapa, tmp[0], tmplen[0], z);
stix_freemem (stix, tmp[2]);
stix_freemem (stix, tmp[1]);
stix_freemem (stix, tmp[0]);
return count_effective (z, xlen);
oops:
if (tmp[2]) stix_freemem (stix, tmp[2]);
if (tmp[1]) stix_freemem (stix, tmp[1]);
if (tmp[0]) stix_freemem (stix, tmp[0]);
return 0;
#endif
}
}
static STIX_INLINE void lshift_unsigned_array (stix_liw_t* x, stix_oow_t xs, stix_oow_t bits)
{
@ -1036,23 +1476,11 @@ static stix_oop_t add_unsigned_integers (stix_t* stix, stix_oop_t x, stix_oop_t
stix_poptmps (stix, 2);
if (!z) return STIX_NULL;
if (as >= bs)
{
add_unsigned_array (
((stix_oop_liword_t)x)->slot, as,
((stix_oop_liword_t)y)->slot, bs,
((stix_oop_liword_t)z)->slot
);
}
else
{
add_unsigned_array (
((stix_oop_liword_t)y)->slot, bs,
((stix_oop_liword_t)x)->slot, as,
((stix_oop_liword_t)z)->slot
);
}
return z;
}
@ -1079,12 +1507,12 @@ static stix_oop_t subtract_unsigned_integers (stix_t* stix, stix_oop_t x, stix_o
static stix_oop_t multiply_unsigned_integers (stix_t* stix, stix_oop_t x, stix_oop_t y)
{
stix_oop_t z;
stix_oow_t xz, yz;
stix_oow_t xs, ys;
xz = STIX_OBJ_GET_SIZE(x);
yz = STIX_OBJ_GET_SIZE(y);
xs = STIX_OBJ_GET_SIZE(x);
ys = STIX_OBJ_GET_SIZE(y);
if (yz > STIX_OBJ_SIZE_MAX - xz)
if (ys > STIX_OBJ_SIZE_MAX - xs)
{
stix->errnum = STIX_EOOMEM; /* TOOD: is it a soft failure or hard failure? */
return STIX_NULL;
@ -1092,14 +1520,29 @@ static stix_oop_t multiply_unsigned_integers (stix_t* stix, stix_oop_t x, stix_o
stix_pushtmp (stix, &x);
stix_pushtmp (stix, &y);
z = stix_instantiate (stix, stix->_large_positive_integer, STIX_NULL, xz + yz);
z = stix_instantiate (stix, stix->_large_positive_integer, STIX_NULL, xs + ys);
stix_poptmps (stix, 2);
if (!z) return STIX_NULL;
#if defined(ENABLE_KARATSUBA)
if (CANNOT_KARATSUBA (xs, ys))
{
#endif
multiply_unsigned_array (
((stix_oop_liword_t)x)->slot, STIX_OBJ_GET_SIZE(x),
((stix_oop_liword_t)y)->slot, STIX_OBJ_GET_SIZE(y),
((stix_oop_liword_t)z)->slot);
#if defined(ENABLE_KARATSUBA)
}
else
{
if (multiply_unsigned_array_karatsuba (
stix,
((stix_oop_liword_t)x)->slot, STIX_OBJ_GET_SIZE(x),
((stix_oop_liword_t)y)->slot, STIX_OBJ_GET_SIZE(y),
((stix_oop_liword_t)z)->slot) == 0) return STIX_NULL;
}
#endif
return z;
}

View File

@ -795,6 +795,10 @@ static int get_ident (stix_t* stix, stix_ooci_t char_read_ahead)
return -1;
}
}
else
{
unget_char (stix, &stix->c->lxc);
}
}
else
{

View File

@ -217,7 +217,8 @@ redo:
}
newcapa = STIX_ALIGN(stix->log.len + len, 512); /* TODO: adjust this capacity */
tmp = stix_reallocmem (stix, stix->log.ptr, newcapa * STIX_SIZEOF(*tmp));
/* +1 to handle line ending injection more easily */
tmp = stix_reallocmem (stix, stix->log.ptr, (newcapa + 1) * STIX_SIZEOF(*tmp));
if (!tmp)
{
if (stix->log.len > 0)
@ -231,7 +232,7 @@ redo:
}
stix->log.ptr = tmp;
stix->log.capa = newcapa - 1; /* -1 to handle line ending injection more easily */
stix->log.capa = newcapa;
}
while (len > 0)

View File

@ -50,6 +50,7 @@
/*#define STIX_DEBUG_VM_PROCESSOR*/
/*#define STIX_DEBUG_VM_EXEC*/
/*#define STIX_DEBUG_VM_METHOD_LOOKUP*/
#define STIX_DEBUG_BIGINT
#define STIX_PROFILE_VM
/* allow the caller to drive process switching by calling

View File

@ -126,13 +126,15 @@ typedef struct stix_obj_word_t* stix_oop_word_t;
/* ========================================================================= */
/* BIGINT TYPES AND MACROS */
/* ========================================================================= */
#if STIX_SIZEOF_UINTMAX_T > STIX_SIZEOF_OOW_T
#if (STIX_SIZEOF_UINTMAX_T > STIX_SIZEOF_OOW_T)
# define STIX_USE_FULL_WORD
#endif
#if defined(STIX_USE_FULL_WORD)
typedef stix_oow_t stix_liw_t; /* large integer word */
typedef stix_ooi_t stix_lii_t;
typedef stix_uintmax_t stix_lidw_t; /* large integer double word */
typedef stix_intmax_t stix_lidi_t;
# define STIX_SIZEOF_LIW_T STIX_SIZEOF_OOW_T
# define STIX_SIZEOF_LIDW_T STIX_SIZEOF_UINTMAX_T
# define STIX_LIW_BITS STIX_OOW_BITS
@ -143,7 +145,9 @@ typedef struct stix_obj_word_t* stix_oop_word_t;
#else
typedef stix_oohw_t stix_liw_t;
typedef stix_oohi_t stix_lii_t;
typedef stix_oow_t stix_lidw_t;
typedef stix_ooi_t stix_lidi_t;
# define STIX_SIZEOF_LIW_T STIX_SIZEOF_OOHW_T
# define STIX_SIZEOF_LIDW_T STIX_SIZEOF_OOW_T
# define STIX_LIW_BITS STIX_OOHW_BITS
@ -944,6 +948,7 @@ typedef enum stix_log_mask_t stix_log_mask_t;
#define STIX_LOG3(stix,mask,fmt,a1,a2,a3) do { if (STIX_LOG_ENABLED(stix,mask)) stix_logbfmt(stix, mask, fmt, a1, a2, a3); } while(0)
#define STIX_LOG4(stix,mask,fmt,a1,a2,a3,a4) do { if (STIX_LOG_ENABLED(stix,mask)) stix_logbfmt(stix, mask, fmt, a1, a2, a3, a4); } while(0)
#define STIX_LOG5(stix,mask,fmt,a1,a2,a3,a4,a5) do { if (STIX_LOG_ENABLED(stix,mask)) stix_logbfmt(stix, mask, fmt, a1, a2, a3, a4, a5); } while(0)
#define STIX_LOG6(stix,mask,fmt,a1,a2,a3,a4,a5,a6) do { if (STIX_LOG_ENABLED(stix,mask)) stix_logbfmt(stix, mask, fmt, a1, a2, a3, a4, a5, a6); } while(0)
#define STIX_DEBUG0(stix,fmt) STIX_LOG0(stix, STIX_LOG_DEBUG, fmt)
#define STIX_DEBUG1(stix,fmt,a1) STIX_LOG1(stix, STIX_LOG_DEBUG, fmt, a1)
@ -951,13 +956,15 @@ typedef enum stix_log_mask_t stix_log_mask_t;
#define STIX_DEBUG3(stix,fmt,a1,a2,a3) STIX_LOG3(stix, STIX_LOG_DEBUG, fmt, a1, a2, a3)
#define STIX_DEBUG4(stix,fmt,a1,a2,a3,a4) STIX_LOG4(stix, STIX_LOG_DEBUG, fmt, a1, a2, a3, a4)
#define STIX_DEBUG5(stix,fmt,a1,a2,a3,a4,a5) STIX_LOG5(stix, STIX_LOG_DEBUG, fmt, a1, a2, a3, a4, a5)
#define STIX_DEBUG6(stix,fmt,a1,a2,a3,a4,a5,a6) STIX_LOG6(stix, STIX_LOG_DEBUG, fmt, a1, a2, a3, a4, a5, a6)
#define STIX_INFO0(stix,fmt) STIX_LOG0(stix, STIX_LOG_INFO, fmt)
#define STIX_INFO1(stix,fmt,a1) STIX_LOG1(stix, STIX_LOG_INFO, fmt, a1)
#define STIX_INFO2(stix,fmt,a1,a2) STIX_LOG2(stix, STIX_LOG_INFO, fmt, a1, a2)
#define STIX_INFO3(stix,fmt,a1,a2,a3) STIX_LOG3(stix, STIX_LOG_INFO, fmt, a1, a2, a3)
#define STIX_INFO4(stix,fmt,a1,a2,a3,a4) STIX_LOG4(stix, STIX_LOG_INFO, fmt, a1, a2, a3, a4)
#define STIX_INFO5(stix,fmt,a1,a2,a3,a4,a5) STIX_LOG5(stix, STIX_LOG_INFO, fmt, a1, a2, a3, a4, a5
#define STIX_INFO5(stix,fmt,a1,a2,a3,a4,a5) STIX_LOG5(stix, STIX_LOG_INFO, fmt, a1, a2, a3, a4, a5)
#define STIX_INFO6(stix,fmt,a1,a2,a3,a4,a5,a6) STIX_LOG6(stix, STIX_LOG_INFO, fmt, a1, a2, a3, a4, a5, a6)
#if defined(__cplusplus)
extern "C" {