enhanced stix_mulints()

This commit is contained in:
hyunghwan.chung 2015-11-11 14:46:00 +00:00
parent 0daf143cce
commit 7b28bba988
2 changed files with 71 additions and 70 deletions

View File

@ -364,44 +364,44 @@ static STIX_INLINE stix_oow_t subtract_unsigned_array (const atom_t* x, stix_oow
static STIX_INLINE void multiply_unsigned_array (const atom_t* x, stix_oow_t xs, const atom_t* y, stix_oow_t ys, atom_t* z) static STIX_INLINE void multiply_unsigned_array (const atom_t* x, stix_oow_t xs, const atom_t* y, stix_oow_t ys, atom_t* z)
{ {
#if 0
/* Comba multiplication */ /* TODO: implement Karatsuba algorithm when the input length is long */
/* TODO: I don't know why this doesn't work
0FF0000000000013EC00000000000956A00000000001F20C000000000026E8F0 * 0FF0000000000013EC00000000000956A00000000001F20C000000000026E8F0 must
produce
00FE010000000002 7B028000000002B6 8ABC00000001B216 B580000000A990DE E60000002A6437B9 800000069FA8B4FC 0000009765EB9680 000005E9FB33E100
but this produces
00FE010000000002 7B028000000002B6 8ABC00000001B215 B580000000A990DE E60000002A6437B9 800000069FA8B4FC 0000009765EB9680 000005E9FB33E100*
* */
bigatom_t v; bigatom_t v;
stix_oow_t s1, s2; stix_oow_t pa;
stix_oow_t i, j;
stix_oow_t a, b;
s1 = xs + ys; /* TODO: implement Karatsuba or Toom-Cook 3-way algorithm when the input length is long */
pa = (xs < ys)? xs: ys;
if (pa <= ((stix_oow_t)1 << (BIGATOM_BITS - (ATOM_BITS * 2))))
{
/* Comba(column-array) multiplication */
/* When the inputs are too long, the column accumulator v may
* overflow. If that happens, Comba's method fails because the carry
* propagation is corrupted. So this method is used only when the
* shorter input is short enough. */
stix_oow_t pa, ix, iy, iz, tx, ty;
pa = xs + ys;
v = 0; v = 0;
for (i = 0; i < s1; i++) for (ix = 0; ix < pa; ix++)
{ {
b = (i < ys - 1)? i: ys - 1; ty = (ix < ys - 1)? ix: (ys - 1);
a = i - b; tx = ix - ty;
s2 = (xs - a < b + 1)? xs - a: b + 1; iy = (ty + 1 < xs - tx)? (ty + 1): (xs - tx);
for (j = 0; j < s2; j++) for (iz = 0; iz < iy; iz++)
{ {
v += (bigatom_t)x[a + j] * (bigatom_t)y[b - j]; v = v + (bigatom_t)x[tx + iz] * (bigatom_t)y[ty - iz];
} }
z[i] = (atom_t)v; z[ix] = (atom_t)v;
v >>= ATOM_BITS; v = v >> ATOM_BITS;
} }
}
#elif 1 else
{
#if 1
stix_oow_t i, j; stix_oow_t i, j;
bigatom_t v;
atom_t carry; atom_t carry;
for (i = 0; i < ys; i++) for (i = 0; i < ys; i++)
@ -424,9 +424,8 @@ but this produce
} }
} }
#else #else
stix_oow_t i, j, idx; stix_oow_t i, j, idx;
bigatom_t v;
atom_t carry; atom_t carry;
for (i = 0; i < ys; i++) for (i = 0; i < ys; i++)
@ -450,8 +449,8 @@ but this produce
} }
} }
#endif
#endif }
} }
static stix_oop_t add_unsigned_integers (stix_t* stix, stix_oop_t x, stix_oop_t y) static stix_oop_t add_unsigned_integers (stix_t* stix, stix_oop_t x, stix_oop_t y)

View File

@ -111,12 +111,14 @@
typedef stix_oow_t atom_t; typedef stix_oow_t atom_t;
typedef stix_oop_word_t oop_atom_t; typedef stix_oop_word_t oop_atom_t;
# define ATOM_BITS STIX_OOW_BITS # define ATOM_BITS STIX_OOW_BITS
# define BIGATOM_BITS (STIX_SIZEOF(bigatom_t) * 8)
# define SIZEOF_ATOM_T STIX_SIZEOF_OOW_T # define SIZEOF_ATOM_T STIX_SIZEOF_OOW_T
#else #else
typedef stix_oow_t bigatom_t; typedef stix_oow_t bigatom_t;
typedef stix_oohw_t atom_t; typedef stix_oohw_t atom_t;
# define ATOM_BITS STIX_OOHW_BITS
typedef stix_oop_halfword_t oop_atom_t; typedef stix_oop_halfword_t oop_atom_t;
# define ATOM_BITS STIX_OOHW_BITS
# define BIGATOM_BITS STIX_OOW_BITS
# define SIZEOF_ATOM_T STIX_SIZEOF_OOHW_T # define SIZEOF_ATOM_T STIX_SIZEOF_OOHW_T
# define MAKE_WORD(hw1,hw2) ((stix_oow_t)(hw1) | (stix_oow_t)(hw2) << ATOM_BITS) # define MAKE_WORD(hw1,hw2) ((stix_oow_t)(hw1) | (stix_oow_t)(hw2) << ATOM_BITS)
#endif #endif