enhanced stix_mulints()
This commit is contained in:
parent
0daf143cce
commit
7b28bba988
@ -364,94 +364,93 @@ static STIX_INLINE stix_oow_t subtract_unsigned_array (const atom_t* x, stix_oow
|
|||||||
|
|
||||||
static STIX_INLINE void multiply_unsigned_array (const atom_t* x, stix_oow_t xs, const atom_t* y, stix_oow_t ys, atom_t* z)
|
static STIX_INLINE void multiply_unsigned_array (const atom_t* x, stix_oow_t xs, const atom_t* y, stix_oow_t ys, atom_t* z)
|
||||||
{
|
{
|
||||||
#if 0
|
|
||||||
/* Comba multiplication */ /* TODO: implement Karatsuba algorithm when the input length is long */
|
|
||||||
|
|
||||||
/* TODO: I don't know why this doesn't work
|
|
||||||
0FF0000000000013EC00000000000956A00000000001F20C000000000026E8F0 * 0FF0000000000013EC00000000000956A00000000001F20C000000000026E8F0 must
|
|
||||||
produce
|
|
||||||
|
|
||||||
00FE010000000002 7B028000000002B6 8ABC00000001B216 B580000000A990DE E60000002A6437B9 800000069FA8B4FC 0000009765EB9680 000005E9FB33E100
|
|
||||||
|
|
||||||
but this produces
|
|
||||||
00FE010000000002 7B028000000002B6 8ABC00000001B215 B580000000A990DE E60000002A6437B9 800000069FA8B4FC 0000009765EB9680 000005E9FB33E100*
|
|
||||||
* */
|
|
||||||
bigatom_t v;
|
bigatom_t v;
|
||||||
stix_oow_t s1, s2;
|
stix_oow_t pa;
|
||||||
stix_oow_t i, j;
|
|
||||||
stix_oow_t a, b;
|
|
||||||
|
|
||||||
s1 = xs + ys;
|
/* TODO: implement Karatsuba or Toom-Cook 3-way algorithm when the input length is long */
|
||||||
|
|
||||||
v = 0;
|
pa = (xs < ys)? xs: ys;
|
||||||
for (i = 0; i < s1; i++)
|
if (pa <= ((stix_oow_t)1 << (BIGATOM_BITS - (ATOM_BITS * 2))))
|
||||||
{
|
{
|
||||||
b = (i < ys - 1)? i: ys - 1;
|
/* Comba (column-array) multiplication */
|
||||||
a = i - b;
|
|
||||||
s2 = (xs - a < b + 1)? xs - a: b + 1;
|
|
||||||
|
|
||||||
for (j = 0; j < s2; j++)
|
/* When the input length is too long, v may overflow. If it
|
||||||
|
* happens, Comba's method doesn't work as carry propagation is
|
||||||
|
* affected badly. So we need to use this method only if
|
||||||
|
* the input is short enough. */
|
||||||
|
|
||||||
|
stix_oow_t pa, ix, iy, iz, tx, ty;
|
||||||
|
|
||||||
|
pa = xs + ys;
|
||||||
|
v = 0;
|
||||||
|
for (ix = 0; ix < pa; ix++)
|
||||||
{
|
{
|
||||||
v += (bigatom_t)x[a + j] * (bigatom_t)y[b - j];
|
ty = (ix < ys - 1)? ix: (ys - 1);
|
||||||
}
|
tx = ix - ty;
|
||||||
|
iy = (ty + 1 < xs - tx)? (ty + 1): (xs - tx);
|
||||||
|
|
||||||
z[i] = (atom_t)v;
|
for (iz = 0; iz < iy; iz++)
|
||||||
v >>= ATOM_BITS;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif 1
|
|
||||||
stix_oow_t i, j;
|
|
||||||
bigatom_t v;
|
|
||||||
atom_t carry;
|
|
||||||
|
|
||||||
for (i = 0; i < ys; i++)
|
|
||||||
{
|
|
||||||
if (y[i] == 0)
|
|
||||||
{
|
|
||||||
z[xs + i] = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
carry = 0;
|
|
||||||
for (j = 0; j < xs; j++)
|
|
||||||
{
|
{
|
||||||
v = (bigatom_t)x[j] * (bigatom_t)y[i] + (bigatom_t)carry + (bigatom_t)z[j + i];
|
v = v + (bigatom_t)x[tx + iz] * (bigatom_t)y[ty - iz];
|
||||||
z[j + i] = (atom_t)v;
|
|
||||||
carry = (atom_t)(v >> ATOM_BITS);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
z[xs + i] = carry;
|
z[ix] = (atom_t)v;
|
||||||
|
v = v >> ATOM_BITS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
#else
|
|
||||||
stix_oow_t i, j, idx;
|
|
||||||
bigatom_t v;
|
|
||||||
atom_t carry;
|
|
||||||
|
|
||||||
for (i = 0; i < ys; i++)
|
|
||||||
{
|
{
|
||||||
idx = i;
|
#if 1
|
||||||
|
stix_oow_t i, j;
|
||||||
|
atom_t carry;
|
||||||
|
|
||||||
for (j = 0; j < xs; j++)
|
for (i = 0; i < ys; i++)
|
||||||
{
|
{
|
||||||
v = (bigatom_t)x[j] * (bigatom_t)y[i] + (bigatom_t)carry + (bigatom_t)z[idx];
|
if (y[i] == 0)
|
||||||
z[idx] = (atom_t)v;
|
{
|
||||||
carry = (atom_t)(v >> ATOM_BITS);
|
z[xs + i] = 0;
|
||||||
idx++;
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
carry = 0;
|
||||||
|
for (j = 0; j < xs; j++)
|
||||||
|
{
|
||||||
|
v = (bigatom_t)x[j] * (bigatom_t)y[i] + (bigatom_t)carry + (bigatom_t)z[j + i];
|
||||||
|
z[j + i] = (atom_t)v;
|
||||||
|
carry = (atom_t)(v >> ATOM_BITS);
|
||||||
|
}
|
||||||
|
|
||||||
|
z[xs + i] = carry;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (carry > 0)
|
#else
|
||||||
{
|
stix_oow_t i, j, idx;
|
||||||
v = (bigatom_t)z[idx] + (bigatom_t)carry;
|
atom_t carry;
|
||||||
z[idx] = (atom_t)v;
|
|
||||||
carry = (atom_t)(v >> ATOM_BITS);
|
|
||||||
idx++;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
for (i = 0; i < ys; i++)
|
||||||
|
{
|
||||||
|
idx = i;
|
||||||
|
|
||||||
|
for (j = 0; j < xs; j++)
|
||||||
|
{
|
||||||
|
v = (bigatom_t)x[j] * (bigatom_t)y[i] + (bigatom_t)carry + (bigatom_t)z[idx];
|
||||||
|
z[idx] = (atom_t)v;
|
||||||
|
carry = (atom_t)(v >> ATOM_BITS);
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (carry > 0)
|
||||||
|
{
|
||||||
|
v = (bigatom_t)z[idx] + (bigatom_t)carry;
|
||||||
|
z[idx] = (atom_t)v;
|
||||||
|
carry = (atom_t)(v >> ATOM_BITS);
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static stix_oop_t add_unsigned_integers (stix_t* stix, stix_oop_t x, stix_oop_t y)
|
static stix_oop_t add_unsigned_integers (stix_t* stix, stix_oop_t x, stix_oop_t y)
|
||||||
|
@ -111,12 +111,14 @@
|
|||||||
typedef stix_oow_t atom_t;
|
typedef stix_oow_t atom_t;
|
||||||
typedef stix_oop_word_t oop_atom_t;
|
typedef stix_oop_word_t oop_atom_t;
|
||||||
# define ATOM_BITS STIX_OOW_BITS
|
# define ATOM_BITS STIX_OOW_BITS
|
||||||
|
# define BIGATOM_BITS (STIX_SIZEOF(bigatom_t) * 8)
|
||||||
# define SIZEOF_ATOM_T STIX_SIZEOF_OOW_T
|
# define SIZEOF_ATOM_T STIX_SIZEOF_OOW_T
|
||||||
#else
|
#else
|
||||||
typedef stix_oow_t bigatom_t;
|
typedef stix_oow_t bigatom_t;
|
||||||
typedef stix_oohw_t atom_t;
|
typedef stix_oohw_t atom_t;
|
||||||
# define ATOM_BITS STIX_OOHW_BITS
|
|
||||||
typedef stix_oop_halfword_t oop_atom_t;
|
typedef stix_oop_halfword_t oop_atom_t;
|
||||||
|
# define ATOM_BITS STIX_OOHW_BITS
|
||||||
|
# define BIGATOM_BITS STIX_OOW_BITS
|
||||||
# define SIZEOF_ATOM_T STIX_SIZEOF_OOHW_T
|
# define SIZEOF_ATOM_T STIX_SIZEOF_OOHW_T
|
||||||
# define MAKE_WORD(hw1,hw2) ((stix_oow_t)(hw1) | (stix_oow_t)(hw2) << ATOM_BITS)
|
# define MAKE_WORD(hw1,hw2) ((stix_oow_t)(hw1) | (stix_oow_t)(hw2) << ATOM_BITS)
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
Reference in New Issue
Block a user