pursuing more efficient division

This commit is contained in:
hyunghwan.chung 2019-04-02 08:49:41 +00:00
parent f28bd84c50
commit 4aea0e111b

View File

@ -53,6 +53,27 @@
* Author: Paulo César Pereira de Andrade
*/
/*
* Copyright 1994 - 1996 LongView Technologies L.L.C. $Revision: 1.5 $
* Copyright (c) 2006, Sun Microsystems, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of Sun Microsystems nor the names of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
*/
#include "moo-prv.h"
@ -1589,45 +1610,172 @@ static void divide_unsigned_array (moo_t* moo, const moo_liw_t* x, moo_oow_t xs,
}
}
#if 0
static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs, const moo_liw_t* y, moo_oow_t ys, moo_liw_t* q, moo_liw_t* r)
#if 1
static moo_liw_t adjust_for_over_estimate (moo_liw_t y1, moo_liw_t y2, moo_liw_t q, moo_liw_t xi, moo_liw_t xi2)
{
moo_lidw_t dw;
moo_liw_t c, c2, y2q;
/*moo_liw_t y2q = axpy(y2, q, 0, c);*/
dw = ((moo_lidw_t)q * y2);
c = (moo_liw_t)(dw >> MOO_LIW_BITS);
y2q = (moo_liw_t)dw;
if (c > xi || (c = xi && y2q > xi2))
{
q--; // too large by 1
/*xpy(xi, y1, c2); // add back divisor */
c2 = (moo_liw_t)(((moo_lidw_t)xi + y1) >> MOO_LIW_BITS);
if (!c2)
{
/*y2q = axpy(y2, q, 0, c);*/
dw = ((moo_lidw_t)q * y2);
y2q = (moo_liw_t)dw;
c = (moo_liw_t)(dw >> MOO_LIW_BITS);
if (c > xi || (c = xi && y2q > xi2)) q--; // too large by 2
}
}
return q;
}
static void adjust_for_underflow (moo_t* moo, moo_liw_t* qr, moo_liw_t* divisor, int qrStart, int stop)
{
moo_lidw_t dw;
moo_liw_t carry = 0;
moo_oow_t j, k;
for (j = qrStart, k = 0; k < stop; k++)
{
/*qr[j] = xpy(qr[j], divisor[k], c);*/
dw = (moo_lidw_t)qr[j] + divisor[k] + carry;
carry = (moo_liw_t)(dw >> MOO_LIW_BITS);
qr[j] = (moo_liw_t)dw;
j++;
k++;
}
MOO_ASSERT (moo, carry == 1); // Should have carry to balance borrow; // see Knuth p258 D6
}
static moo_liw_t calculate_remainder (moo_t* moo, moo_liw_t* qr, moo_liw_t* divisor, moo_liw_t q, int qrStart, int stop)
{
moo_lidw_t dw;
moo_liw_t c = 0, c2 = 0, qyk;
moo_oow_t j = qrStart;
moo_oow_t k = 0;
moo_liw_t b = 0;
while (k < stop)
{
/* qr[j] = xmy(qr[j], 0, b); */
dw = (moo_lidw_t)qr[j] - b;
b = (dw >> MOO_LIW_BITS) & 1;
qr[j] = (moo_liw_t)dw;
/* moo_liw_t qyk = axpy(q, divisor[k], 0, c); */
dw = ((moo_lidw_t)divisor[k] * q) + c;
c = (moo_liw_t)(dw >> MOO_LIW_BITS);
qyk = (moo_liw_t)dw;
/*qr[j] = xmy(qr[j], qyk, c2); */
dw = (moo_lidw_t)qr[j] - qyk - c2;
c2 = (dw >> MOO_LIW_BITS) & 1;
qr[j] = (moo_liw_t)dw;
//b = xpy(b, c2, c);
dw = (moo_lidw_t)b + c2 + c;
c = (moo_liw_t)(dw >> MOO_LIW_BITS);
b = (moo_liw_t)dw;
MOO_ASSERT (moo, c == 0);
j++;
k++;
}
return b;
}
static moo_liw_t multiply_unsigned_array_in_place_and_get_carry (moo_liw_t* x, moo_oow_t xs, moo_liw_t y)
{
/* multiply unsigned array with a single word and put the result
* back to the array. return the last remaining carry */
moo_lidw_t dw;
moo_liw_t carry = 0;
moo_oow_t i;
d = (y[ys - 1] == MOO_TYPE_MAX(moo_liw_t)? 1: (((moo_lidw_t)1 << MOO_LIW_BITS) / (y[ys - 1] + 1));
for (i = 0; i < xs; i++)
{
dw = ((moo_lidw_t)x[i] * y) + carry;
carry = (moo_liw_t)(dw >> MOO_LIW_BITS);
x[i] = (moo_liw_t)dw/*& MOO_TYPE_MAX(moo_liw_t)*/;
}
return carry;
}
static void divide_unsigned_array2 (moo_t* moo, moo_liw_t* x, moo_oow_t xs, moo_liw_t* y, moo_oow_t ys, moo_liw_t* q, moo_liw_t* r)
{
moo_oow_t i;
moo_liw_t d, y1, y2;
/* the caller must allocate q to be greater than x by 1 in size. */
for (i = 0; i < xs; i++) q[i] = x[i]; /* copy x to q */
q[xs] = 0;
d = (y[ys - 1] == MOO_TYPE_MAX(moo_liw_t)? 1: (((moo_lidw_t)1 << MOO_LIW_BITS) / (y[ys - 1] + 1)));
if (d > 1)
{
x[xs] = multiply_unsigned_array_in_place_and_get_last_carry(x, xs, d);
carry = multiply_unsigned_array_in_place_and_get_last_carry(y, ys, d); /* carry must be zero */
MOO_ASSERT (moo, carry == 0);
q[xs] = multiply_unsigned_array_in_place_and_get_carry(q, xs, d);
/*carry = */multiply_unsigned_array_in_place_and_get_carry(y, ys, d); /* carry must be zero */
/*MOO_ASSERT (moo, carry == 0);*/
}
y1 = y[ys - 1];
y2 = y[ys - 2];
for (i = xs; i >= ys; )
{
moo_liw_t q;
moo_liw_t y1, y2;
moo_lidw_t dw;
moo_liw_t quo, b, xhi, xlo;
y1 = y[ys - 1];
y2 = y[ys - 2];
xhi = q[i];
xlo = q[i - 1];
if (x[i] == y1)
if (xhi == y1)
{
q = MOO_TYPE_MAX(moo_liw_t);
quo = MOO_TYPE_MAX(moo_liw_t);
}
else
{
moo_lidw_t dw;
dw = ((moo_lidw_t)x[i] << MOO_LIW_BITS) + x[i - 1];
dw = ((moo_lidw_t)xhi << MOO_LIW_BITS) + xlo;
/* TODO: optimize it with ASM - no seperate / and % */
q = dw / y1;
x[i] = dw % y1;
quo = dw / y1;
q[i] = dw % y1;
}
while (i < ys) { j++; k++; }
x[i] = q;
quo = adjust_for_over_estimate(y1, y2, quo, q[i], q[i - 2]);
b = calculate_remainder(moo, q, y, quo, i - ys, ys);
//xmy (xi, 0, b);
b = (moo_liw_t)(((moo_lidw_t)xhi - b) >> MOO_LIW_BITS) & 1;
if (b)
{
/* underflow */
adjust_for_underflow(moo, q, y, i - ys, ys);
q[i] = quo - 1;
}
else
{
q[i] = quo;
}
i--;
}
if (d != 1)
{
moo_lidw_t dw;
@ -1635,15 +1783,30 @@ static void divide_unsigned_array2 (moo_t* moo, const moo_liw_t* x, moo_oow_t xs
for (i = ys; i > 0; )
{
--i;
dw = ((moo_lidw_t)x[i] << MOO_LIW_BITS) + carry;
dw = ((moo_lidw_t)q[i] << MOO_LIW_BITS) + carry;
/* TODO: optimize it with ASM - no seperate / and % */
x[i] = dw / y1;
q[i] = dw / y1;
carry = dw % y1;
}
}
for (i = 0; i < ys; i++) r[i] = q[i];
for (; i <= xs; i++)
{
q[i - ys] = q[i];
q[i] = 0;
}
/*
i = xs;
while (i >= ys && q[i] == 0) i--;
for (; i >= ys; i--) q[i - ys] = q[i];
MOO_MEMMOVE (&q[xxx], &
q[xs] = 0;*/
}
#endif
/* ======================================================================== */
static moo_oop_t add_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y)
{
moo_oow_t as, bs, zs;
@ -1736,25 +1899,6 @@ static moo_oop_t multiply_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t
return z;
}
static moo_liw_t multiply_unsigned_array_in_place_and_get_last_carry (moo_liw_t* x, moo_oow_t xs, moo_liw_t y)
{
/* multiply unsigned array with a single word and put the result
* back to the array. return the last remaining carry */
moo_lidw_t dw;
moo_liw_t carry = 0;
moo_oow_t i;
for (i = 0; i < xs; i++)
{
dw = ((moo_lidw_t)x[i] * y) + carry;
carry = (moo_liw_t)(dw >> MOO_LIW_BITS);
x[i] = (moo_liw_t)dw/*& MOO_TYPE_MAX(moo_liw_t)*/;
}
return carry;
}
static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y, moo_oop_t* r)
{
moo_oop_t qq, rr;
@ -1788,7 +1932,12 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y,
MOO_ASSERT (moo, !is_less_unsigned(x, y));
moo_pushvolat (moo, &x);
moo_pushvolat (moo, &y);
/*#define USE_DIVIDE_UNSIGNED_ARRAY2*/
#if defined(USE_DIVIDE_UNSIGNED_ARRAY2)
qq = moo_instantiate(moo, moo->_large_positive_integer, MOO_NULL, MOO_OBJ_GET_SIZE(x) + 1);
#else
qq = moo_instantiate(moo, moo->_large_positive_integer, MOO_NULL, MOO_OBJ_GET_SIZE(x));
#endif
if (!qq)
{
moo_popvolats (moo, 2);
@ -1800,7 +1949,11 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y,
moo_popvolats (moo, 3);
if (!rr) return MOO_NULL;
#if defined(USE_DIVIDE_UNSIGNED_ARRAY2)
divide_unsigned_array2 (moo,
#else
divide_unsigned_array (moo,
#endif
MOO_OBJ_GET_LIWORD_SLOT(x), MOO_OBJ_GET_SIZE(x),
MOO_OBJ_GET_LIWORD_SLOT(y), MOO_OBJ_GET_SIZE(y),
MOO_OBJ_GET_LIWORD_SLOT(qq), MOO_OBJ_GET_LIWORD_SLOT(rr));
@ -1809,6 +1962,8 @@ static moo_oop_t divide_unsigned_integers (moo_t* moo, moo_oop_t x, moo_oop_t y,
return qq;
}
/* ======================================================================== */
moo_oop_t moo_addints (moo_t* moo, moo_oop_t x, moo_oop_t y)
{
moo_oop_t z;