qse/ase/lib/awk/misc.c
2008-06-04 02:18:55 +00:00

953 lines
20 KiB
C

/*
* $Id: misc.c 115 2008-03-03 11:13:15Z baconevi $
*
* {License}
*/
#include <ase/awk/awk_i.h>
/*
 * Allocates a block of the requested size on behalf of 'awk' by
 * delegating to the ASE_AWK_MALLOC macro, and returns what it yields.
 */
void* ase_awk_malloc (ase_awk_t* awk, ase_size_t size)
{
	void* block;

	block = ASE_AWK_MALLOC (awk, size);
	return block;
}
/*
 * Releases 'ptr' on behalf of 'awk' by delegating to the
 * ASE_AWK_FREE macro.
 */
void ase_awk_free (ase_awk_t* awk, void* ptr)
{
	ASE_AWK_FREE (awk, ptr);
}
/*
 * Duplicates the 'len'-character string at 'ptr' by calling
 * ase_strxdup with the memory manager stored in awk->prmfns.mmgr.
 * The result of ase_strxdup is returned unchanged.
 */
ase_char_t* ase_awk_strxdup (
	ase_awk_t* awk, const ase_char_t* ptr, ase_size_t len)
{
	ase_char_t* copy;

	copy = ase_strxdup (ptr, len, &awk->prmfns.mmgr);
	return copy;
}
/*
 * Converts the first 'len' characters of 'str' to an integer.
 *
 * Any run of '+' and '-' characters is accepted as the sign; each '-'
 * flips it. Leading white space is not skipped.
 *
 * base   - 0 selects the base from the text: "0x"/"0X" means 16,
 *          "0b"/"0B" means 2, any other leading '0' means 8, and
 *          everything else means 10. With an explicit base of 16 or 2
 *          the matching "0x"/"0b" prefix is skipped if present.
 *          Must be less than 37.
 * endptr - if not ASE_NULL, receives the position where scanning
 *          stopped.
 *
 * Overflow is not detected. 'awk' is not used by this function.
 */
ase_long_t ase_awk_strxtolong (
	ase_awk_t* awk, const ase_char_t* str, ase_size_t len,
	int base, const ase_char_t** endptr)
{
	const ase_char_t* cur = str;
	const ase_char_t* const limit = str + len;
	ase_long_t value = 0;
	ase_size_t avail;
	int minus = 0;
	int dv;

	ASE_ASSERT (base < 37);

	/* consume the sign characters */
	for ( ; cur < limit; cur++)
	{
		if (*cur == ASE_T('-')) minus = !minus;
		else if (*cur != ASE_T('+')) break;
	}

	/* look for a binary/octal/hexadecimal prefix */
	avail = limit - cur;
	if (base == 0)
	{
		if (avail == 0 || *cur != ASE_T('0')) base = 10;
		else
		{
			/* a leading zero means octal unless a radix letter follows */
			cur++;
			base = 8;

			if (avail > 1)
			{
				if (*cur == ASE_T('x') || *cur == ASE_T('X'))
				{
					cur++;
					base = 16;
				}
				else if (*cur == ASE_T('b') || *cur == ASE_T('B'))
				{
					cur++;
					base = 2;
				}
			}
		}
	}
	else if (avail >= 2 && cur[0] == ASE_T('0'))
	{
		if (base == 16)
		{
			if (cur[1] == ASE_T('x') || cur[1] == ASE_T('X')) cur += 2;
		}
		else if (base == 2)
		{
			if (cur[1] == ASE_T('b') || cur[1] == ASE_T('B')) cur += 2;
		}
	}

	/* accumulate digits until one is invalid for the base */
	while (cur < limit)
	{
		if (*cur >= ASE_T('0') && *cur <= ASE_T('9'))
		{
			dv = *cur - ASE_T('0');
		}
		else if (*cur >= ASE_T('A') && *cur <= ASE_T('Z'))
		{
			dv = *cur - ASE_T('A') + 10;
		}
		else if (*cur >= ASE_T('a') && *cur <= ASE_T('z'))
		{
			dv = *cur - ASE_T('a') + 10;
		}
		else break;

		if (dv >= base) break;

		value = value * base + dv;
		cur++;
	}

	if (endptr != ASE_NULL) *endptr = cur;

	if (minus) return -value;
	return value;
}
/*
* ase_awk_strtoreal is almost a replica of strtod.
*
* strtod.c --
*
* Source code for the "strtod" library procedure.
*
* Copyright (c) 1988-1993 The Regents of the University of California.
* Copyright (c) 1994 Sun Microsystems, Inc.
*
* Permission to use, copy, modify, and distribute this
* software and its documentation for any purpose and without
* fee is hereby granted, provided that the above copyright
* notice appear in all copies. The University of California
* makes no representations about the suitability of this
* software for any purpose. It is provided "as is" without
* express or implied warranty.
*/
/*
 * Largest decimal exponent magnitude applied by the string-to-real
 * converters in this file; larger magnitudes are clamped to this value.
 * 511 (2^9 - 1) is the largest exponent that can be composed from the
 * nine entries of their powers_of_10 tables.
 */
#define MAX_EXPONENT 511
/*
 * Converts the null-terminated string 'str' to a real number.
 *
 * The accepted form is: any run of '+'/'-' signs (each '-' flips the
 * sign), decimal digits with at most one '.', and an optional exponent
 * introduced by 'E' or 'e' with an optional sign. Leading white space
 * is not skipped. Scanning stops at the first character that does not
 * fit; text without any mantissa digit yields zero.
 *
 * Only the first 18 mantissa digits contribute to the value, and the
 * magnitude of the effective decimal exponent is clamped to
 * MAX_EXPONENT.
 *
 * 'awk' is used for digit classification through ASE_AWK_ISDIGIT.
 */
ase_real_t ase_awk_strtoreal (ase_awk_t* awk, const ase_char_t* str)
{
	/*
	 * Table giving binary powers of 10. Entry i is 10^(2^i).
	 * Used to convert decimal exponents into floating-point numbers.
	 */
	static const ase_real_t powers_of_10[] =
	{
		10.,    100.,   1.0e4,   1.0e8,   1.0e16,
		1.0e32, 1.0e64, 1.0e128, 1.0e256
	};

	ase_real_t fraction, dbl_exp;
	const ase_real_t* d;
	const ase_char_t* p;
	ase_cint_t c;
	int exp = 0;   /* Exponent read from "EX" field */
	int exp_limit; /* Value beyond which 'exp' no longer needs to grow */

	/*
	 * Exponent that derives from the fractional part. Under normal
	 * circumstances, it is the negative of the number of digits in F.
	 * However, if I is very long, the last digits of I get dropped
	 * (otherwise a long I with a large negative exponent could cause an
	 * unnecessary overflow on I alone). In this case, frac_exp is
	 * incremented one for each dropped digit.
	 */
	int frac_exp;
	int mant_size; /* Number of digits in mantissa. */
	int dec_pt;    /* Number of mantissa digits BEFORE decimal point */
	const ase_char_t *pexp; /* Temporarily holds location of exponent in string */
	int negative = 0, exp_negative = 0;

	p = str;

	/* check for a sign; leading blanks are deliberately not skipped */
	while (*p != ASE_T('\0'))
	{
		if (*p == ASE_T('-'))
		{
			negative = !negative;
			p++;
		}
		else if (*p == ASE_T('+')) p++;
		else break;
	}

	/* Count the number of digits in the mantissa (including the decimal
	 * point), and also locate the decimal point. */
	dec_pt = -1;
	for (mant_size = 0; ; mant_size++)
	{
		c = *p;
		if (!ASE_AWK_ISDIGIT (awk, c))
		{
			if ((c != ASE_T('.')) || (dec_pt >= 0)) break;
			dec_pt = mant_size;
		}
		p++;
	}

	/*
	 * Now suck up the digits in the mantissa. Use two integers to
	 * collect 9 digits each (this is faster than using floating-point).
	 * If the mantissa has more than 18 digits, ignore the extras, since
	 * they can't affect the value anyway.
	 */
	pexp = p;
	p -= mant_size;
	if (dec_pt < 0)
	{
		dec_pt = mant_size;
	}
	else
	{
		mant_size--; /* One of the digits was the point */
	}

	if (mant_size > 18)
	{
		frac_exp = dec_pt - 18;
		mant_size = 18;
	}
	else
	{
		frac_exp = dec_pt - mant_size;
	}

	if (mant_size == 0)
	{
		/* no mantissa digit at all */
		fraction = 0.0;
		p = pexp;
		goto done;
	}
	else
	{
		int frac1, frac2;

		/* digits beyond the last nine go to frac1 */
		frac1 = 0;
		for ( ; mant_size > 9; mant_size--)
		{
			c = *p;
			p++;
			if (c == ASE_T('.'))
			{
				c = *p;
				p++;
			}
			frac1 = 10 * frac1 + (c - ASE_T('0'));
		}

		/* the last (up to) nine digits go to frac2 */
		frac2 = 0;
		for ( ; mant_size > 0; mant_size--)
		{
			c = *p;
			p++;
			if (c == ASE_T('.'))
			{
				c = *p;
				p++;
			}
			frac2 = 10 * frac2 + (c - ASE_T('0'));
		}

		fraction = (1.0e9 * frac1) + frac2;
	}

	/* Skim off the exponent */
	p = pexp;
	if ((*p == ASE_T('E')) || (*p == ASE_T('e')))
	{
		p++;
		if (*p == ASE_T('-'))
		{
			exp_negative = 1;
			p++;
		}
		else
		{
			if (*p == ASE_T('+')) p++;
			exp_negative = 0;
		}

		if (!ASE_AWK_ISDIGIT (awk, *p))
		{
			/* no exponent digits; frac_exp must still be applied */
			goto no_exp;
		}

		/*
		 * Once exp has reached MAX_EXPONENT + |frac_exp|, the combined
		 * exponent computed below is certain to be clamped to
		 * MAX_EXPONENT, so further digits are consumed without being
		 * accumulated. This keeps an overlong exponent field from
		 * overflowing 'exp'.
		 */
		exp_limit = MAX_EXPONENT + ((frac_exp < 0)? -frac_exp: frac_exp);
		while (ASE_AWK_ISDIGIT (awk, *p))
		{
			if (exp < exp_limit) exp = exp * 10 + (*p - ASE_T('0'));
			p++;
		}
	}

no_exp:
	if (exp_negative) exp = frac_exp - exp;
	else exp = frac_exp + exp;

	/*
	 * Generate a floating-point number that represents the exponent.
	 * Do this by processing the exponent one bit at a time to combine
	 * many powers of 2 of 10. Then combine the exponent with the
	 * fraction.
	 */
	if (exp < 0)
	{
		exp_negative = 1;
		exp = -exp;
	}
	else exp_negative = 0;

	if (exp > MAX_EXPONENT) exp = MAX_EXPONENT;

	dbl_exp = 1.0;
	for (d = powers_of_10; exp != 0; exp >>= 1, d++)
	{
		if (exp & 01) dbl_exp *= *d;
	}

	if (exp_negative) fraction /= dbl_exp;
	else fraction *= dbl_exp;

done:
	return (negative)? -fraction: fraction;
}
/*
 * Converts the first 'len' characters of 'str' to a real number.
 *
 * The accepted form is: any run of '+'/'-' signs (each '-' flips the
 * sign), decimal digits with at most one '.', and an optional exponent
 * introduced by 'E' or 'e' with an optional sign. Leading white space
 * is not skipped. Scanning never goes beyond str + len; text without
 * any mantissa digit yields zero.
 *
 * Only the first 18 mantissa digits contribute to the value, and the
 * magnitude of the effective decimal exponent is clamped to
 * MAX_EXPONENT.
 *
 * endptr - if not ASE_NULL, receives the position where scanning
 *          stopped.
 *
 * 'awk' is used for digit classification through ASE_AWK_ISDIGIT.
 *
 * NOTE(review): when an exponent marker is not followed by a digit, the
 * marker (and its sign, if any) still count as consumed, so *endptr
 * points past them. strtod would leave them unconsumed - confirm that
 * callers expect the current behaviour.
 */
ase_real_t ase_awk_strxtoreal (
	ase_awk_t* awk, const ase_char_t* str, ase_size_t len,
	const ase_char_t** endptr)
{
	/*
	 * Table giving binary powers of 10. Entry i is 10^(2^i).
	 * Used to convert decimal exponents into floating-point numbers.
	 */
	static const ase_real_t powers_of_10[] =
	{
		10.,    100.,   1.0e4,   1.0e8,   1.0e16,
		1.0e32, 1.0e64, 1.0e128, 1.0e256
	};

	ase_real_t fraction, dbl_exp;
	const ase_real_t* d;
	const ase_char_t* p, * end;
	ase_cint_t c;
	int exp = 0;   /* Exponent read from "EX" field */
	int exp_limit; /* Value beyond which 'exp' no longer needs to grow */

	/*
	 * Exponent that derives from the fractional part. Under normal
	 * circumstances, it is the negative of the number of digits in F.
	 * However, if I is very long, the last digits of I get dropped
	 * (otherwise a long I with a large negative exponent could cause an
	 * unnecessary overflow on I alone). In this case, frac_exp is
	 * incremented one for each dropped digit.
	 */
	int frac_exp;
	int mant_size; /* Number of digits in mantissa. */
	int dec_pt;    /* Number of mantissa digits BEFORE decimal point */
	const ase_char_t *pexp; /* Temporarily holds location of exponent in string */
	int negative = 0, exp_negative = 0;

	p = str;
	end = str + len;

	/* check for a sign; leading blanks are deliberately not skipped */
	while (p < end)
	{
		if (*p == ASE_T('-'))
		{
			negative = !negative;
			p++;
		}
		else if (*p == ASE_T('+')) p++;
		else break;
	}

	/* Count the number of digits in the mantissa (including the decimal
	 * point), and also locate the decimal point. */
	dec_pt = -1;
	for (mant_size = 0; p < end; mant_size++)
	{
		c = *p;
		if (!ASE_AWK_ISDIGIT (awk, c))
		{
			if (c != ASE_T('.') || dec_pt >= 0) break;
			dec_pt = mant_size;
		}
		p++;
	}

	/*
	 * Now suck up the digits in the mantissa. Use two integers to
	 * collect 9 digits each (this is faster than using floating-point).
	 * If the mantissa has more than 18 digits, ignore the extras, since
	 * they can't affect the value anyway.
	 */
	pexp = p;
	p -= mant_size;
	if (dec_pt < 0)
	{
		dec_pt = mant_size;
	}
	else
	{
		mant_size--; /* One of the digits was the point */
	}

	if (mant_size > 18) /* TODO: is 18 correct for ase_real_t??? */
	{
		frac_exp = dec_pt - 18;
		mant_size = 18;
	}
	else
	{
		frac_exp = dec_pt - mant_size;
	}

	if (mant_size == 0)
	{
		/* no mantissa digit at all */
		fraction = 0.0;
		p = pexp;
		goto done;
	}
	else
	{
		int frac1, frac2;

		/* digits beyond the last nine go to frac1 */
		frac1 = 0;
		for ( ; mant_size > 9; mant_size--)
		{
			c = *p;
			p++;
			if (c == ASE_T('.'))
			{
				c = *p;
				p++;
			}
			frac1 = 10 * frac1 + (c - ASE_T('0'));
		}

		/* the last (up to) nine digits go to frac2 */
		frac2 = 0;
		for ( ; mant_size > 0; mant_size--)
		{
			c = *p++;
			if (c == ASE_T('.'))
			{
				c = *p;
				p++;
			}
			frac2 = 10 * frac2 + (c - ASE_T('0'));
		}

		fraction = (1.0e9 * frac1) + frac2;
	}

	/* Skim off the exponent */
	p = pexp;
	if (p < end && (*p == ASE_T('E') || *p == ASE_T('e')))
	{
		p++;

		if (p < end)
		{
			if (*p == ASE_T('-'))
			{
				exp_negative = 1;
				p++;
			}
			else
			{
				if (*p == ASE_T('+')) p++;
				exp_negative = 0;
			}
		}
		else exp_negative = 0;

		if (!(p < end && ASE_AWK_ISDIGIT (awk, *p)))
		{
			/* no exponent digits; frac_exp must still be applied */
			goto no_exp;
		}

		/*
		 * Once exp has reached MAX_EXPONENT + |frac_exp|, the combined
		 * exponent computed below is certain to be clamped to
		 * MAX_EXPONENT, so further digits are consumed without being
		 * accumulated. This keeps an overlong exponent field from
		 * overflowing 'exp'.
		 */
		exp_limit = MAX_EXPONENT + ((frac_exp < 0)? -frac_exp: frac_exp);
		while (p < end && ASE_AWK_ISDIGIT (awk, *p))
		{
			if (exp < exp_limit) exp = exp * 10 + (*p - ASE_T('0'));
			p++;
		}
	}

no_exp:
	if (exp_negative) exp = frac_exp - exp;
	else exp = frac_exp + exp;

	/*
	 * Generate a floating-point number that represents the exponent.
	 * Do this by processing the exponent one bit at a time to combine
	 * many powers of 2 of 10. Then combine the exponent with the
	 * fraction.
	 */
	if (exp < 0)
	{
		exp_negative = 1;
		exp = -exp;
	}
	else exp_negative = 0;

	if (exp > MAX_EXPONENT) exp = MAX_EXPONENT;

	dbl_exp = 1.0;
	for (d = powers_of_10; exp != 0; exp >>= 1, d++)
	{
		if (exp & 01) dbl_exp *= *d;
	}

	if (exp_negative) fraction /= dbl_exp;
	else fraction *= dbl_exp;

done:
	if (endptr != ASE_NULL) *endptr = p;
	return (negative)? -fraction: fraction;
}
/*
 * Formats 'value' in the given 'radix' as: an optional '-', then
 * 'prefix' (if not ASE_NULL), then the digits. Digit values of 10 and
 * above are written as lowercase letters starting at 'a'.
 *
 * buf  - destination. If ASE_NULL, nothing is written and the number
 *        of characters required (excluding any terminator) is returned.
 * size - capacity of 'buf' in characters. A terminating null character
 *        is appended only when 'size' is larger than the formatted
 *        length.
 *
 * Returns the number of characters produced (excluding the terminator),
 * or (ase_size_t)-1 if 'buf' is too small, or if 'radix' is less than 2
 * while 'value' is not zero.
 */
ase_size_t ase_awk_longtostr (
	ase_long_t value, int radix, const ase_char_t* prefix,
	ase_char_t* buf, ase_size_t size)
{
	ase_long_t t, q, rem;
	ase_size_t len, ret, i;
	ase_size_t prefix_len;

	prefix_len = (prefix != ASE_NULL)? ase_strlen(prefix): 0;

	if (value == 0)
	{
		/* zero is always the prefix followed by a single '0' */
		if (buf == ASE_NULL)
		{
			/* if buf is not given,
			 * return the number of characters required */
			return prefix_len + 1;
		}

		if (size < prefix_len + 1)
		{
			/* buffer too small */
			return (ase_size_t)-1;
		}

		for (i = 0; i < prefix_len; i++) buf[i] = prefix[i];
		buf[prefix_len] = ASE_T('0');
		if (size > prefix_len + 1) buf[prefix_len + 1] = ASE_T('\0');
		return prefix_len + 1;
	}

	/* a radix below 2 would divide by zero or never terminate */
	if (radix < 2) return (ase_size_t)-1;

	/*
	 * Work on the non-positive magnitude. Negating a positive value
	 * is always representable, whereas negating the most negative
	 * value is not.
	 */
	t = (value > 0)? -value: value;

	/* count the characters required */
	len = prefix_len;
	if (value < 0) len++; /* room for the sign */
	while (t < 0)
	{
		q = t / radix;
		/* make the quotient round toward zero whichever way
		 * the implementation rounds a negative division */
		if (t % radix > 0) q++;
		t = q;
		len++;
	}

	if (buf == ASE_NULL)
	{
		/* if buf is not given, return the number of characters required */
		return len;
	}

	if (size < len) return (ase_size_t)-1; /* buffer too small */
	if (size > len) buf[len] = ASE_T('\0');
	ret = len;

	/* emit the digits from the least significant one backward */
	t = (value > 0)? -value: value;
	while (t < 0)
	{
		q = t / radix;
		rem = t % radix;
		if (rem > 0)
		{
			/* normalize so that rem is in (-radix, 0] */
			rem -= radix;
			q++;
		}
		rem = -rem;

		if (rem >= 10)
			buf[--len] = (ase_char_t)rem + ASE_T('a') - 10;
		else
			buf[--len] = (ase_char_t)rem + ASE_T('0');

		t = q;
	}

	/* the sign and the prefix occupy the front of the buffer */
	len = 0;
	if (value < 0) buf[len++] = ASE_T('-');
	for (i = 0; i < prefix_len; i++) buf[len++] = prefix[i];

	return ret;
}
/*
 * Tokenizes the null-terminated string 's' using the null-terminated
 * delimiter set 'delim'. This is ase_awk_strxntok with both lengths
 * computed by ase_strlen.
 *
 * 'delim' may be ASE_NULL, which ase_awk_strxntok treats as a request
 * to trim surrounding white space; its length is then passed as 0
 * instead of being measured.
 */
ase_char_t* ase_awk_strtok (
	ase_awk_run_t* run, const ase_char_t* s,
	const ase_char_t* delim, ase_char_t** tok, ase_size_t* tok_len)
{
	ase_size_t delim_len;

	delim_len = (delim != ASE_NULL)? ase_strlen(delim): 0;

	return ase_awk_strxntok (
		run, s, ase_strlen(s),
		delim, delim_len, tok, tok_len);
}
/*
 * Tokenizes the 'len'-character string 's' using the null-terminated
 * delimiter set 'delim'. This is ase_awk_strxntok with the delimiter
 * length computed by ase_strlen.
 *
 * 'delim' may be ASE_NULL, which ase_awk_strxntok treats as a request
 * to trim surrounding white space; its length is then passed as 0
 * instead of being measured.
 */
ase_char_t* ase_awk_strxtok (
	ase_awk_run_t* run, const ase_char_t* s, ase_size_t len,
	const ase_char_t* delim, ase_char_t** tok, ase_size_t* tok_len)
{
	ase_size_t delim_len;

	delim_len = (delim != ASE_NULL)? ase_strlen(delim): 0;

	return ase_awk_strxntok (
		run, s, len,
		delim, delim_len, tok, tok_len);
}
/*
 * Tokenizes the null-terminated string 's' using the 'delim_len'
 * characters at 'delim' as the delimiter set. This is
 * ase_awk_strxntok with the source length computed by ase_strlen.
 */
ase_char_t* ase_awk_strntok (
	ase_awk_run_t* run, const ase_char_t* s,
	const ase_char_t* delim, ase_size_t delim_len,
	ase_char_t** tok, ase_size_t* tok_len)
{
	ase_size_t s_len;

	s_len = ase_strlen(s);
	return ase_awk_strxntok (run, s, s_len, delim, delim_len, tok, tok_len);
}
/*
 * Reports whether 'c' equals one of the characters in
 * [delim, delim_end). With 'ignorecase' set, every delimiter is
 * upper-cased before the comparison; the caller is expected to pass
 * an already upper-cased 'c' in that case.
 */
static int strxntok_is_delim (
	ase_awk_run_t* run, ase_char_t c,
	const ase_char_t* delim, const ase_char_t* delim_end, int ignorecase)
{
	const ase_char_t* d;

	if (ignorecase)
	{
		for (d = delim; d < delim_end; d++)
		{
			if (c == ASE_AWK_TOUPPER(run->awk,*d)) return 1;
		}
	}
	else
	{
		for (d = delim; d < delim_end; d++)
		{
			if (c == *d) return 1;
		}
	}

	return 0;
}

/*
 * Extracts the next token from the 'len'-character string 's'.
 *
 * The delimiter set (delim, delim_len) selects one of five modes:
 *   - delim is ASE_NULL: the whole string is one token with leading
 *     and trailing space characters trimmed.
 *   - empty set: every character is a token of its own.
 *   - space characters only: tokens are separated by runs of space
 *     characters. A single space character other than ' ' is handled
 *     like a non-space delimiter.
 *   - non-space characters only: tokens are separated by any single
 *     character of the set.
 *   - both kinds: tokens are separated by the non-space characters of
 *     the set, and space characters around a token are dropped.
 * Delimiter comparison ignores case when run->global.ignorecase is set.
 *
 * tok, tok_len - receive the token start and length, or ASE_NULL and 0
 *                when no token was found.
 *
 * Returns the position at which the next call should resume, or
 * ASE_NULL when the string is exhausted; the function should not be
 * called again after ASE_NULL is returned.
 */
ase_char_t* ase_awk_strxntok (
	ase_awk_run_t* run, const ase_char_t* s, ase_size_t len,
	const ase_char_t* delim, ase_size_t delim_len,
	ase_char_t** tok, ase_size_t* tok_len)
{
	const ase_char_t* cur = s;
	const ase_char_t* const stop = s + len;
	const ase_char_t* first = ASE_NULL; /* first character of the token */
	const ase_char_t* last = ASE_NULL;  /* last character of the token */
	const ase_char_t* const delim_end = delim + delim_len;
	const ase_char_t* d;
	ase_char_t c;
	int delim_mode;
	int icase;

#define __DELIM_NULL 0
#define __DELIM_EMPTY 1
#define __DELIM_SPACES 2
#define __DELIM_NOSPACES 3
#define __DELIM_COMPOSITE 4

	/* classify the delimiter set */
	if (delim == ASE_NULL) delim_mode = __DELIM_NULL;
	else
	{
		int has_space = 0, has_other = 0;

		/* stop scanning as soon as both kinds have been seen */
		for (d = delim; d < delim_end && !(has_space && has_other); d++)
		{
			if (ASE_AWK_ISSPACE(run->awk,*d)) has_space = 1;
			else has_other = 1;
		}

		if (has_space && has_other) delim_mode = __DELIM_COMPOSITE;
		else if (has_space) delim_mode = __DELIM_SPACES;
		else if (has_other) delim_mode = __DELIM_NOSPACES;
		else delim_mode = __DELIM_EMPTY;

		/* TODO: verify the following statement... */
		if (delim_mode == __DELIM_SPACES &&
		    delim_len == 1 &&
		    delim[0] != ASE_T(' ')) delim_mode = __DELIM_NOSPACES;
	}

	switch (delim_mode)
	{
		case __DELIM_NULL:
			/* trim the leading and trailing space characters
			 * off the source string */
			while (cur < stop && ASE_AWK_ISSPACE(run->awk,*cur)) cur++;
			for ( ; cur < stop; cur++)
			{
				if (!ASE_AWK_ISSPACE(run->awk,*cur))
				{
					if (first == ASE_NULL) first = cur;
					last = cur;
				}
			}
			break;

		case __DELIM_EMPTY:
			/* each character in the source string is a token */
			if (cur < stop)
			{
				first = cur;
				last = cur;
				cur++;
			}
			break;

		case __DELIM_SPACES:
			/* tokens are delimited by runs of space characters;
			 * the runs on both sides are consumed */
			while (cur < stop && ASE_AWK_ISSPACE(run->awk,*cur)) cur++;
			for ( ; cur < stop; cur++)
			{
				if (ASE_AWK_ISSPACE(run->awk,*cur)) break;
				if (first == ASE_NULL) first = cur;
				last = cur;
			}
			while (cur < stop && ASE_AWK_ISSPACE(run->awk,*cur)) cur++;
			break;

		case __DELIM_NOSPACES:
			/* tokens are delimited by any one character of the set */
			icase = (run->global.ignorecase)? 1: 0;
			for ( ; cur < stop; cur++)
			{
				if (icase) c = ASE_AWK_TOUPPER(run->awk, *cur);
				else c = *cur;

				if (strxntok_is_delim (
					run, c, delim, delim_end, icase)) break;

				if (first == ASE_NULL) first = cur;
				last = cur;
			}
			break;

		default: /* __DELIM_COMPOSITE */
			/* tokens are delimited by the non-space characters of
			 * the set; surrounding space characters are dropped */
			while (cur < stop && ASE_AWK_ISSPACE(run->awk,*cur)) cur++;
			icase = (run->global.ignorecase)? 1: 0;
			for ( ; cur < stop; cur++)
			{
				if (icase) c = ASE_AWK_TOUPPER(run->awk, *cur);
				else c = *cur;

				if (ASE_AWK_ISSPACE(run->awk,c)) continue;

				if (strxntok_is_delim (
					run, c, delim, delim_end, icase)) break;

				if (first == ASE_NULL) first = cur;
				last = cur;
			}
			break;
	}

	if (first == ASE_NULL)
	{
		*tok = ASE_NULL;
		*tok_len = (ase_size_t)0;
	}
	else
	{
		*tok = (ase_char_t*)first;
		*tok_len = last - first + 1;
	}

	/* if ASE_NULL is returned, this function should not be called anymore */
	if (cur >= stop) return ASE_NULL;

	/* in these two modes 'cur' already points at the next token */
	if (delim_mode == __DELIM_EMPTY ||
	    delim_mode == __DELIM_SPACES) return (ase_char_t*)cur;

	/* otherwise 'cur' points at the delimiter; step over it */
	return (ase_char_t*)(cur + 1);
}
/*
 * Extracts the next token from the 'len'-character string 's', using
 * the compiled regular expression 'rex' as the delimiter.
 *
 * The token runs from the start of the string (or from just after a
 * skipped leading delimiter, see below) up to the first non-empty
 * match. Zero-length matches are stepped over one character at a
 * time. When ASE_AWK_STRIPSPACES is set in run->awk->option, a match
 * at the very beginning of 's' that consists of space characters only
 * is skipped rather than treated as a delimiter.
 *
 * tok, tok_len - receive the token start and length.
 * errnum       - set to ASE_AWK_ENOERR on success; on a matcher
 *                failure it holds whatever ASE_AWK_MATCHREX stored.
 *
 * Returns the position just after the delimiter, from which the next
 * call should resume, or ASE_NULL when no further call is needed
 * (no delimiter found, input exhausted) or when matching failed.
 */
ase_char_t* ase_awk_strxntokbyrex (
	ase_awk_run_t* run, const ase_char_t* s, ase_size_t len,
	void* rex, ase_char_t** tok, ase_size_t* tok_len, int* errnum)
{
	int n;
	ase_char_t* match_ptr = ASE_NULL;
	ase_size_t match_len, i;
	ase_size_t left = len;         /* characters not yet searched */
	const ase_char_t* ptr = s;     /* where the next search starts */
	const ase_char_t* str_ptr = s; /* where the token starts */
	ase_size_t str_len = len;      /* characters from str_ptr to the end */

	/*
	 * The loop is driven by the remaining length rather than 'len':
	 * every zero-length match consumes one character, and searching
	 * must stop once nothing is left, or 'left' would wrap around and
	 * 'ptr' would run past the end of the input.
	 */
	while (left > 0)
	{
		n = ASE_AWK_MATCHREX (
			run->awk, rex,
			((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
			ptr, left, (const ase_char_t**)&match_ptr, &match_len,
			errnum);
		if (n == -1) return ASE_NULL;
		if (n == 0)
		{
			/* no match has been found.
			 * return the entire string as a token */
			*tok = (ase_char_t*)str_ptr;
			*tok_len = str_len;
			*errnum = ASE_AWK_ENOERR;
			return ASE_NULL;
		}

		ASE_ASSERT (n == 1);

		if (match_len == 0)
		{
			/* an empty match cannot delimit anything;
			 * retry one character further */
			ptr++;
			left--;
		}
		else if (run->awk->option & ASE_AWK_STRIPSPACES)
		{
			/* match at the beginning of the input string */
			if (match_ptr == s)
			{
				for (i = 0; i < match_len; i++)
				{
					if (!ASE_AWK_ISSPACE(run->awk, match_ptr[i]))
						goto exit_loop;
				}

				/* the match that are all spaces at the
				 * beginning of the input string is skipped */
				ptr += match_len;
				left -= match_len;
				str_ptr = s + match_len;
				str_len -= match_len;
			}
			else break;
		}
		else break;
	}

exit_loop:
	if (left == 0)
	{
		/* nothing left to search, so no delimiter was found.
		 * return the rest of the string as a token */
		*tok = (ase_char_t*)str_ptr;
		*tok_len = str_len;
		*errnum = ASE_AWK_ENOERR;
		return ASE_NULL;
	}

	*tok = (ase_char_t*)str_ptr;
	*tok_len = match_ptr - str_ptr;

	/* a delimiter with a non-space character always leaves room
	 * for another token, even an empty one at the end */
	for (i = 0; i < match_len; i++)
	{
		if (!ASE_AWK_ISSPACE(run->awk, match_ptr[i]))
		{
			*errnum = ASE_AWK_ENOERR;
			return match_ptr+match_len;
		}
	}

	/* the delimiter consists of space characters only */
	*errnum = ASE_AWK_ENOERR;

	if (run->awk->option & ASE_AWK_STRIPSPACES)
	{
		/* a trailing all-space delimiter ends the tokenization */
		return (match_ptr+match_len >= s+len)?
			ASE_NULL: (match_ptr+match_len);
	}
	else
	{
		return (match_ptr+match_len > s+len)?
			ASE_NULL: (match_ptr+match_len);
	}
}
/*
 * Maps an ASE_REX_* error code to the corresponding ASE_AWK_* error
 * code. Codes that are not listed map to ASE_AWK_EINTERN.
 *
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define ASE_AWK_REXERRTOERR(err) \
	(((err) == ASE_REX_ENOERR)?    ASE_AWK_ENOERR: \
	 ((err) == ASE_REX_ENOMEM)?    ASE_AWK_ENOMEM: \
	 ((err) == ASE_REX_ERECUR)?    ASE_AWK_EREXRECUR: \
	 ((err) == ASE_REX_ERPAREN)?   ASE_AWK_EREXRPAREN: \
	 ((err) == ASE_REX_ERBRACKET)? ASE_AWK_EREXRBRACKET: \
	 ((err) == ASE_REX_ERBRACE)?   ASE_AWK_EREXRBRACE: \
	 ((err) == ASE_REX_EUNBALPAR)? ASE_AWK_EREXUNBALPAR: \
	 ((err) == ASE_REX_ECOLON)?    ASE_AWK_EREXCOLON: \
	 ((err) == ASE_REX_ECRANGE)?   ASE_AWK_EREXCRANGE: \
	 ((err) == ASE_REX_ECCLASS)?   ASE_AWK_EREXCCLASS: \
	 ((err) == ASE_REX_EBRANGE)?   ASE_AWK_EREXBRANGE: \
	 ((err) == ASE_REX_EEND)?      ASE_AWK_EREXEND: \
	 ((err) == ASE_REX_EGARBAGE)?  ASE_AWK_EREXGARBAGE: \
	                               ASE_AWK_EINTERN)
/*
 * Compiles the 'len'-character pattern 'ptn' by calling ase_buildrex
 * with the memory manager and the build depth limit stored in 'awk'.
 *
 * Returns the result of ase_buildrex. When that result is ASE_NULL,
 * *errnum is set to the awk error code that corresponds to the error
 * reported by ase_buildrex; otherwise *errnum is left untouched.
 */
void* ase_awk_buildrex (
	ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum)
{
	int rexerr;
	void* code;

	code = ase_buildrex (
		&awk->prmfns.mmgr, awk->rex.depth.max.build, ptn, len, &rexerr);
	if (code != ASE_NULL) return code;

	*errnum = ASE_AWK_REXERRTOERR(rexerr);
	return ASE_NULL;
}
/*
 * Matches the 'len'-character string 'str' against the compiled
 * expression 'code' by calling ase_matchrex with the memory manager,
 * the character class handler and the match depth limit stored in
 * 'awk'. 'option', 'match_ptr' and 'match_len' are passed through.
 *
 * Returns the result of ase_matchrex. When that result is negative,
 * *errnum is set to the awk error code that corresponds to the error
 * reported by ase_matchrex; otherwise *errnum is left untouched.
 */
int ase_awk_matchrex (
	ase_awk_t* awk, void* code, int option,
	const ase_char_t* str, ase_size_t len,
	const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
{
	int rexerr;
	int rc;

	rc = ase_matchrex (
		&awk->prmfns.mmgr, &awk->prmfns.ccls, awk->rex.depth.max.match,
		code, option, str, len, match_ptr, match_len, &rexerr);
	if (rc >= 0) return rc;

	*errnum = ASE_AWK_REXERRTOERR(rexerr);
	return rc;
}