/*
 * $Id: misc.c,v 1.29 2006-10-22 11:34:53 bacon Exp $
 */

#include <sse/awk/awk_i.h>

/*
 * Copies n bytes from src to dst, one byte at a time in ascending
 * address order, and returns dst.
 */
void* sse_awk_memcpy  (void* dst, const void* src, sse_size_t n)
{
	sse_byte_t* out = (sse_byte_t*)dst;
	const sse_byte_t* in = (const sse_byte_t*)src;
	sse_size_t i;

	for (i = 0; i < n; i++) out[i] = in[i];

	return dst;
}

/*
 * Stores val (converted to sse_byte_t) into each of the first n bytes
 * at dst and returns dst.
 */
void* sse_awk_memset (void* dst, int val, sse_size_t n)
{
	sse_byte_t* out = (sse_byte_t*)dst;
	const sse_byte_t fill = (sse_byte_t)val;

	while (n-- > 0) *out++ = fill;

	return dst;
}

/*
 * Converts the leading part of the length-bounded string str[0..len)
 * to a long integer.
 *
 * A run of '+'/'-' characters is accepted first; every '-' flips the
 * sign. When base is 0 the base is detected from the text: "0x"/"0X"
 * selects 16, "0b"/"0B" selects 2, any other leading '0' selects 8 and
 * everything else selects 10. When base is 16 or 2 a matching
 * "0x"/"0b" prefix is skipped. Digits are '0'-'9' and letters (either
 * case) standing for 10 and up; scanning stops at the first character
 * that is not a digit of the base. Leading white space is not skipped
 * and overflow is not detected.
 *
 * If endptr is not SSE_NULL it receives the position where scanning
 * stopped.
 */
sse_long_t sse_awk_strxtolong (
	sse_awk_t* awk, const sse_char_t* str, sse_size_t len,
	int base, const sse_char_t** endptr)
{
	const sse_char_t* cur = str;
	const sse_char_t* const limit = str + len;
	sse_long_t acc = 0;
	sse_size_t avail;
	int minus = 0;
	int val;

	sse_awk_assert (awk, base < 37);

	/* consume the sign characters */
	for ( ; cur < limit; cur++)
	{
		if (*cur == SSE_T('-')) minus = !minus;
		else if (*cur != SSE_T('+')) break;
	}

	/* number of characters left after the signs */
	avail = limit - cur;

	if (base == 0)
	{
		/* work out the base from the notation */
		base = 10;

		if (avail >= 1 && *cur == SSE_T('0'))
		{
			cur++;
			base = 8;

			if (avail > 1)
			{
				if (*cur == SSE_T('x') || *cur == SSE_T('X'))
				{
					cur++;
					base = 16;
				}
				else if (*cur == SSE_T('b') || *cur == SSE_T('B'))
				{
					cur++;
					base = 2;
				}
			}
		}
	}
	else if (avail >= 2 && *cur == SSE_T('0'))
	{
		/* skip an optional prefix that agrees with the given base */
		const sse_char_t mark = *(cur + 1);

		if (base == 16)
		{
			if (mark == SSE_T('x') || mark == SSE_T('X')) cur += 2;
		}
		else if (base == 2)
		{
			if (mark == SSE_T('b') || mark == SSE_T('B')) cur += 2;
		}
	}

	/* accumulate the digits */
	while (cur < limit)
	{
		const sse_char_t ch = *cur;

		if (ch >= SSE_T('0') && ch <= SSE_T('9'))
			val = ch - SSE_T('0');
		else if (ch >= SSE_T('A') && ch <= SSE_T('Z'))
			val = ch - SSE_T('A') + 10;
		else if (ch >= SSE_T('a') && ch <= SSE_T('z'))
			val = ch - SSE_T('a') + 10;
		else break;

		if (val >= base) break;

		acc = acc * base + val;
		cur++;
	}

	if (endptr != SSE_NULL) *endptr = cur;

	if (minus) return -acc;
	return acc;
}


/*
 * sse_awk_strtoreal is almost a replica of strtod.
 *
 * strtod.c --
 *
 *      Source code for the "strtod" library procedure.
 *
 * Copyright (c) 1988-1993 The Regents of the University of California.
 * Copyright (c) 1994 Sun Microsystems, Inc.
 *
 * Permission to use, copy, modify, and distribute this
 * software and its documentation for any purpose and without
 * fee is hereby granted, provided that the above copyright
 * notice appear in all copies.  The University of California
 * makes no representations about the suitability of this
 * software for any purpose.  It is provided "as is" without
 * express or implied warranty.
 */

/*
 * Largest decimal exponent magnitude applied by the string-to-real
 * conversions below; larger magnitudes are clamped to this value.
 * 511 is 2^9 - 1, so each of its nine bits selects one of the nine
 * entries of the powers_of_10 tables.
 */
#define MAX_ESSEONENT 511

/*
 * Converts the leading part of the NUL-terminated string str to a
 * floating-point value.
 *
 * Accepted form: a run of '+'/'-' characters (every '-' flips the
 * sign), decimal digits containing at most one '.', and an optional
 * exponent introduced by 'E' or 'e' with an optional sign. Leading
 * white space is NOT skipped (that code is commented out below).
 * awk is only used to classify characters through SSE_AWK_ISDIGIT.
 *
 * Returns the converted value, or a (signed) zero when the mantissa
 * contains no digit. The position where scanning stopped is not
 * reported to the caller.
 */
sse_real_t sse_awk_strtoreal (sse_awk_t* awk, const sse_char_t* str)
{
	/* 
	 * Table giving binary powers of 10. Entry i is 10^(2^i).
	 * Used to convert decimal exponents into floating-point numbers.
	 */ 
	static sse_real_t powers_of_10[] = 
	{
		10.,    100.,   1.0e4,   1.0e8,   1.0e16,
		1.0e32, 1.0e64, 1.0e128, 1.0e256
	};

	sse_real_t fraction, dbl_esse, * d;
	const sse_char_t* p;
	sse_cint_t c;
	int esse = 0;		/* Exponent read from "EX" field */

	/* 
	 * Exponent that derives from the fractional part.  Under normal 
	 * circumstances, it is the negative of the number of digits in F.
	 * However, if I is very long, the last digits of I get dropped 
	 * (otherwise a long I with a large negative exponent could cause an
	 * unnecessary overflow on I alone).  In this case, frac_esse is 
	 * incremented one for each dropped digit. 
	 */

	int frac_esse;
	int mant_size; /* Number of digits in mantissa. */
	int dec_pt;    /* Number of mantissa digits BEFORE decimal point */
	const sse_char_t *pesse;  /* Temporarily holds location of exponent in string */
	int negative = 0, esse_negative = 0;

	p = str;

	/* leading blanks are deliberately not stripped */ 
	/*while (SSE_AWK_ISSPACE(awk,*p)) p++;*/

	/* consume a run of sign characters; each '-' toggles negative
	 * between 0 and ~0 */
	while (*p != SSE_T('\0')) 
	{
		if (*p == SSE_T('-')) 
		{
			negative = ~negative;
			p++;
		}
		else if (*p == SSE_T('+')) p++;
		else break;
	}

	/* Count the number of digits in the mantissa (including the decimal
	 * point), and also locate the decimal point. The terminating NUL is
	 * neither a digit nor '.', so it ends the scan. */
	dec_pt = -1;
	for (mant_size = 0; ; mant_size++) 
	{
		c = *p;
		if (!SSE_AWK_ISDIGIT (awk, c)) 
		{
			/* stop at anything that is not the first '.' */
			if ((c != SSE_T('.')) || (dec_pt >= 0)) break;
			dec_pt = mant_size;
		}
		p++;
	}

	/*
	 * Now suck up the digits in the mantissa.  Use two integers to
	 * collect 9 digits each (this is faster than using floating-point).
	 * If the mantissa has more than 18 digits, ignore the extras, since
	 * they can't affect the value anyway.
	 */
	pesse = p;       /* first character after the mantissa */
	p -= mant_size;  /* rewind to the start of the mantissa */
	if (dec_pt < 0) 
	{
		/* no decimal point: all digits are before it */
		dec_pt = mant_size;
	} 
	else 
	{
		mant_size--;	/* One of the digits was the point */
	}

	if (mant_size > 18) 
	{
		/* only the first 18 digits are collected; the dropped ones
		 * are accounted for in the exponent */
		frac_esse = dec_pt - 18;
		mant_size = 18;
	} 
	else 
	{
		frac_esse = dec_pt - mant_size;
	}

	if (mant_size == 0) 
	{
		/* no digit at all: the result is zero */
		fraction = 0.0;
		/*p = str;*/
		p = pesse;
		goto done;
	} 
	else 
	{
		int frac1, frac2;

		/* frac1 takes every digit beyond the last nine */
		frac1 = 0;
		for ( ; mant_size > 9; mant_size--) 
		{
			c = *p;
			p++;
			if (c == SSE_T('.')) 
			{
				/* step over the decimal point */
				c = *p;
				p++;
			}
			frac1 = 10 * frac1 + (c - SSE_T('0'));
		}

		/* frac2 takes the last (up to) nine digits */
		frac2 = 0;
		for (; mant_size > 0; mant_size--) {
			c = *p;
			p++;
			if (c == SSE_T('.')) 
			{
				c = *p;
				p++;
			}
			frac2 = 10*frac2 + (c - SSE_T('0'));
		}
		fraction = (1.0e9 * frac1) + frac2;
	}

	/* Skim off the exponent */
	p = pesse;
	if ((*p == SSE_T('E')) || (*p == SSE_T('e'))) 
	{
		p++;
		if (*p == SSE_T('-')) 
		{
			esse_negative = 1;
			p++;
		} 
		else 
		{
			if (*p == SSE_T('+')) p++;
			esse_negative = 0;
		}
		if (!SSE_AWK_ISDIGIT (awk, *p)) 
		{
			/* 'e' without digits: treat the exponent as 0 but still
			 * apply the exponent implied by the mantissa */
			/* p = pesse; */
			/* goto done; */
			goto no_esse;
		}
		/* NOTE(review): esse is a plain int and is not bounded while
		 * digits are accumulated, so a very long digit run can
		 * overflow it before the clamp further down - confirm whether
		 * callers can pass such input. */
		while (SSE_AWK_ISDIGIT (awk, *p)) 
		{
			esse = esse * 10 + (*p - SSE_T('0'));
			p++;
		}
	}

no_esse:
	/* combine the explicit exponent with the one implied by the
	 * position of the decimal point */
	if (esse_negative) esse = frac_esse - esse;
	else esse = frac_esse + esse;

	/*
	 * Generate a floating-point number that represents the exponent.
	 * Do this by processing the exponent one bit at a time to combine
	 * many powers of 2 of 10. Then combine the exponent with the
	 * fraction.
	 */
	if (esse < 0) 
	{
		esse_negative = 1;
		esse = -esse;
	} 
	else esse_negative = 0;

	/* clamp so that the bit loop never walks past the table */
	if (esse > MAX_ESSEONENT) esse = MAX_ESSEONENT;

	dbl_esse = 1.0;

	/* multiply in 10^(2^i) for every set bit i of the exponent */
	for (d = powers_of_10; esse != 0; esse >>= 1, d++) 
	{
		if (esse & 01) dbl_esse *= *d;
	}

	if (esse_negative) fraction /= dbl_esse;
	else fraction *= dbl_esse;

done:
	return (negative)? -fraction: fraction;
}

/*
 * Converts the leading part of the length-bounded string str[0..len)
 * to a floating-point value. No terminating NUL is required; every
 * scan is bounded by str + len.
 *
 * Accepted form: a run of '+'/'-' characters (every '-' flips the
 * sign), decimal digits containing at most one '.', and an optional
 * exponent introduced by 'E' or 'e' with an optional sign. Leading
 * white space is NOT skipped (that code is commented out below).
 * awk is only used to classify characters through SSE_AWK_ISDIGIT.
 *
 * Returns the converted value, or a (signed) zero when the mantissa
 * contains no digit. If endptr is not SSE_NULL it receives the
 * position where scanning stopped.
 */
sse_real_t sse_awk_strxtoreal (
	sse_awk_t* awk, const sse_char_t* str, sse_size_t len, 
	const sse_char_t** endptr)
{
	/* 
	 * Table giving binary powers of 10. Entry i is 10^(2^i).
	 * Used to convert decimal exponents into floating-point numbers.
	 */ 
	static sse_real_t powers_of_10[] = 
	{
		10.,    100.,   1.0e4,   1.0e8,   1.0e16,
		1.0e32, 1.0e64, 1.0e128, 1.0e256
	};

	sse_real_t fraction, dbl_esse, * d;
	const sse_char_t* p, * end;
	sse_cint_t c;
	int esse = 0; /* Exponent read from "EX" field */

	/* 
	 * Exponent that derives from the fractional part.  Under normal 
	 * circumstances, it is the negative of the number of digits in F.
	 * However, if I is very long, the last digits of I get dropped 
	 * (otherwise a long I with a large negative exponent could cause an
	 * unnecessary overflow on I alone).  In this case, frac_esse is 
	 * incremented one for each dropped digit. 
	 */

	int frac_esse;
	int mant_size; /* Number of digits in mantissa. */
	int dec_pt;    /* Number of mantissa digits BEFORE decimal point */
	const sse_char_t *pesse;  /* Temporarily holds location of exponent in string */
	int negative = 0, esse_negative = 0;

	p = str;
	end = str + len;

	/* Leading blanks are deliberately not stripped; only check for
	 * a sign */
	/*while (SSE_AWK_ISSPACE(awk,*p)) p++;*/

	/* consume a run of sign characters; each '-' toggles negative
	 * between 0 and ~0 */
	/*while (*p != SSE_T('\0')) */
	while (p < end)
	{
		if (*p == SSE_T('-')) 
		{
			negative = ~negative;
			p++;
		}
		else if (*p == SSE_T('+')) p++;
		else break;
	}

	/* Count the number of digits in the mantissa (including the decimal
	 * point), and also locate the decimal point. */
	dec_pt = -1;
	/*for (mant_size = 0; ; mant_size++) */
	for (mant_size = 0; p < end; mant_size++) 
	{
		c = *p;
		if (!SSE_AWK_ISDIGIT (awk, c)) 
		{
			/* stop at anything that is not the first '.' */
			if (c != SSE_T('.') || dec_pt >= 0) break;
			dec_pt = mant_size;
		}
		p++;
	}

	/*
	 * Now suck up the digits in the mantissa.  Use two integers to
	 * collect 9 digits each (this is faster than using floating-point).
	 * If the mantissa has more than 18 digits, ignore the extras, since
	 * they can't affect the value anyway.
	 */
	pesse = p;       /* first character after the mantissa */
	p -= mant_size;  /* rewind to the start of the mantissa */
	if (dec_pt < 0) 
	{
		/* no decimal point: all digits are before it */
		dec_pt = mant_size;
	} 
	else 
	{
		mant_size--;	/* One of the digits was the point */
	}

	if (mant_size > 18)  /* TODO: is 18 correct for sse_real_t??? */
	{
		/* only the first 18 digits are collected; the dropped ones
		 * are accounted for in the exponent */
		frac_esse = dec_pt - 18;
		mant_size = 18;
	} 
	else 
	{
		frac_esse = dec_pt - mant_size;
	}

	if (mant_size == 0) 
	{
		/* no digit at all: the result is zero and the end pointer
		 * is left just after the signs and an optional lone '.' */
		fraction = 0.0;
		/*p = str;*/
		p = pesse;
		goto done;
	} 
	else 
	{
		int frac1, frac2;

		/* frac1 takes every digit beyond the last nine */
		frac1 = 0;
		for ( ; mant_size > 9; mant_size--) 
		{
			c = *p;
			p++;
			if (c == SSE_T('.')) 
			{
				/* step over the decimal point */
				c = *p;
				p++;
			}
			frac1 = 10 * frac1 + (c - SSE_T('0'));
		}

		/* frac2 takes the last (up to) nine digits */
		frac2 = 0;
		for (; mant_size > 0; mant_size--) {
			c = *p++;
			if (c == SSE_T('.')) 
			{
				c = *p;
				p++;
			}
			frac2 = 10 * frac2 + (c - SSE_T('0'));
		}
		fraction = (1.0e9 * frac1) + frac2;
	}

	/* Skim off the exponent */
	p = pesse;
	if (p < end && (*p == SSE_T('E') || *p == SSE_T('e'))) 
	{
		p++;

		if (p < end) 
		{
			if (*p == SSE_T('-')) 
			{
				esse_negative = 1;
				p++;
			} 
			else 
			{
				if (*p == SSE_T('+')) p++;
				esse_negative = 0;
			}
		}
		else esse_negative = 0;

		if (!(p < end && SSE_AWK_ISDIGIT (awk, *p))) 
		{
			/* 'e' without digits: treat the exponent as 0 but still
			 * apply the exponent implied by the mantissa.
			 * NOTE(review): p is not reset to pesse here, so *endptr
			 * ends up after the 'e' and its optional sign - confirm
			 * that callers expect this. */
			/*p = pesse;*/
			/*goto done;*/
			goto no_esse;
		}

		/* NOTE(review): esse is a plain int and is not bounded while
		 * digits are accumulated, so a very long digit run can
		 * overflow it before the clamp further down - confirm whether
		 * callers can pass such input. */
		while (p < end && SSE_AWK_ISDIGIT (awk, *p)) 
		{
			esse = esse * 10 + (*p - SSE_T('0'));
			p++;
		}
	}

no_esse:
	/* combine the explicit exponent with the one implied by the
	 * position of the decimal point */
	if (esse_negative) esse = frac_esse - esse;
	else esse = frac_esse + esse;

	/*
	 * Generate a floating-point number that represents the exponent.
	 * Do this by processing the exponent one bit at a time to combine
	 * many powers of 2 of 10. Then combine the exponent with the
	 * fraction.
	 */
	if (esse < 0) 
	{
		esse_negative = 1;
		esse = -esse;
	} 
	else esse_negative = 0;

	/* clamp so that the bit loop never walks past the table */
	if (esse > MAX_ESSEONENT) esse = MAX_ESSEONENT;

	dbl_esse = 1.0;

	/* multiply in 10^(2^i) for every set bit i of the exponent */
	for (d = powers_of_10; esse != 0; esse >>= 1, d++) 
	{
		if (esse & 01) dbl_esse *= *d;
	}

	if (esse_negative) fraction /= dbl_esse;
	else fraction *= dbl_esse;

done:
	if (endptr != SSE_NULL) *endptr = p;
	return (negative)? -fraction: fraction;
}

/*
 * Formats value in the given radix as "[-]<prefix><digits>".
 *
 * Digits above 9 are written as lowercase letters starting at 'a'.
 * prefix may be SSE_NULL, in which case no prefix is written.
 *
 * If buf is SSE_NULL nothing is written and the number of characters
 * the text needs (excluding any terminating NUL) is returned.
 * Otherwise the text is stored in buf, which holds size characters;
 * a terminating NUL is appended only when there is room for it
 * (size greater than the text length).
 *
 * Returns the number of characters in the text (sign, prefix and
 * digits, excluding the NUL), or (sse_size_t)-1 if buf is too small.
 *
 * NOTE(review): radix is not validated here (a radix below 2 makes
 * the digit loops misbehave) and negating the most negative
 * sse_long_t value overflows - confirm that callers never pass those.
 */
sse_size_t sse_awk_longtostr (
	sse_long_t value, int radix, const sse_char_t* prefix, 
	sse_char_t* buf, sse_size_t size)
{
	sse_long_t t, rem;
	sse_size_t len, ret, i;
	sse_size_t prefix_len;

	prefix_len = (prefix != SSE_NULL)? sse_awk_strlen(prefix): 0;

	t = value;
	if (t == 0)
	{
		/* zero is written as the prefix followed by a single '0' */
		ret = prefix_len + 1;

		if (buf == SSE_NULL) return ret;

		if (size < ret) 
		{
			/* buffer too small */
			return (sse_size_t)-1;
		}

		for (i = 0; i < prefix_len; i++) buf[i] = prefix[i];
		buf[prefix_len] = SSE_T('0');
		if (size > ret) buf[ret] = SSE_T('\0');

		/* report the full length including the prefix so that the
		 * result agrees with the size query above and with the
		 * non-zero path below */
		return ret;
	}

	/* non-zero values: count the sign, the prefix and the digits */
	len = prefix_len;
	if (t < 0) { t = -t; len++; }
	while (t > 0) { len++; t /= radix; }

	if (buf == SSE_NULL)
	{
		/* if buf is not given, return the number of characters required */
		return len;
	}

	if (size < len) return (sse_size_t)-1; /* buffer too small */
	if (size > len) buf[len] = SSE_T('\0');
	ret = len;

	t = value;
	if (t < 0) t = -t;

	/* emit the digits from the least significant one, filling the
	 * buffer backwards from the end of the text */
	while (t > 0) 
	{
		rem = t % radix;
		if (rem >= 10)
			buf[--len] = (sse_char_t)rem + SSE_T('a') - 10;
		else
			buf[--len] = (sse_char_t)rem + SSE_T('0');
		t /= radix;
	}

	/* the front of the buffer takes the optional sign and the prefix */
	len = 0;
	if (value < 0) buf[len++] = SSE_T('-');
	for (i = 0; i < prefix_len; i++) buf[len++] = prefix[i];

	return ret;
}

/*
 * Returns a copy of the NUL-terminated string str in memory obtained
 * through SSE_AWK_MALLOC, or SSE_NULL if the allocation fails.
 */
sse_char_t* sse_awk_strdup (sse_awk_t* awk, const sse_char_t* str)
{
	sse_char_t* copy;

	copy = (sse_char_t*) SSE_AWK_MALLOC (
		awk, (sse_awk_strlen(str) + 1) * sse_sizeof(sse_char_t));

	/* copy the characters and the terminator only if memory was obtained */
	if (copy != SSE_NULL) sse_awk_strcpy (copy, str);

	return copy;
}

/*
 * Returns a NUL-terminated copy of the first len characters of str in
 * memory obtained through SSE_AWK_MALLOC, or SSE_NULL if the
 * allocation fails.
 */
sse_char_t* sse_awk_strxdup (sse_awk_t* awk, const sse_char_t* str, sse_size_t len)
{
	sse_char_t* copy;

	/* one extra character for the terminator */
	copy = (sse_char_t*) SSE_AWK_MALLOC (
		awk, (len + 1) * sse_sizeof(sse_char_t));

	if (copy != SSE_NULL) sse_awk_strncpy (copy, str, len);

	return copy;
}

/*
 * Returns a NUL-terminated concatenation of str1[0..len1) and
 * str2[0..len2) in memory obtained through SSE_AWK_MALLOC, or
 * SSE_NULL if the allocation fails.
 */
sse_char_t* sse_awk_strxdup2 (
	sse_awk_t* awk,
	const sse_char_t* str1, sse_size_t len1,
	const sse_char_t* str2, sse_size_t len2)
{
	sse_char_t* joined;

	joined = (sse_char_t*) SSE_AWK_MALLOC (
		awk, (len1 + len2 + 1) * sse_sizeof(sse_char_t));

	if (joined != SSE_NULL)
	{
		/* the second copy overwrites the terminator written by the first */
		sse_awk_strncpy (joined, str1, len1);
		sse_awk_strncpy (joined + len1, str2, len2);
	}

	return joined;
}

/*
 * Returns the number of characters in str before its terminating NUL.
 */
sse_size_t sse_awk_strlen (const sse_char_t* str)
{
	sse_size_t count = 0;

	while (str[count] != SSE_T('\0')) count++;

	return count;
}

/*
 * Copies the NUL-terminated string str, terminator included, into buf
 * and returns the number of characters copied, not counting the
 * terminator.
 */
sse_size_t sse_awk_strcpy (sse_char_t* buf, const sse_char_t* str)
{
	sse_size_t i;

	/* the loop stops right after the terminator has been stored */
	for (i = 0; (buf[i] = str[i]) != SSE_T('\0'); i++)
	{
		/* nothing more to do per character */
	}

	return i;
}

/*
 * Copies exactly len characters from str into buf, appends a
 * terminating NUL and returns len. buf must have room for len + 1
 * characters.
 */
sse_size_t sse_awk_strncpy (sse_char_t* buf, const sse_char_t* str, sse_size_t len)
{
	sse_size_t i;

	for (i = 0; i < len; i++) buf[i] = str[i];
	buf[len] = SSE_T('\0');

	return len;
}

/*
 * Compares two NUL-terminated strings character by character.
 * Returns 0 if they are equal, 1 if the first differing character of
 * s1 is greater than that of s2, and -1 otherwise.
 */
int sse_awk_strcmp (const sse_char_t* s1, const sse_char_t* s2)
{
	sse_char_t a, b;

	for (;;)
	{
		a = *s1++;
		b = *s2++;

		if (a != b) return (a > b)? 1: -1;

		/* both strings ended at the same position */
		if (a == SSE_C('\0')) return 0;
	}
}

/*
 * Compares the length-bounded strings s1[0..len1) and s2[0..len2).
 * Returns 1, -1 or 0 when s1 is greater than, less than or equal to
 * s2. When one string is a prefix of the other, the longer one is
 * the greater.
 */
int sse_awk_strxncmp (
	const sse_char_t* s1, sse_size_t len1, 
	const sse_char_t* s2, sse_size_t len2)
{
	sse_size_t i;
	sse_char_t a, b;

	for (i = 0; i < len1; i++)
	{
		/* s2 ran out first, so s1 is the longer one */
		if (i >= len2) return 1;

		a = s1[i];
		b = s2[i];
		if (a != b) return (a > b)? 1: -1;
	}

	/* all of s1 matched; s2 is greater if it has characters left */
	return (len1 < len2)? -1: 0;
}

/*
 * Compares the length-bounded strings s1[0..len1) and s2[0..len2)
 * after converting each character with SSE_AWK_TOUPPER.
 * Returns 1, -1 or 0 when s1 is greater than, less than or equal to
 * s2. When one string is a prefix of the other, the longer one is
 * the greater.
 */
int sse_awk_strxncasecmp (
	sse_awk_t* awk,
	const sse_char_t* s1, sse_size_t len1, 
	const sse_char_t* s2, sse_size_t len2)
{
	sse_size_t i;
	sse_char_t a, b;

	for (i = 0; i < len1; i++)
	{
		a = SSE_AWK_TOUPPER (awk, s1[i]);

		/* s2 ran out first, so s1 is the longer one */
		if (i >= len2) return 1;

		b = SSE_AWK_TOUPPER (awk, s2[i]);
		if (a != b) return (a > b)? 1: -1;
	}

	/* all of s1 matched; s2 is greater if it has characters left */
	return (len1 < len2)? -1: 0;
}

/*
 * Finds the first occurrence of sub[0..subsz) inside str[0..strsz).
 * Returns a pointer to the start of the occurrence, str itself when
 * sub is empty, or SSE_NULL when there is no occurrence.
 */
sse_char_t* sse_awk_strxnstr (
	const sse_char_t* str, sse_size_t strsz, 
	const sse_char_t* sub, sse_size_t subsz)
{
	sse_size_t start, k, last;

	if (subsz == 0) return (sse_char_t*)str;
	if (strsz < subsz) return SSE_NULL;

	/* last offset at which sub still fits inside str */
	last = strsz - subsz;

	for (start = 0; start <= last; start++)
	{
		for (k = 0; k < subsz; k++)
		{
			if (str[start + k] != sub[k]) break;
		}

		/* every character of sub matched at this offset */
		if (k == subsz) return (sse_char_t*)(str + start);
	}

	return SSE_NULL;
}

/*
 * Extracts the next token from the NUL-terminated string s using the
 * NUL-terminated delimiter set delim. This is a convenience wrapper
 * that measures both strings and forwards to sse_awk_strxntok; see
 * that function for the meaning of tok, tok_len and the return value.
 *
 * delim may be SSE_NULL. It is then forwarded with a length of 0
 * instead of being measured, so the SSE_NULL-delimiter mode of
 * sse_awk_strxntok is reachable without dereferencing a null pointer.
 */
sse_char_t* sse_awk_strtok (
	sse_awk_run_t* run, const sse_char_t* s, 
	const sse_char_t* delim, sse_char_t** tok, sse_size_t* tok_len)
{
	sse_size_t delim_len;

	delim_len = (delim != SSE_NULL)? sse_awk_strlen(delim): 0;

	return sse_awk_strxntok (
		run, s, sse_awk_strlen(s), 
		delim, delim_len, tok, tok_len);
}

/*
 * Extracts the next token from the length-bounded string s[0..len)
 * using the NUL-terminated delimiter set delim. This is a convenience
 * wrapper that measures delim and forwards to sse_awk_strxntok; see
 * that function for the meaning of tok, tok_len and the return value.
 *
 * delim may be SSE_NULL. It is then forwarded with a length of 0
 * instead of being measured, so the SSE_NULL-delimiter mode of
 * sse_awk_strxntok is reachable without dereferencing a null pointer.
 */
sse_char_t* sse_awk_strxtok (
	sse_awk_run_t* run, const sse_char_t* s, sse_size_t len,
	const sse_char_t* delim, sse_char_t** tok, sse_size_t* tok_len)
{
	sse_size_t delim_len;

	delim_len = (delim != SSE_NULL)? sse_awk_strlen(delim): 0;

	return sse_awk_strxntok (
		run, s, len, 
		delim, delim_len, tok, tok_len);
}

/*
 * Extracts the next token from the NUL-terminated string s using the
 * length-bounded delimiter set delim[0..delim_len). This is a
 * convenience wrapper that measures s and forwards to
 * sse_awk_strxntok.
 */
sse_char_t* sse_awk_strntok (
	sse_awk_run_t* run, const sse_char_t* s, 
	const sse_char_t* delim, sse_size_t delim_len,
	sse_char_t** tok, sse_size_t* tok_len)
{
	const sse_size_t s_len = sse_awk_strlen (s);

	return sse_awk_strxntok (run, s, s_len, delim, delim_len, tok, tok_len);
}

/*
 * Extracts the next token from the length-bounded string s[0..len)
 * using the delimiter set delim[0..delim_len).
 *
 * The way tokens are delimited depends on the delimiter set:
 *   - delim is SSE_NULL: the whole input, with leading and trailing
 *     space characters trimmed, is the only token.
 *   - delim is empty: each character is a token.
 *   - delim holds only space characters: tokens are separated by runs
 *     of space characters.
 *   - delim holds only non-space characters: tokens are separated by
 *     any single character of the set.
 *   - delim holds both kinds: tokens are separated by the non-space
 *     characters of the set and space characters around a token are
 *     dropped.
 * In the last two modes the delimiter comparison is case-insensitive
 * when run->global.ignorecase is set.
 *
 * On return *tok and *tok_len describe the token found inside s, or
 * are SSE_NULL and 0 when no token character was found.
 *
 * Returns the position from which the next call should continue, or
 * SSE_NULL when the input has been used up.
 */
sse_char_t* sse_awk_strxntok (
	sse_awk_run_t* run, const sse_char_t* s, sse_size_t len,
	const sse_char_t* delim, sse_size_t delim_len, 
	sse_char_t** tok, sse_size_t* tok_len)
{
	const sse_char_t* p = s, *d;
	const sse_char_t* end = s + len;	
	/* sp and ep point to the first and the last character of the token */
	const sse_char_t* sp = SSE_NULL, * ep = SSE_NULL;
	const sse_char_t* delim_end = delim + delim_len;
	sse_char_t c; 
	int delim_mode;

/* classification of the delimiter set */
#define __DELIM_NULL      0
#define __DELIM_EMPTY     1
#define __DELIM_SPACES    2
#define __DELIM_NOSPACES  3
#define __DELIM_COMPOSITE 4
	if (delim == SSE_NULL) delim_mode = __DELIM_NULL;
	else 
	{
		/* start from EMPTY and upgrade as characters are seen; the
		 * scan stops as soon as both kinds have been found */
		delim_mode = __DELIM_EMPTY;

		for (d = delim; d < delim_end; d++) 
		{
			if (SSE_AWK_ISSPACE(run->awk,*d)) 
			{
				if (delim_mode == __DELIM_EMPTY)
					delim_mode = __DELIM_SPACES;
				else if (delim_mode == __DELIM_NOSPACES)
				{
					delim_mode = __DELIM_COMPOSITE;
					break;
				}
			}
			else
			{
				if (delim_mode == __DELIM_EMPTY)
					delim_mode = __DELIM_NOSPACES;
				else if (delim_mode == __DELIM_SPACES)
				{
					delim_mode = __DELIM_COMPOSITE;
					break;
				}
			}
		}
	}		
	
	if (delim_mode == __DELIM_NULL) 
	{ 
		/* when SSE_NULL is given as "delim", the leading and
		 * trailing space characters are trimmed off the source
		 * string "s" and the rest becomes the token. */

		while (p < end && SSE_AWK_ISSPACE(run->awk,*p)) p++;
		while (p < end) 
		{
			c = *p;

			/* ep tracks the last non-space character seen, so
			 * trailing spaces stay outside the token */
			if (!SSE_AWK_ISSPACE(run->awk,c)) 
			{
				if (sp == SSE_NULL) sp = p;
				ep = p;
			}
			p++;
		}
	}
	else if (delim_mode == __DELIM_EMPTY)
	{
		/* each character in the source string "s" becomes a token. */
		if (p < end)
		{
			c = *p;
			sp = p;
			ep = p++;
		}
	}
	else if (delim_mode == __DELIM_SPACES) 
	{
		/* each token is delimited by space characters. all leading
		 * and trailing spaces are removed. */

		while (p < end && SSE_AWK_ISSPACE(run->awk,*p)) p++;
		while (p < end) 
		{
			c = *p;
			if (SSE_AWK_ISSPACE(run->awk,c)) break;
			if (sp == SSE_NULL) sp = p;
			ep = p++;
		}
		/* skip the spaces after the token so that the returned
		 * position is the start of the next token, if any */
		while (p < end && SSE_AWK_ISSPACE(run->awk,*p)) p++;
	}
	else if (delim_mode == __DELIM_NOSPACES)
	{
		/* each token is delimited by one of the characters 
		 * in the delimiter set "delim". */
		if (run->global.ignorecase)
		{
			while (p < end) 
			{
				c = SSE_AWK_TOUPPER(run->awk, *p);
				for (d = delim; d < delim_end; d++) 
				{
					/* p is left on the delimiter */
					if (c == SSE_AWK_TOUPPER(run->awk,*d)) goto exit_loop;
				}

				if (sp == SSE_NULL) sp = p;
				ep = p++;
			}
		}
		else
		{
			while (p < end) 
			{
				c = *p;
				for (d = delim; d < delim_end; d++) 
				{
					/* p is left on the delimiter */
					if (c == *d) goto exit_loop;
				}

				if (sp == SSE_NULL) sp = p;
				ep = p++;
			}
		}
	}
	else /* if (delim_mode == __DELIM_COMPOSITE) */ 
	{
		/* each token is delimited by one of the non-space characters
		 * in the delimiter set "delim". however, all space characters
		 * surrounding the token are removed */
		while (p < end && SSE_AWK_ISSPACE(run->awk,*p)) p++;
		if (run->global.ignorecase)
		{
			while (p < end) 
			{
				c = SSE_AWK_TOUPPER(run->awk, *p);
				if (SSE_AWK_ISSPACE(run->awk,c)) 
				{
					/* spaces never move ep, so the ones at the
					 * end of the token are left out */
					p++;
					continue;
				}
				for (d = delim; d < delim_end; d++) 
				{
					if (c == SSE_AWK_TOUPPER(run->awk,*d)) goto exit_loop;
				}
				if (sp == SSE_NULL) sp = p;
				ep = p++;
			}
		}
		else
		{
			while (p < end) 
			{
				c = *p;
				if (SSE_AWK_ISSPACE(run->awk,c)) 
				{
					/* spaces never move ep, so the ones at the
					 * end of the token are left out */
					p++;
					continue;
				}
				for (d = delim; d < delim_end; d++) 
				{
					if (c == *d) goto exit_loop;
				}
				if (sp == SSE_NULL) sp = p;
				ep = p++;
			}
		}
	}

exit_loop:
	if (sp == SSE_NULL) 
	{
		/* no token character was found */
		*tok = SSE_NULL;
		*tok_len = (sse_size_t)0;
	}
	else 
	{
		/* ep is inclusive, hence the + 1 */
		*tok = (sse_char_t*)sp;
		*tok_len = ep - sp + 1;
	}

	/* if SSE_NULL is returned, this function should not be called anymore */
	if (p >= end) return SSE_NULL;
	/* in these two modes p already points to the next token */
	if (delim_mode == __DELIM_EMPTY || 
	    delim_mode == __DELIM_SPACES) return (sse_char_t*)p;
	/* otherwise p is on the delimiter that ended the token; skip it */
	return (sse_char_t*)++p;
}

/*
 * Extracts the next token from the length-bounded string s[0..len),
 * treating the text matched by the compiled regular expression rex
 * as the delimiter. Matching is case-insensitive when
 * run->global.ignorecase is set.
 *
 * Empty matches are stepped over one character at a time. When the
 * SSE_AWK_STRIPSPACES option is set, a match at the very beginning of
 * s that consists only of space characters is skipped instead of
 * producing an empty leading token.
 *
 * On success *tok and *tok_len describe the token inside s and
 * *errnum is set to SSE_AWK_ENOERR.
 *
 * Returns the position right after the delimiter, from which the next
 * call should continue, or SSE_NULL when there is nothing more to
 * scan. SSE_NULL is also returned when sse_awk_matchrex reports an
 * error (-1); *tok and *tok_len are not set in that case and *errnum
 * is whatever sse_awk_matchrex stored.
 */
sse_char_t* sse_awk_strxntokbyrex (
	sse_awk_run_t* run, const sse_char_t* s, sse_size_t len,
	void* rex, sse_char_t** tok, sse_size_t* tok_len, int* errnum)
{
	int n;
	sse_char_t* match_ptr;
	sse_size_t match_len, i;
	sse_size_t left = len;          /* characters not yet searched */
	const sse_char_t* ptr = s;      /* where the next search starts */
	const sse_char_t* str_ptr = s;  /* where the token starts */
	sse_size_t str_len = len;       /* characters from str_ptr to the end */

	/* the loop is bounded by the characters still left to search;
	 * bounding it by len would let an empty match at the end of the
	 * input push ptr past the end and wrap left around */
	while (left > 0)
	{
		n = sse_awk_matchrex (
			run->awk, rex, 
			((run->global.ignorecase)? SSE_AWK_REX_IGNORECASE: 0),
			ptr, left, (const sse_char_t**)&match_ptr, &match_len, 
			errnum);
		if (n == -1) return SSE_NULL;
		if (n == 0)
		{
			/* no match has been found. 
			 * return the entire string as a token */
			*tok = (sse_char_t*)str_ptr;
			*tok_len = str_len;
			*errnum = SSE_AWK_ENOERR;
			return SSE_NULL; 
		}

		sse_awk_assert (run->awk, n == 1);

		if (match_len == 0)
		{
			/* an empty match cannot delimit anything;
			 * retry from the next character */
			ptr++;
			left--;
		}
		else if (run->awk->option & SSE_AWK_STRIPSPACES)
		{
			/* match at the beginning of the input string */
			if (match_ptr == s) 
			{
				for (i = 0; i < match_len; i++)
				{
					if (!SSE_AWK_ISSPACE(run->awk, match_ptr[i]))
						goto exit_loop;
				}

				/* a match that is all spaces at the 
				 * beginning of the input string is skipped */
				ptr += match_len;
				left -= match_len;
				str_ptr = s + match_len;
				str_len -= match_len;
			}
			else break;
		}
		else break;
	}

exit_loop:
	if (left == 0)
	{
		/* the input was empty or has been used up without finding
		 * a delimiter: what remains is the last token. every path
		 * that leaves the loop with a usable match keeps left
		 * above zero, so match_ptr/match_len are valid below */
		*tok = (sse_char_t*)str_ptr;
		*tok_len = str_len;
		*errnum = SSE_AWK_ENOERR;
		return SSE_NULL; 
	}

	/* the token runs from its start up to the delimiter */
	*tok = (sse_char_t*)str_ptr;
	*tok_len = match_ptr - str_ptr;

	*errnum = SSE_AWK_ENOERR;

	for (i = 0; i < match_len; i++)
	{
		if (!SSE_AWK_ISSPACE(run->awk, match_ptr[i]))
		{
			/* a delimiter with a non-space character always
			 * leaves something to continue from */
			return match_ptr+match_len;
		}
	}

	/* the delimiter consists of space characters only */
	if (run->awk->option & SSE_AWK_STRIPSPACES)
	{
		/* a trailing all-space delimiter ends the scan */
		return (match_ptr+match_len >= s+len)? 
			SSE_NULL: (match_ptr+match_len);
	}
	else
	{
		return (match_ptr+match_len > s+len)? 
			SSE_NULL: (match_ptr+match_len);
	}
}

/*
 * Reports a failed assertion and aborts.
 *
 * The message, naming the source file, the line and the text passed
 * in esser, is printed through awk->syscas.dprintf; awk->syscas.abort
 * is called afterwards. Returns 0 if that call returns.
 */
int sse_awk_abort (sse_awk_t* awk, 
	const sse_char_t* esser, const sse_char_t* file, int line)
{
	/* print the location first, then the failed text on its own line */
	awk->syscas.dprintf (
		SSE_T("ASSERTION FAILURE AT FILE %s, LINE %d\n%s\n"),
		file,
		line,
		esser);

	awk->syscas.abort ();

	return 0;
}