2006-07-17 06:21:39 +00:00
|
|
|
/*
|
2006-07-24 16:23:19 +00:00
|
|
|
* $Id: rex.h,v 1.7 2006-07-24 16:23:19 bacon Exp $
|
2006-07-17 06:21:39 +00:00
|
|
|
**/
|
|
|
|
|
|
|
|
#ifndef _XP_AWK_REX_H_
|
|
|
|
#define _XP_AWK_REX_H_
|
|
|
|
|
|
|
|
#ifndef _XP_AWK_AWK_H_
|
|
|
|
#error Never include this file directly. Include <xp/awk/awk.h> instead
|
|
|
|
#endif
|
|
|
|
|
2006-07-19 15:58:01 +00:00
|
|
|
/*
|
|
|
|
* Regular Expression Syntax
|
|
|
|
* A regular expression is zero or more branches, separated by '|'.
|
|
|
|
* ......
|
|
|
|
* ......
|
|
|
|
*
|
|
|
|
* Compiled form of a regular expression:
|
|
|
|
*
|
2006-07-20 16:21:54 +00:00
|
|
|
* | expression |
|
|
|
|
* | header | branch | branch | branch |
|
|
|
|
* | nb | el | na | bl | cmd | arg | cmd | arg | na | bl | cmd | arg | na | bl | cmd |
|
2006-07-19 15:58:01 +00:00
|
|
|
*
|
|
|
|
* nb: the number of branches
|
|
|
|
* el: the length of a expression excluding the length of nb and el
|
2006-07-20 16:21:54 +00:00
|
|
|
* na: the number of atoms
|
2006-07-19 15:58:01 +00:00
|
|
|
* bl: the length of a branch excluding the length of bl
|
|
|
|
* cmd: The command and repetition info encoded together.
|
|
|
|
* Some commands require an argument to follow them but some other don't.
|
|
|
|
* It is encoded as follows:
|
|
|
|
*
|
|
|
|
* Subexpressions can be nested by having the command "GROUP"
|
|
|
|
* and a subexpression as its argument.
|
|
|
|
*
|
|
|
|
* Examples:
|
|
|
|
* a.c -> |1|6|5|ORD_CHAR(no bound)|a|ANY_CHAR(no bound)|ORD_CHAR(no bound)|c|
|
|
|
|
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
|
|
|
*/
|
|
|
|
|
2006-07-17 06:21:39 +00:00
|
|
|
struct xp_awk_rex_t
|
|
|
|
{
|
2006-07-19 11:45:24 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
const xp_char_t* ptr;
|
|
|
|
const xp_char_t* end;
|
|
|
|
const xp_char_t* curp;
|
2006-07-20 03:41:00 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
int type;
|
|
|
|
xp_char_t value;
|
|
|
|
} curc;
|
2006-07-19 11:45:24 +00:00
|
|
|
} ptn;
|
|
|
|
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
xp_byte_t* buf;
|
|
|
|
xp_size_t size;
|
|
|
|
xp_size_t capa;
|
|
|
|
} code;
|
|
|
|
|
2006-07-24 11:58:55 +00:00
|
|
|
struct
|
|
|
|
{
|
|
|
|
struct
|
|
|
|
{
|
|
|
|
const xp_char_t* ptr;
|
|
|
|
const xp_char_t* end;
|
|
|
|
} str;
|
|
|
|
} match;
|
|
|
|
|
2006-07-24 16:23:19 +00:00
|
|
|
int errnum;
|
2006-07-17 06:21:39 +00:00
|
|
|
xp_bool_t __dynamic;
|
|
|
|
};
|
|
|
|
|
2006-07-24 16:23:19 +00:00
|
|
|
enum
|
|
|
|
{
|
|
|
|
XP_AWK_REX_ENOERR, /* no error */
|
|
|
|
XP_AWK_REX_ENOMEM, /* ran out of memory */
|
|
|
|
XP_AWK_REX_ENOPTN, /* no pattern compiled */
|
|
|
|
XP_AWK_REX_ERPAREN, /* a right parenthesis is expected */
|
|
|
|
XP_AWK_REX_ERBRACKET, /* a right bracket is expected */
|
|
|
|
XP_AWK_REX_ERBRACE, /* a right brace is expected */
|
|
|
|
XP_AWK_REX_ECOLON, /* a colon is expected */
|
|
|
|
XP_AWK_REX_ECRANGE, /* invalid character range */
|
|
|
|
XP_AWK_REX_ECCLASS, /* invalid character class */
|
|
|
|
XP_AWK_REX_EBRANGE, /* invalid boundary range */
|
|
|
|
XP_AWK_REX_EEND, /* unexpected end of the pattern */
|
|
|
|
XP_AWK_REX_EGARBAGE /* garbage after the pattern */
|
|
|
|
};
|
|
|
|
|
2006-07-17 06:21:39 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex);
|
|
|
|
void xp_awk_rex_close (xp_awk_rex_t* rex);
|
2006-07-24 16:23:19 +00:00
|
|
|
void xp_awk_rex_clear (xp_awk_rex_t* rex);
|
2006-07-24 11:58:55 +00:00
|
|
|
|
2006-07-24 16:23:19 +00:00
|
|
|
int xp_awk_rex_compile (
|
|
|
|
xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len);
|
2006-07-17 06:21:39 +00:00
|
|
|
|
2006-07-24 11:58:55 +00:00
|
|
|
int xp_awk_rex_match (xp_awk_rex_t* rex,
|
|
|
|
const xp_char_t* str, xp_size_t len,
|
|
|
|
const xp_char_t** match_ptr, xp_size_t* match_len);
|
|
|
|
|
2006-07-24 16:23:19 +00:00
|
|
|
void xp_awk_rex_print (xp_awk_rex_t* rex);
|
|
|
|
|
2006-07-17 06:21:39 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|