improved tokenization by rex
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: awk.h 459 2011-05-17 14:37:51Z hyunghwan.chung $
|
||||
* $Id: awk.h 462 2011-05-18 14:36:40Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -201,8 +201,7 @@ typedef struct qse_awk_val_real_t qse_awk_val_real_t;
|
||||
struct qse_awk_val_str_t
|
||||
{
|
||||
QSE_AWK_VAL_HDR;
|
||||
qse_char_t* ptr;
|
||||
qse_size_t len;
|
||||
qse_xstr_t val;
|
||||
};
|
||||
typedef struct qse_awk_val_str_t qse_awk_val_str_t;
|
||||
|
||||
@ -355,6 +354,14 @@ struct qse_awk_nde_t
|
||||
QSE_AWK_NDE_HDR;
|
||||
};
|
||||
|
||||
typedef int (*qse_awk_sprintf_t) (
|
||||
qse_awk_t* awk,
|
||||
qse_char_t* buf,
|
||||
qse_size_t size,
|
||||
const qse_char_t* fmt,
|
||||
...
|
||||
);
|
||||
|
||||
typedef qse_real_t (*qse_awk_math1_t) (
|
||||
qse_awk_t* awk,
|
||||
qse_real_t x
|
||||
@ -366,18 +373,31 @@ typedef qse_real_t (*qse_awk_math2_t) (
|
||||
qse_real_t y
|
||||
);
|
||||
|
||||
typedef qse_real_t (*qse_awk_pow_t) (
|
||||
qse_awk_t* awk,
|
||||
qse_real_t x,
|
||||
qse_real_t y
|
||||
|
||||
typedef void* (*qse_awk_buildrex_t) (
|
||||
qse_awk_t* awk,
|
||||
const qse_char_t* ptn,
|
||||
qse_size_t len
|
||||
);
|
||||
|
||||
typedef int (*qse_awk_sprintf_t) (
|
||||
qse_awk_t* awk,
|
||||
qse_char_t* buf,
|
||||
qse_size_t size,
|
||||
const qse_char_t* fmt,
|
||||
...
|
||||
typedef int (*qse_awk_matchrex_t) (
|
||||
qse_awk_t* awk,
|
||||
void* code,
|
||||
int option,
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
const qse_char_t** mptr,
|
||||
qse_size_t* mlen
|
||||
);
|
||||
|
||||
typedef void (*qse_awk_freerex_t) (
|
||||
qse_awk_t* awk,
|
||||
void* code
|
||||
);
|
||||
|
||||
typedef qse_bool_t (*qse_awk_isemptyrex_t) (
|
||||
qse_awk_t* awk,
|
||||
void* code
|
||||
);
|
||||
|
||||
/**
|
||||
@ -564,33 +584,10 @@ struct qse_awk_prm_t
|
||||
struct
|
||||
{
|
||||
/* TODO: accept regular expression handling functions */
|
||||
void* (*build) (
|
||||
qse_awk_t* awk,
|
||||
const qse_char_t* ptn,
|
||||
qse_size_t len,
|
||||
int* errnum
|
||||
);
|
||||
|
||||
int (*match) (
|
||||
qse_awk_t* awk,
|
||||
void* code,
|
||||
int option,
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
const qse_char_t** mptr,
|
||||
qse_size_t* mlen,
|
||||
int* errnum
|
||||
);
|
||||
|
||||
void (*free) (
|
||||
qse_awk_t* awk,
|
||||
void* code
|
||||
);
|
||||
|
||||
qse_bool_t (*isempty) (
|
||||
qse_awk_t* awk,
|
||||
void* code
|
||||
);
|
||||
qse_awk_buildrex_t build;
|
||||
qse_awk_matchrex_t match;
|
||||
qse_awk_freerex_t free;
|
||||
qse_awk_isemptyrex_t isempty;
|
||||
} rex;
|
||||
#endif
|
||||
};
|
||||
@ -723,11 +720,11 @@ enum qse_awk_option_t
|
||||
QSE_AWK_NEWLINE = (1 << 5),
|
||||
|
||||
/**
|
||||
* strips off leading and trailing spaces when splitting a record
|
||||
* into fields with a regular expression.
|
||||
* remove empty fields when splitting a record if FS is a regular
|
||||
* expression and the match is all spaces.
|
||||
*
|
||||
* @code
|
||||
* BEGIN { FS="[:[:space:]]+"; }
|
||||
* BEGIN { FS="[[:space:]]+"; }
|
||||
* {
|
||||
* print "NF=" NF;
|
||||
* for (i = 0; i < NF; i++) print i " [" $(i+1) "]";
|
||||
@ -735,6 +732,17 @@ enum qse_awk_option_t
|
||||
* @endcode
|
||||
* " a b c " is split to [a], [b], [c] if #QSE_AWK_STRIPRECSPC is on.
|
||||
* Otherwise, it is split to [], [a], [b], [c], [].
|
||||
*
|
||||
* @code
|
||||
* BEGIN {
|
||||
* n=split(" o my god ", x, /[ o]+/);
|
||||
* for (i=1;i<=n;i++) print "[" x[i] "]";
|
||||
* }
|
||||
* @endcode
|
||||
* The above example splits the string to [], [my], [g], [d]
|
||||
* if #QSE_AWK_STRIPRECSPC is on. Otherwise, it results in
|
||||
* [], [my], [g], [d], []. Note that the first empty field is not
|
||||
* removed as the field separator is not all spaces (space + 'o').
|
||||
*/
|
||||
QSE_AWK_STRIPRECSPC = (1 << 6),
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.h 441 2011-04-22 14:28:43Z hyunghwan.chung $
|
||||
* $Id: rex.h 462 2011-05-18 14:36:40Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -264,9 +264,9 @@ qse_rex_node_t* qse_rex_comp (
|
||||
);
|
||||
|
||||
int qse_rex_exec (
|
||||
qse_rex_t* rex,
|
||||
qse_rex_t* rex,
|
||||
const qse_cstr_t* str,
|
||||
const qse_cstr_t* substr,
|
||||
const qse_cstr_t* substr,
|
||||
qse_cstr_t* matstr
|
||||
);
|
||||
|
||||
@ -285,10 +285,8 @@ int qse_matchrex (
|
||||
qse_size_t depth,
|
||||
void* code,
|
||||
int option,
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
const qse_char_t* substr,
|
||||
qse_size_t sublen,
|
||||
const qse_cstr_t* str,
|
||||
const qse_cstr_t* substr,
|
||||
qse_cstr_t* match,
|
||||
qse_rex_errnum_t* errnum
|
||||
);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: str.h 446 2011-04-30 15:24:38Z hyunghwan.chung $
|
||||
* $Id: str.h 462 2011-05-18 14:36:40Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -31,17 +31,19 @@
|
||||
* deal with a string pointer and length in a structure.
|
||||
*/
|
||||
|
||||
#define QSE_MBS_LEN(s) ((s)->len) /**< string length */
|
||||
#define QSE_MBS_PTR(s) ((s)->ptr) /**< string buffer pointer */
|
||||
#define QSE_MBS_XSTR(s) (&((s)->val)) /**< string pointer and length as an aggregate */
|
||||
#define QSE_MBS_LEN(s) ((s)->val.len) /**< string length */
|
||||
#define QSE_MBS_PTR(s) ((s)->val.ptr) /**< string buffer pointer */
|
||||
#define QSE_MBS_CAPA(s) ((s)->capa) /**< string buffer capacity */
|
||||
#define QSE_MBS_CHAR(s,idx) ((s)->ptr[idx]) /**< character at given position */
|
||||
#define QSE_MBS_LASTCHAR(s) ((s)->ptr[(s)->len-1]) /**< last character. unsafe if length <= 0 */
|
||||
#define QSE_MBS_CHAR(s,idx) ((s)->val.ptr[idx]) /**< character at given position */
|
||||
#define QSE_MBS_LASTCHAR(s) ((s)->val.ptr[(s)->val.len-1]) /**< last character. unsafe if length <= 0 */
|
||||
|
||||
#define QSE_WCS_LEN(s) ((s)->len) /**< string buffer length */
|
||||
#define QSE_WCS_PTR(s) ((s)->ptr) /**< string buffer pointer */
|
||||
#define QSE_WCS_XSTR(s) (&((s)->val)) /**< string pointer and length as an aggregate */
|
||||
#define QSE_WCS_LEN(s) ((s)->val.len) /**< string buffer length */
|
||||
#define QSE_WCS_PTR(s) ((s)->val.ptr) /**< string buffer pointer */
|
||||
#define QSE_WCS_CAPA(s) ((s)->capa) /**< string buffer capacity */
|
||||
#define QSE_WCS_CHAR(s,idx) ((s)->ptr[idx]) /**< character at given position */
|
||||
#define QSE_WCS_LASTCHAR(s) ((s)->ptr[(s)->len-1]) /**< last character. unsafe if length <= 0 */
|
||||
#define QSE_WCS_CHAR(s,idx) ((s)->val.ptr[idx]) /**< character at given position */
|
||||
#define QSE_WCS_LASTCHAR(s) ((s)->val.ptr[(s)->val.len-1]) /**< last character. unsafe if length <= 0 */
|
||||
|
||||
typedef struct qse_mbs_t qse_mbs_t;
|
||||
typedef struct qse_wcs_t qse_wcs_t;
|
||||
@ -57,6 +59,7 @@ typedef qse_size_t (*qse_wcs_sizer_t) (
|
||||
);
|
||||
|
||||
#ifdef QSE_CHAR_IS_MCHAR
|
||||
# define QSE_STR_XSTR(s) ((qse_xstr_t*)QSE_MBS_XSTR(s))
|
||||
# define QSE_STR_LEN(s) QSE_MBS_LEN(s)
|
||||
# define QSE_STR_PTR(s) QSE_MBS_PTR(s)
|
||||
# define QSE_STR_CAPA(s) QSE_MBS_CAPA(s)
|
||||
@ -65,6 +68,7 @@ typedef qse_size_t (*qse_wcs_sizer_t) (
|
||||
# define qse_str_t qse_mbs_t
|
||||
# define qse_str_sizer_t qse_mbs_sizer_t
|
||||
#else
|
||||
# define QSE_STR_XSTR(s) ((qse_xstr_t*)QSE_WCS_XSTR(s))
|
||||
# define QSE_STR_LEN(s) QSE_WCS_LEN(s)
|
||||
# define QSE_STR_PTR(s) QSE_WCS_PTR(s)
|
||||
# define QSE_STR_CAPA(s) QSE_WCS_CAPA(s)
|
||||
@ -82,8 +86,7 @@ struct qse_mbs_t
|
||||
{
|
||||
QSE_DEFINE_COMMON_FIELDS (mbs)
|
||||
qse_mbs_sizer_t sizer; /**< buffer resizer function */
|
||||
qse_mchar_t* ptr; /**< buffer/string pointer */
|
||||
qse_size_t len; /**< string length */
|
||||
qse_mxstr_t val; /**< buffer/string pointer and length */
|
||||
qse_size_t capa; /**< buffer capacity */
|
||||
};
|
||||
|
||||
@ -94,8 +97,7 @@ struct qse_wcs_t
|
||||
{
|
||||
QSE_DEFINE_COMMON_FIELDS (wcs)
|
||||
qse_wcs_sizer_t sizer; /**< buffer resizer function */
|
||||
qse_wchar_t* ptr; /**< buffer/string pointer */
|
||||
qse_size_t len; /**< string length */
|
||||
qse_wxstr_t val; /**< buffer/string pointer and length */
|
||||
qse_size_t capa; /**< buffer capacity */
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user