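Enhanced the awk parser a little: kwtab and the kw_t enums are now kept sorted, classify_ident uses binary search when no custom words are set, and the gbl_t/check_gbl helpers are renamed to global_t/check_global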

This commit is contained in:
hyung-hwan 2009-02-04 00:26:06 +00:00
parent 14bc247637
commit 9b994d4a7c
2 changed files with 101 additions and 70 deletions

View File

@ -246,55 +246,45 @@ struct kwent_t
int valid; /* the entry is valid when this option is set */ int valid; /* the entry is valid when this option is set */
}; };
/* this table should match the kw_t enums in parse.h */
static kwent_t kwtab[] = static kwent_t kwtab[] =
{ {
/* operators */ /* keep this table in sync with the kw_t enums in <parse.h>.
{ QSE_T("in"), 2, TOKEN_IN, 0 }, * also keep it sorted by the first field for binary search */
/* top-level block starters */
{ QSE_T("BEGIN"), 5, TOKEN_BEGIN, QSE_AWK_PABLOCK }, { QSE_T("BEGIN"), 5, TOKEN_BEGIN, QSE_AWK_PABLOCK },
{ QSE_T("END"), 3, TOKEN_END, QSE_AWK_PABLOCK }, { QSE_T("END"), 3, TOKEN_END, QSE_AWK_PABLOCK },
{ QSE_T("function"), 8, TOKEN_FUNCTION, 0 },
/* keywords for variable declaration */
{ QSE_T("local"), 5, TOKEN_LOCAL, QSE_AWK_EXPLICIT },
{ QSE_T("global"), 6, TOKEN_GLOBAL, QSE_AWK_EXPLICIT },
/* keywords that start statements excluding expression statements */
{ QSE_T("if"), 2, TOKEN_IF, 0 },
{ QSE_T("else"), 4, TOKEN_ELSE, 0 },
{ QSE_T("while"), 5, TOKEN_WHILE, 0 },
{ QSE_T("for"), 3, TOKEN_FOR, 0 },
{ QSE_T("do"), 2, TOKEN_DO, 0 },
{ QSE_T("break"), 5, TOKEN_BREAK, 0 }, { QSE_T("break"), 5, TOKEN_BREAK, 0 },
{ QSE_T("continue"), 8, TOKEN_CONTINUE, 0 }, { QSE_T("continue"), 8, TOKEN_CONTINUE, 0 },
{ QSE_T("return"), 6, TOKEN_RETURN, 0 }, { QSE_T("delete"), 6, TOKEN_DELETE, 0 },
{ QSE_T("do"), 2, TOKEN_DO, 0 },
{ QSE_T("else"), 4, TOKEN_ELSE, 0 },
{ QSE_T("exit"), 4, TOKEN_EXIT, 0 }, { QSE_T("exit"), 4, TOKEN_EXIT, 0 },
{ QSE_T("for"), 3, TOKEN_FOR, 0 },
{ QSE_T("function"), 8, TOKEN_FUNCTION, 0 },
{ QSE_T("getline"), 7, TOKEN_GETLINE, QSE_AWK_EIO },
{ QSE_T("global"), 6, TOKEN_GLOBAL, QSE_AWK_EXPLICIT },
{ QSE_T("if"), 2, TOKEN_IF, 0 },
{ QSE_T("in"), 2, TOKEN_IN, 0 },
{ QSE_T("local"), 5, TOKEN_LOCAL, QSE_AWK_EXPLICIT },
{ QSE_T("next"), 4, TOKEN_NEXT, QSE_AWK_PABLOCK }, { QSE_T("next"), 4, TOKEN_NEXT, QSE_AWK_PABLOCK },
{ QSE_T("nextfile"), 8, TOKEN_NEXTFILE, QSE_AWK_PABLOCK }, { QSE_T("nextfile"), 8, TOKEN_NEXTFILE, QSE_AWK_PABLOCK },
{ QSE_T("nextofile"), 9, TOKEN_NEXTOFILE, QSE_AWK_PABLOCK | QSE_AWK_NEXTOFILE }, { QSE_T("nextofile"), 9, TOKEN_NEXTOFILE, QSE_AWK_PABLOCK | QSE_AWK_NEXTOFILE },
{ QSE_T("delete"), 6, TOKEN_DELETE, 0 },
{ QSE_T("reset"), 5, TOKEN_RESET, QSE_AWK_RESET },
{ QSE_T("print"), 5, TOKEN_PRINT, QSE_AWK_EIO }, { QSE_T("print"), 5, TOKEN_PRINT, QSE_AWK_EIO },
{ QSE_T("printf"), 6, TOKEN_PRINTF, QSE_AWK_EIO }, { QSE_T("printf"), 6, TOKEN_PRINTF, QSE_AWK_EIO },
{ QSE_T("reset"), 5, TOKEN_RESET, QSE_AWK_RESET },
/* keywords that can start an expression */ { QSE_T("return"), 6, TOKEN_RETURN, 0 },
{ QSE_T("getline"), 7, TOKEN_GETLINE, QSE_AWK_EIO }, { QSE_T("while"), 5, TOKEN_WHILE, 0 }
{ QSE_NULL, 0, 0, 0 }
}; };
typedef struct gbl_t gbl_t; typedef struct global_t global_t;
struct gbl_t struct global_t
{ {
const qse_char_t* name; const qse_char_t* name;
qse_size_t name_len; qse_size_t name_len;
int valid; int valid;
}; };
static gbl_t gtab[] = static global_t gtab[] =
{ {
{ QSE_T("ARGC"), 4, 0 }, { QSE_T("ARGC"), 4, 0 },
{ QSE_T("ARGV"), 4, 0 }, { QSE_T("ARGV"), 4, 0 },
@ -1422,20 +1412,20 @@ static void adjust_static_globals (qse_awk_t* awk)
} }
} }
typedef struct check_gbl_t check_gbl_t; typedef struct check_global_t check_global_t;
struct check_gbl_t struct check_global_t
{ {
qse_cstr_t name; qse_cstr_t name;
qse_size_t index; qse_size_t index;
qse_lda_walk_t walk; qse_lda_walk_t walk;
}; };
static qse_lda_walk_t check_gbl (qse_lda_t* lda, qse_size_t index, void* arg) static qse_lda_walk_t check_global (qse_lda_t* lda, qse_size_t index, void* arg)
{ {
qse_cstr_t tmp; qse_cstr_t tmp;
qse_awk_t* awk = *(qse_awk_t**)qse_lda_getxtn(lda); qse_awk_t* awk = *(qse_awk_t**)qse_lda_getxtn(lda);
check_gbl_t* cg = (check_gbl_t*)arg; check_global_t* cg = (check_global_t*)arg;
tmp.ptr = QSE_LDA_DPTR(lda,index); tmp.ptr = QSE_LDA_DPTR(lda,index);
tmp.len = QSE_LDA_DLEN(lda,index); tmp.len = QSE_LDA_DLEN(lda,index);
@ -1464,28 +1454,28 @@ static qse_lda_walk_t check_gbl (qse_lda_t* lda, qse_size_t index, void* arg)
static qse_size_t get_global ( static qse_size_t get_global (
qse_awk_t* awk, const qse_char_t* name, qse_size_t len) qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
{ {
check_gbl_t cg; check_global_t cg;
cg.name.ptr = name; cg.name.ptr = name;
cg.name.len = len; cg.name.len = len;
cg.index = QSE_LDA_NIL; cg.index = QSE_LDA_NIL;
cg.walk = QSE_LDA_WALK_BACKWARD; cg.walk = QSE_LDA_WALK_BACKWARD;
qse_lda_rwalk (awk->parse.gbls, check_gbl, &cg); qse_lda_rwalk (awk->parse.gbls, check_global, &cg);
return cg.index; return cg.index;
} }
static qse_size_t find_global ( static qse_size_t find_global (
qse_awk_t* awk, const qse_char_t* name, qse_size_t len) qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
{ {
check_gbl_t cg; check_global_t cg;
cg.name.ptr = name; cg.name.ptr = name;
cg.name.len = len; cg.name.len = len;
cg.index = QSE_LDA_NIL; cg.index = QSE_LDA_NIL;
cg.walk = QSE_LDA_WALK_FORWARD; cg.walk = QSE_LDA_WALK_FORWARD;
qse_lda_walk (awk->parse.gbls, check_gbl, &cg); qse_lda_walk (awk->parse.gbls, check_global, &cg);
return cg.index; return cg.index;
} }
@ -5444,34 +5434,75 @@ static int skip_comment (qse_awk_t* awk)
static int classify_ident ( static int classify_ident (
qse_awk_t* awk, const qse_char_t* name, qse_size_t len) qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
{ {
kwent_t* kwp; if (QSE_MAP_SIZE(awk->wtab) <= 0)
qse_map_pair_t* pair;
for (kwp = kwtab; kwp->name != QSE_NULL; kwp++)
{ {
const qse_char_t* k; /* perform binary search if no custom words are specified */
qse_size_t l;
if (kwp->valid != 0 && /* declaring left, right, mid to be of int is ok
(awk->option & kwp->valid) != kwp->valid) continue; * because we know kwtab is small enough. */
int left = 0, right = QSE_COUNTOF(kwtab) - 1, mid;
pair = qse_map_search (awk->wtab, kwp->name, kwp->name_len); while (left <= right)
if (pair != QSE_NULL)
{ {
k = ((qse_cstr_t*)(pair->vptr))->ptr; int n;
l = ((qse_cstr_t*)(pair->vptr))->len; kwent_t* kwp;
}
else
{
k = kwp->name;
l = kwp->name_len;
}
if (qse_strxncmp (k, l, name, len) == 0) mid = (left + right) / 2;
{ kwp = &kwtab[mid];
return kwp->type; n = qse_strxncmp (kwp->name, kwp->name_len, name, len);
if (n > 0)
{
/* if left, right, mid were of qse_size_t,
* you would need the following line.
if (mid == 0) break;
*/
right = mid - 1;
}
else if (n < 0) left = mid + 1;
else
{
if (kwp->valid != 0 &&
(awk->option & kwp->valid) != kwp->valid)
break;
return kwp->type;
}
} }
} }
else
{
/* perform linear search if there are any custom words set */
kwent_t* kwp, * end;
qse_map_pair_t* pair;
end = kwtab + QSE_COUNTOF(kwtab);
for (kwp = kwtab; kwp < end; kwp++)
{
const qse_char_t* k;
qse_size_t l;
if (kwp->valid != 0 &&
(awk->option & kwp->valid) != kwp->valid) continue;
pair = qse_map_search (awk->wtab, kwp->name, kwp->name_len);
if (pair != QSE_NULL)
{
k = ((qse_cstr_t*)(pair->vptr))->ptr;
l = ((qse_cstr_t*)(pair->vptr))->len;
}
else
{
k = kwp->name;
l = kwp->name_len;
}
if (qse_strxncmp (k, l, name, len) == 0)
{
return kwp->type;
}
}
}
return TOKEN_IDENT; return TOKEN_IDENT;
} }

View File

@ -22,29 +22,29 @@
/* these enums should match kwtab in parse.c */ /* these enums should match kwtab in parse.c */
enum kw_t enum kw_t
{ {
KW_IN,
KW_BEGIN, KW_BEGIN,
KW_END, KW_END,
KW_FUNCTION,
KW_LOCAL,
KW_GLOBAL,
KW_IF,
KW_ELSE,
KW_WHILE,
KW_FOR,
KW_DO,
KW_BREAK, KW_BREAK,
KW_CONTINUE, KW_CONTINUE,
KW_RETURN, KW_DELETE,
KW_DO,
KW_ELSE,
KW_EXIT, KW_EXIT,
KW_FOR,
KW_FUNCTION,
KW_GETLINE,
KW_GLOBAL,
KW_IF,
KW_IN,
KW_LOCAL,
KW_NEXT, KW_NEXT,
KW_NEXTFILE, KW_NEXTFILE,
KW_NEXTOFILE, KW_NEXTOFILE,
KW_DELETE,
KW_RESET,
KW_PRINT, KW_PRINT,
KW_PRINTF, KW_PRINTF,
KW_GETLINE, KW_RESET,
KW_RETURN,
KW_WHILE
}; };
#ifdef __cplusplus #ifdef __cplusplus