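Enhanced the awk parser a little: sorted the keyword table (kwtab) so that classify_ident can use binary search, and renamed gbl_t/check_gbl to global_t/check_global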
This commit is contained in:
parent
14bc247637
commit
9b994d4a7c
@ -246,55 +246,45 @@ struct kwent_t
|
|||||||
int valid; /* the entry is valid when this option is set */
|
int valid; /* the entry is valid when this option is set */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* this table should match the kw_t enums in parse.h */
|
|
||||||
static kwent_t kwtab[] =
|
static kwent_t kwtab[] =
|
||||||
{
|
{
|
||||||
/* operators */
|
/* keep this table in sync with the kw_t enums in <parse.h>.
|
||||||
{ QSE_T("in"), 2, TOKEN_IN, 0 },
|
* Also keep it sorted by the first field (the keyword name) so that binary search works. */
|
||||||
|
|
||||||
/* top-level block starters */
|
|
||||||
{ QSE_T("BEGIN"), 5, TOKEN_BEGIN, QSE_AWK_PABLOCK },
|
{ QSE_T("BEGIN"), 5, TOKEN_BEGIN, QSE_AWK_PABLOCK },
|
||||||
{ QSE_T("END"), 3, TOKEN_END, QSE_AWK_PABLOCK },
|
{ QSE_T("END"), 3, TOKEN_END, QSE_AWK_PABLOCK },
|
||||||
{ QSE_T("function"), 8, TOKEN_FUNCTION, 0 },
|
|
||||||
|
|
||||||
/* keywords for variable declaration */
|
|
||||||
{ QSE_T("local"), 5, TOKEN_LOCAL, QSE_AWK_EXPLICIT },
|
|
||||||
{ QSE_T("global"), 6, TOKEN_GLOBAL, QSE_AWK_EXPLICIT },
|
|
||||||
|
|
||||||
/* keywords that start statements excluding expression statements */
|
|
||||||
{ QSE_T("if"), 2, TOKEN_IF, 0 },
|
|
||||||
{ QSE_T("else"), 4, TOKEN_ELSE, 0 },
|
|
||||||
{ QSE_T("while"), 5, TOKEN_WHILE, 0 },
|
|
||||||
{ QSE_T("for"), 3, TOKEN_FOR, 0 },
|
|
||||||
{ QSE_T("do"), 2, TOKEN_DO, 0 },
|
|
||||||
{ QSE_T("break"), 5, TOKEN_BREAK, 0 },
|
{ QSE_T("break"), 5, TOKEN_BREAK, 0 },
|
||||||
{ QSE_T("continue"), 8, TOKEN_CONTINUE, 0 },
|
{ QSE_T("continue"), 8, TOKEN_CONTINUE, 0 },
|
||||||
{ QSE_T("return"), 6, TOKEN_RETURN, 0 },
|
{ QSE_T("delete"), 6, TOKEN_DELETE, 0 },
|
||||||
|
{ QSE_T("do"), 2, TOKEN_DO, 0 },
|
||||||
|
{ QSE_T("else"), 4, TOKEN_ELSE, 0 },
|
||||||
{ QSE_T("exit"), 4, TOKEN_EXIT, 0 },
|
{ QSE_T("exit"), 4, TOKEN_EXIT, 0 },
|
||||||
|
{ QSE_T("for"), 3, TOKEN_FOR, 0 },
|
||||||
|
{ QSE_T("function"), 8, TOKEN_FUNCTION, 0 },
|
||||||
|
{ QSE_T("getline"), 7, TOKEN_GETLINE, QSE_AWK_EIO },
|
||||||
|
{ QSE_T("global"), 6, TOKEN_GLOBAL, QSE_AWK_EXPLICIT },
|
||||||
|
{ QSE_T("if"), 2, TOKEN_IF, 0 },
|
||||||
|
{ QSE_T("in"), 2, TOKEN_IN, 0 },
|
||||||
|
{ QSE_T("local"), 5, TOKEN_LOCAL, QSE_AWK_EXPLICIT },
|
||||||
{ QSE_T("next"), 4, TOKEN_NEXT, QSE_AWK_PABLOCK },
|
{ QSE_T("next"), 4, TOKEN_NEXT, QSE_AWK_PABLOCK },
|
||||||
{ QSE_T("nextfile"), 8, TOKEN_NEXTFILE, QSE_AWK_PABLOCK },
|
{ QSE_T("nextfile"), 8, TOKEN_NEXTFILE, QSE_AWK_PABLOCK },
|
||||||
{ QSE_T("nextofile"), 9, TOKEN_NEXTOFILE, QSE_AWK_PABLOCK | QSE_AWK_NEXTOFILE },
|
{ QSE_T("nextofile"), 9, TOKEN_NEXTOFILE, QSE_AWK_PABLOCK | QSE_AWK_NEXTOFILE },
|
||||||
{ QSE_T("delete"), 6, TOKEN_DELETE, 0 },
|
|
||||||
{ QSE_T("reset"), 5, TOKEN_RESET, QSE_AWK_RESET },
|
|
||||||
{ QSE_T("print"), 5, TOKEN_PRINT, QSE_AWK_EIO },
|
{ QSE_T("print"), 5, TOKEN_PRINT, QSE_AWK_EIO },
|
||||||
{ QSE_T("printf"), 6, TOKEN_PRINTF, QSE_AWK_EIO },
|
{ QSE_T("printf"), 6, TOKEN_PRINTF, QSE_AWK_EIO },
|
||||||
|
{ QSE_T("reset"), 5, TOKEN_RESET, QSE_AWK_RESET },
|
||||||
/* keywords that can start an expression */
|
{ QSE_T("return"), 6, TOKEN_RETURN, 0 },
|
||||||
{ QSE_T("getline"), 7, TOKEN_GETLINE, QSE_AWK_EIO },
|
{ QSE_T("while"), 5, TOKEN_WHILE, 0 }
|
||||||
|
|
||||||
{ QSE_NULL, 0, 0, 0 }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct gbl_t gbl_t;
|
typedef struct global_t global_t;
|
||||||
|
|
||||||
struct gbl_t
|
struct global_t
|
||||||
{
|
{
|
||||||
const qse_char_t* name;
|
const qse_char_t* name;
|
||||||
qse_size_t name_len;
|
qse_size_t name_len;
|
||||||
int valid;
|
int valid;
|
||||||
};
|
};
|
||||||
|
|
||||||
static gbl_t gtab[] =
|
static global_t gtab[] =
|
||||||
{
|
{
|
||||||
{ QSE_T("ARGC"), 4, 0 },
|
{ QSE_T("ARGC"), 4, 0 },
|
||||||
{ QSE_T("ARGV"), 4, 0 },
|
{ QSE_T("ARGV"), 4, 0 },
|
||||||
@ -1422,20 +1412,20 @@ static void adjust_static_globals (qse_awk_t* awk)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct check_gbl_t check_gbl_t;
|
typedef struct check_global_t check_global_t;
|
||||||
|
|
||||||
struct check_gbl_t
|
struct check_global_t
|
||||||
{
|
{
|
||||||
qse_cstr_t name;
|
qse_cstr_t name;
|
||||||
qse_size_t index;
|
qse_size_t index;
|
||||||
qse_lda_walk_t walk;
|
qse_lda_walk_t walk;
|
||||||
};
|
};
|
||||||
|
|
||||||
static qse_lda_walk_t check_gbl (qse_lda_t* lda, qse_size_t index, void* arg)
|
static qse_lda_walk_t check_global (qse_lda_t* lda, qse_size_t index, void* arg)
|
||||||
{
|
{
|
||||||
qse_cstr_t tmp;
|
qse_cstr_t tmp;
|
||||||
qse_awk_t* awk = *(qse_awk_t**)qse_lda_getxtn(lda);
|
qse_awk_t* awk = *(qse_awk_t**)qse_lda_getxtn(lda);
|
||||||
check_gbl_t* cg = (check_gbl_t*)arg;
|
check_global_t* cg = (check_global_t*)arg;
|
||||||
|
|
||||||
tmp.ptr = QSE_LDA_DPTR(lda,index);
|
tmp.ptr = QSE_LDA_DPTR(lda,index);
|
||||||
tmp.len = QSE_LDA_DLEN(lda,index);
|
tmp.len = QSE_LDA_DLEN(lda,index);
|
||||||
@ -1464,28 +1454,28 @@ static qse_lda_walk_t check_gbl (qse_lda_t* lda, qse_size_t index, void* arg)
|
|||||||
static qse_size_t get_global (
|
static qse_size_t get_global (
|
||||||
qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
|
qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
|
||||||
{
|
{
|
||||||
check_gbl_t cg;
|
check_global_t cg;
|
||||||
|
|
||||||
cg.name.ptr = name;
|
cg.name.ptr = name;
|
||||||
cg.name.len = len;
|
cg.name.len = len;
|
||||||
cg.index = QSE_LDA_NIL;
|
cg.index = QSE_LDA_NIL;
|
||||||
cg.walk = QSE_LDA_WALK_BACKWARD;
|
cg.walk = QSE_LDA_WALK_BACKWARD;
|
||||||
|
|
||||||
qse_lda_rwalk (awk->parse.gbls, check_gbl, &cg);
|
qse_lda_rwalk (awk->parse.gbls, check_global, &cg);
|
||||||
return cg.index;
|
return cg.index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static qse_size_t find_global (
|
static qse_size_t find_global (
|
||||||
qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
|
qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
|
||||||
{
|
{
|
||||||
check_gbl_t cg;
|
check_global_t cg;
|
||||||
|
|
||||||
cg.name.ptr = name;
|
cg.name.ptr = name;
|
||||||
cg.name.len = len;
|
cg.name.len = len;
|
||||||
cg.index = QSE_LDA_NIL;
|
cg.index = QSE_LDA_NIL;
|
||||||
cg.walk = QSE_LDA_WALK_FORWARD;
|
cg.walk = QSE_LDA_WALK_FORWARD;
|
||||||
|
|
||||||
qse_lda_walk (awk->parse.gbls, check_gbl, &cg);
|
qse_lda_walk (awk->parse.gbls, check_global, &cg);
|
||||||
return cg.index;
|
return cg.index;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5444,10 +5434,49 @@ static int skip_comment (qse_awk_t* awk)
|
|||||||
static int classify_ident (
|
static int classify_ident (
|
||||||
qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
|
qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
|
||||||
{
|
{
|
||||||
|
if (QSE_MAP_SIZE(awk->wtab) <= 0)
|
||||||
|
{
|
||||||
|
/* perform binary search if no custom words are specified */
|
||||||
|
|
||||||
|
/* declaring left, right and mid as int is ok
|
||||||
|
* because we know kwtab is small enough. */
|
||||||
|
int left = 0, right = QSE_COUNTOF(kwtab) - 1, mid;
|
||||||
|
|
||||||
|
while (left <= right)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
kwent_t* kwp;
|
kwent_t* kwp;
|
||||||
|
|
||||||
|
mid = (left + right) / 2;
|
||||||
|
kwp = &kwtab[mid];
|
||||||
|
n = qse_strxncmp (kwp->name, kwp->name_len, name, len);
|
||||||
|
if (n > 0)
|
||||||
|
{
|
||||||
|
/* if left, right and mid were of type qse_size_t,
|
||||||
|
* you would need the following line.
|
||||||
|
if (mid == 0) break;
|
||||||
|
*/
|
||||||
|
right = mid - 1;
|
||||||
|
}
|
||||||
|
else if (n < 0) left = mid + 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (kwp->valid != 0 &&
|
||||||
|
(awk->option & kwp->valid) != kwp->valid)
|
||||||
|
break;
|
||||||
|
|
||||||
|
return kwp->type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* perform linear search if there are any custom words set */
|
||||||
|
kwent_t* kwp, * end;
|
||||||
qse_map_pair_t* pair;
|
qse_map_pair_t* pair;
|
||||||
|
|
||||||
for (kwp = kwtab; kwp->name != QSE_NULL; kwp++)
|
end = kwtab + QSE_COUNTOF(kwtab);
|
||||||
|
for (kwp = kwtab; kwp < end; kwp++)
|
||||||
{
|
{
|
||||||
const qse_char_t* k;
|
const qse_char_t* k;
|
||||||
qse_size_t l;
|
qse_size_t l;
|
||||||
@ -5473,6 +5502,8 @@ static int classify_ident (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return TOKEN_IDENT;
|
return TOKEN_IDENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,29 +22,29 @@
|
|||||||
/* these enums should match kwtab in parse.c */
|
/* these enums should match kwtab in parse.c */
|
||||||
enum kw_t
|
enum kw_t
|
||||||
{
|
{
|
||||||
KW_IN,
|
|
||||||
KW_BEGIN,
|
KW_BEGIN,
|
||||||
KW_END,
|
KW_END,
|
||||||
KW_FUNCTION,
|
|
||||||
KW_LOCAL,
|
|
||||||
KW_GLOBAL,
|
|
||||||
KW_IF,
|
|
||||||
KW_ELSE,
|
|
||||||
KW_WHILE,
|
|
||||||
KW_FOR,
|
|
||||||
KW_DO,
|
|
||||||
KW_BREAK,
|
KW_BREAK,
|
||||||
KW_CONTINUE,
|
KW_CONTINUE,
|
||||||
KW_RETURN,
|
KW_DELETE,
|
||||||
|
KW_DO,
|
||||||
|
KW_ELSE,
|
||||||
KW_EXIT,
|
KW_EXIT,
|
||||||
|
KW_FOR,
|
||||||
|
KW_FUNCTION,
|
||||||
|
KW_GETLINE,
|
||||||
|
KW_GLOBAL,
|
||||||
|
KW_IF,
|
||||||
|
KW_IN,
|
||||||
|
KW_LOCAL,
|
||||||
KW_NEXT,
|
KW_NEXT,
|
||||||
KW_NEXTFILE,
|
KW_NEXTFILE,
|
||||||
KW_NEXTOFILE,
|
KW_NEXTOFILE,
|
||||||
KW_DELETE,
|
|
||||||
KW_RESET,
|
|
||||||
KW_PRINT,
|
KW_PRINT,
|
||||||
KW_PRINTF,
|
KW_PRINTF,
|
||||||
KW_GETLINE,
|
KW_RESET,
|
||||||
|
KW_RETURN,
|
||||||
|
KW_WHILE
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
Loading…
x
Reference in New Issue
Block a user