Updated the feed-based reader to handle hash-marked character literals
This commit is contained in:
parent
65aacaaf4f
commit
3d6abc38bd
18
bin/main.c
18
bin/main.c
@ -855,13 +855,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* the compiler must be invoked whenever feed() sees a complete object */
|
/* the compiler must be invoked whenever feed() sees a complete object */
|
||||||
|
if (x <= -1) goto feed_error;
|
||||||
if (x <= -1)
|
|
||||||
{
|
|
||||||
if (hcl->errnum == HCL_ESYNERR) print_synerr (hcl);
|
|
||||||
else hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: cannot feed - [%d] %js\n", hcl_geterrnum(hcl), hcl_geterrmsg(hcl));
|
|
||||||
goto oops; /* TODO: proceed or just exit? */
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n == 0 || feof(fp))
|
if (n == 0 || feof(fp))
|
||||||
@ -880,12 +874,20 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
|
|||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
hcl_endfeed (hcl);
|
if (hcl_endfeed (hcl) <= -1)
|
||||||
|
{
|
||||||
|
feed_error:
|
||||||
|
if (hcl->errnum == HCL_ESYNERR) print_synerr (hcl);
|
||||||
|
else hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: cannot feed - [%d] %js\n", hcl_geterrnum(hcl), hcl_geterrmsg(hcl));
|
||||||
|
goto oops; /* TODO: proceed or just exit? */
|
||||||
|
}
|
||||||
|
|
||||||
fclose (fp);
|
fclose (fp);
|
||||||
|
|
||||||
/* TODO: execute code? */
|
/* TODO: execute code? */
|
||||||
if (hcl_getbclen(hcl) > 0)
|
if (hcl_getbclen(hcl) > 0)
|
||||||
{
|
{
|
||||||
|
/* TODO: execute code... */
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -532,16 +532,37 @@ struct hcl_flx_dt_t
|
|||||||
int col_next;
|
int col_next;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct hcl_flx_hc_t hcl_flx_hc_t; /* hash-marked character like #\, #\newline */
|
||||||
|
struct hcl_flx_hc_t
|
||||||
|
{
|
||||||
|
/* state data */
|
||||||
|
hcl_oow_t char_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct hcl_flx_rn_t hcl_flx_rn_t; /* radixed number */
|
||||||
|
struct hcl_flx_rn_t
|
||||||
|
{
|
||||||
|
/* input data */
|
||||||
|
hcl_iotok_type_t tok_type;
|
||||||
|
hcl_synerrnum_t synerr_code;
|
||||||
|
int radix;
|
||||||
|
|
||||||
|
/* state data */
|
||||||
|
int invalid;
|
||||||
|
hcl_oow_t digit_count;
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */
|
typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */
|
||||||
struct hcl_flx_qt_t
|
struct hcl_flx_qt_t
|
||||||
{
|
{
|
||||||
/* input data */
|
/* input data */
|
||||||
|
hcl_iotok_type_t tok_type;
|
||||||
|
hcl_synerrnum_t synerr_code;
|
||||||
hcl_ooch_t end_char;
|
hcl_ooch_t end_char;
|
||||||
hcl_ooch_t esc_char;
|
hcl_ooch_t esc_char;
|
||||||
hcl_oow_t min_len;
|
hcl_oow_t min_len;
|
||||||
hcl_oow_t max_len;
|
hcl_oow_t max_len;
|
||||||
hcl_iotok_type_t tok_type;
|
|
||||||
hcl_synerrnum_t synerr_code;
|
|
||||||
int regex;
|
int regex;
|
||||||
|
|
||||||
/* state data */
|
/* state data */
|
||||||
@ -553,9 +574,11 @@ struct hcl_flx_qt_t
|
|||||||
enum hcl_flx_state_t
|
enum hcl_flx_state_t
|
||||||
{
|
{
|
||||||
HCL_FLX_START,
|
HCL_FLX_START,
|
||||||
HCL_FLX_DELIM_TOKEN,
|
|
||||||
HCL_FLX_COMMENT,
|
HCL_FLX_COMMENT,
|
||||||
HCL_FLX_SHARP_TOKEN,
|
HCL_FLX_DELIM_TOKEN,
|
||||||
|
HCL_FLX_HASHED_TOKEN, /* hash-marked token */
|
||||||
|
HCL_FLX_HASHED_CHAR, /* hash-marked character that begins with #\ */
|
||||||
|
HCL_FLX_RADIXED_NUMBER,
|
||||||
HCL_FLX_QUOTED_TOKEN
|
HCL_FLX_QUOTED_TOKEN
|
||||||
};
|
};
|
||||||
typedef enum hcl_flx_state_t hcl_flx_state_t;
|
typedef enum hcl_flx_state_t hcl_flx_state_t;
|
||||||
@ -615,6 +638,8 @@ struct hcl_compiler_t
|
|||||||
union
|
union
|
||||||
{
|
{
|
||||||
hcl_flx_dt_t dt; /* delimiter token */
|
hcl_flx_dt_t dt; /* delimiter token */
|
||||||
|
hcl_flx_hc_t hc; /* hash-marked character */
|
||||||
|
hcl_flx_rn_t rn; /* radixed number */
|
||||||
hcl_flx_qt_t qt; /* quoted token */
|
hcl_flx_qt_t qt; /* quoted token */
|
||||||
} u;
|
} u;
|
||||||
} lx;
|
} lx;
|
||||||
|
277
lib/read.c
277
lib/read.c
@ -718,7 +718,7 @@ static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix)
|
static int get_radixed_number (hcl_t* hcl, hcl_ooci_t rc, int radix)
|
||||||
{
|
{
|
||||||
hcl_ooci_t c;
|
hcl_ooci_t c;
|
||||||
|
|
||||||
@ -743,6 +743,7 @@ static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix)
|
|||||||
|
|
||||||
if (!is_delimchar(c))
|
if (!is_delimchar(c))
|
||||||
{
|
{
|
||||||
|
/* collect more characters to form a complete token for the error message below */
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
ADD_TOKEN_CHAR(hcl, c);
|
ADD_TOKEN_CHAR(hcl, c);
|
||||||
@ -761,7 +762,7 @@ static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_sharp_token (hcl_t* hcl)
|
static int get_hashed_token (hcl_t* hcl)
|
||||||
{
|
{
|
||||||
hcl_ooci_t c;
|
hcl_ooci_t c;
|
||||||
int radix;
|
int radix;
|
||||||
@ -806,16 +807,16 @@ static int get_sharp_token (hcl_t* hcl)
|
|||||||
case 'b':
|
case 'b':
|
||||||
radix = 2;
|
radix = 2;
|
||||||
radixnum:
|
radixnum:
|
||||||
if (get_radix_number (hcl, c, radix) <= -1) return -1;
|
if (get_radixed_number (hcl, c, radix) <= -1) return -1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'e':
|
case 'e':
|
||||||
if (get_radix_number(hcl, c, 10) <= -1) return -1;
|
if (get_radixed_number(hcl, c, 10) <= -1) return -1;
|
||||||
SET_TOKEN_TYPE (hcl, HCL_IOTOK_ERRLIT);
|
SET_TOKEN_TYPE (hcl, HCL_IOTOK_ERRLIT);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'p':
|
case 'p':
|
||||||
if (get_radix_number(hcl, c, 16) <= -1) return -1;
|
if (get_radixed_number(hcl, c, 16) <= -1) return -1;
|
||||||
SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT);
|
SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -864,7 +865,7 @@ static int get_sharp_token (hcl_t* hcl)
|
|||||||
"invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
|
"invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */
|
c = c * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16); /* don't care if it is for 'p' */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if (HCL_SIZEOF_OOCH_T >= 2)
|
#if (HCL_SIZEOF_OOCH_T >= 2)
|
||||||
@ -1203,7 +1204,7 @@ retry:
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '#':
|
case '#':
|
||||||
if (get_sharp_token(hcl) <= -1) return -1;
|
if (get_hashed_token(hcl) <= -1) return -1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '+':
|
case '+':
|
||||||
@ -1236,7 +1237,7 @@ retry:
|
|||||||
radix = 16;
|
radix = 16;
|
||||||
radnumlit:
|
radnumlit:
|
||||||
ADD_TOKEN_CHAR (hcl, oldc);
|
ADD_TOKEN_CHAR (hcl, oldc);
|
||||||
if (get_radix_number(hcl, c, radix) <= -1) return -1;
|
if (get_radixed_number(hcl, c, radix) <= -1) return -1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -2364,8 +2365,37 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st
|
|||||||
|
|
||||||
/* short-cuts to lexer state data */
|
/* short-cuts to lexer state data */
|
||||||
#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt))
|
#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt))
|
||||||
|
#define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc))
|
||||||
|
#define FLX_RN(hcl) (&((hcl)->c->feed.lx.u.rn))
|
||||||
#define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt))
|
#define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt))
|
||||||
|
|
||||||
|
/*
 * Prepares the radixed-number lexer state. The whole structure is zeroed
 * first so that the state fields (invalid, digit_count) start from 0, then
 * the three input fields are filled in from the arguments.
 */
static HCL_INLINE void init_flx_rn (hcl_flx_rn_t* rn, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix)
{
	HCL_MEMSET (rn, 0, HCL_SIZEOF(*rn));

	rn->radix = radix;
	rn->synerr_code = synerr_code;
	rn->tok_type = tok_type;
}
|
||||||
|
|
||||||
|
/*
 * Prepares the hash-marked character lexer state by zeroing the whole
 * structure, which resets the collected character count to 0.
 */
static HCL_INLINE void init_flx_hc (hcl_flx_hc_t* hc)
{
	HCL_MEMSET (hc, 0, HCL_SIZEOF(*hc));
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Prepares the quoted-token lexer state. The structure is zeroed first so
 * that every field not listed below starts from 0, then the input fields
 * are filled in from the arguments.
 */
static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, hcl_ooch_t end_char, hcl_ooch_t esc_char, hcl_oow_t min_len, hcl_oow_t max_len)
{
	HCL_MEMSET (qt, 0, HCL_SIZEOF(*qt));

	/* what to produce or report */
	qt->tok_type = tok_type;
	qt->synerr_code = synerr_code;

	/* quoting characters */
	qt->end_char = end_char;
	qt->esc_char = esc_char;

	/* length limits */
	qt->min_len = min_len;
	qt->max_len = max_len;
}
|
||||||
|
|
||||||
|
|
||||||
static int flx_start (hcl_t* hcl, hcl_ooci_t c)
|
static int flx_start (hcl_t* hcl, hcl_ooci_t c)
|
||||||
{
|
{
|
||||||
HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START);
|
HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START);
|
||||||
@ -2413,36 +2443,20 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case '#':
|
case '#':
|
||||||
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_SHARP_TOKEN);
|
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HASHED_TOKEN);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '\"':
|
case '\"':
|
||||||
HCL_MEMSET (FLX_QT(hcl), 0, HCL_SIZEOF(*FLX_QT(hcl)));
|
init_flx_qt (FLX_QT(hcl), HCL_IOTOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t));
|
||||||
FLX_QT(hcl)->end_char = c;
|
|
||||||
FLX_QT(hcl)->esc_char = '\\';
|
|
||||||
FLX_QT(hcl)->min_len = 0;
|
|
||||||
FLX_QT(hcl)->max_len = HCL_TYPE_MAX(hcl_oow_t);
|
|
||||||
FLX_QT(hcl)->tok_type = HCL_IOTOK_STRLIT;
|
|
||||||
FLX_QT(hcl)->synerr_code = HCL_SYNERR_STRLIT;
|
|
||||||
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */
|
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '\'':
|
case '\'':
|
||||||
HCL_MEMSET (FLX_QT(hcl), 0, HCL_SIZEOF(*FLX_QT(hcl)));
|
init_flx_qt (FLX_QT(hcl), HCL_IOTOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1);
|
||||||
FLX_QT(hcl)->end_char = c;
|
|
||||||
FLX_QT(hcl)->esc_char = '\\';
|
|
||||||
FLX_QT(hcl)->min_len = 1;
|
|
||||||
FLX_QT(hcl)->max_len = 1;
|
|
||||||
FLX_QT(hcl)->tok_type = HCL_IOTOK_CHARLIT;
|
|
||||||
FLX_QT(hcl)->synerr_code = HCL_SYNERR_CHARLIT;
|
|
||||||
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */
|
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
case '#':
|
|
||||||
if (get_sharp_token(hcl) <= -1) return -1;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case '+':
|
case '+':
|
||||||
case '-':
|
case '-':
|
||||||
oldc = c;
|
oldc = c;
|
||||||
@ -2473,7 +2487,7 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c)
|
|||||||
radix = 16;
|
radix = 16;
|
||||||
radnumlit:
|
radnumlit:
|
||||||
ADD_TOKEN_CHAR (hcl, oldc);
|
ADD_TOKEN_CHAR (hcl, oldc);
|
||||||
if (get_radix_number(hcl, c, radix) <= -1) return -1;
|
if (get_radixed_number(hcl, c, radix) <= -1) return -1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -2612,6 +2626,12 @@ not_consumed:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Lexer state handler for HCL_FLX_COMMENT.
 * Every character is consumed. A line break additionally switches the
 * lexer back to HCL_FLX_START.
 * Returns 1 (consumed) in all cases.
 */
static int flx_comment (hcl_t* hcl, hcl_ooci_t c)
{
	if (is_linebreak(c))
	{
		FEED_CONTINUE (hcl, HCL_FLX_START);
	}

	return 1; /* consumed */
}
|
||||||
|
|
||||||
static int flx_delim_token (hcl_t* hcl, hcl_ooci_t c)
|
static int flx_delim_token (hcl_t* hcl, hcl_ooci_t c)
|
||||||
{
|
{
|
||||||
if (find_delim_token_char(hcl, c, FLX_DT(hcl)->row_start, FLX_DT(hcl)->row_end, FLX_DT(hcl)->col_next, FLX_DT(hcl)))
|
if (find_delim_token_char(hcl, c, FLX_DT(hcl)->row_start, FLX_DT(hcl)->row_end, FLX_DT(hcl)->col_next, FLX_DT(hcl)))
|
||||||
@ -2642,13 +2662,7 @@ not_consumed:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int flx_comment (hcl_t* hcl, hcl_ooci_t c)
|
static int flx_hashed_token (hcl_t* hcl, hcl_ooci_t c)
|
||||||
{
|
|
||||||
if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FLX_START);
|
|
||||||
return 1; /* consumed */
|
|
||||||
}
|
|
||||||
|
|
||||||
static int flx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* #xXXXX hexadecimal
|
* #xXXXX hexadecimal
|
||||||
@ -2685,6 +2699,36 @@ static int flx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
|
|||||||
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT);
|
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT);
|
||||||
goto consumed;
|
goto consumed;
|
||||||
|
|
||||||
|
/* --------------------------- */
|
||||||
|
|
||||||
|
case 'x':
|
||||||
|
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16);
|
||||||
|
goto radixed_number;
|
||||||
|
|
||||||
|
case 'o':
|
||||||
|
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8);
|
||||||
|
goto radixed_number;
|
||||||
|
|
||||||
|
case 'b':
|
||||||
|
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2);
|
||||||
|
goto radixed_number;
|
||||||
|
|
||||||
|
case 'e':
|
||||||
|
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_ERRLIT, HCL_SYNERR_ERRLIT, 10);
|
||||||
|
goto radixed_number;
|
||||||
|
|
||||||
|
case 'p':
|
||||||
|
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_SMPTRLIT, HCL_SYNERR_SMPTRLIT, 16);
|
||||||
|
radixed_number:
|
||||||
|
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_RADIXED_NUMBER);
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* --------------------------- */
|
||||||
|
case '\\':
|
||||||
|
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HASHED_CHAR);
|
||||||
|
goto consumed;
|
||||||
|
|
||||||
|
/* --------------------------- */
|
||||||
case '[':
|
case '[':
|
||||||
FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_BAPAREN);
|
FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_BAPAREN);
|
||||||
goto consumed;
|
goto consumed;
|
||||||
@ -2693,6 +2737,9 @@ static int flx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
|
|||||||
FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN);
|
FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN);
|
||||||
goto consumed;
|
goto consumed;
|
||||||
|
|
||||||
|
/* --------------------------- */
|
||||||
|
/* TODO: #include... etc more directives? */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// TODO: fix this part
|
// TODO: fix this part
|
||||||
if (is_spacechar(c) || c == HCL_UCI_EOF)
|
if (is_spacechar(c) || c == HCL_UCI_EOF)
|
||||||
@ -2711,13 +2758,159 @@ not_consumed:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* double-quoted string */
|
/*
 * Lexer state handler for HCL_FLX_HASHED_CHAR - a hash-marked character
 * literal that begins with #\.
 *
 * Non-delimiter characters are appended to the token name and counted.
 * When a delimiter arrives, the collected name is converted to a single
 * character:
 *   - exactly one character after the #\ prefix  -> that character itself
 *   - x followed by up to 2 hex digits           -> the character code
 *   - u followed by up to 4 hex digits           -> ditto (HCL_SIZEOF_OOCH_T >= 2)
 *   - U followed by up to 8 hex digits           -> ditto (HCL_SIZEOF_OOCH_T >= 4)
 *   - a name matched by does_token_name_match()  -> the character it stands for
 * The token name is then replaced by the converted character and the token
 * is wrapped up as HCL_IOTOK_CHARLIT.
 *
 * Index 2 of the token name is the first character after the prefix; the
 * code relies on the name holding the two prefix characters at index 0 and 1.
 *
 * Returns 1 if c has been consumed, 0 if c has not been consumed (the
 * delimiter that ended the literal), -1 on a syntax error.
 */
static int flx_hashed_char (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hc_t* hc = FLX_HC(hcl);

	if (!is_delimchar(c))
	{
		/* still inside the literal. keep collecting */
		ADD_TOKEN_CHAR (hcl, c);
		hc->char_count++;
		return 1; /* consumed */
	}

	if (hc->char_count == 0)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"no valid character in character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	if (TOKEN_NAME_LEN(hcl) >= 4)
	{
		/* two or more characters after the prefix - a hexadecimal
		 * notation or a named character */
		hcl_oow_t max_digit_count = 0; /* 0 means that it is not a hexadecimal notation */

		if (TOKEN_NAME_CHAR(hcl, 2) == 'x') max_digit_count = 2;
#if (HCL_SIZEOF_OOCH_T >= 2)
		else if (TOKEN_NAME_CHAR(hcl, 2) == 'u') max_digit_count = 4;
#endif
#if (HCL_SIZEOF_OOCH_T >= 4)
		else if (TOKEN_NAME_CHAR(hcl, 2) == 'U') max_digit_count = 8;
#endif

		if (max_digit_count > 0)
		{
			hcl_oow_t i;

			/* the digits begin at index 3, after the prefix and the x, u or U marker */
			if (TOKEN_NAME_LEN(hcl) - 3 > max_digit_count)
			{
				hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
					"invalid hexadecimal character in character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
				return -1;
			}

			c = 0;
			for (i = 3; i < TOKEN_NAME_LEN(hcl); i++)
			{
				if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, i)))
				{
					hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
						"invalid hexadecimal character in character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
					return -1;
				}
				c = c * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16);
			}
		}
		else if (does_token_name_match(hcl, VOCA_BACKSPACE)) c = '\b';
		else if (does_token_name_match(hcl, VOCA_LINEFEED))  c = '\n';
		else if (does_token_name_match(hcl, VOCA_NEWLINE))   c = '\n'; /* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */
		else if (does_token_name_match(hcl, VOCA_NUL))       c = '\0'; /* null character. not the object null */
		else if (does_token_name_match(hcl, VOCA_PAGE))      c = '\f';
		else if (does_token_name_match(hcl, VOCA_RETURN))    c = '\r';
		else if (does_token_name_match(hcl, VOCA_RUBOUT))    c = '\x7F'; /* DEL */
		else if (does_token_name_match(hcl, VOCA_SPACE))     c = ' ';
		else if (does_token_name_match(hcl, VOCA_TAB))       c = '\t';
		else if (does_token_name_match(hcl, VOCA_VTAB))      c = '\v';
		else
		{
			hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
				"invalid character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
			return -1;
		}
	}
	else
	{
		/* exactly one character after the prefix. it stands for itself */
		HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3);
		c = TOKEN_NAME_CHAR(hcl, 2);
	}

	/* reset the token name to the converted character */
	CLEAR_TOKEN_NAME (hcl);
	ADD_TOKEN_CHAR (hcl, c);
	FEED_WRAP_UP (hcl, HCL_IOTOK_CHARLIT);

	return 0; /* the delimiter itself is not consumed */
}
|
||||||
|
|
||||||
|
/*
 * Lexer state handler for HCL_FLX_RADIXED_NUMBER.
 *
 * Each character is checked against the radix stored by init_flx_rn():
 *   - a valid digit is appended to the token name and counted.
 *   - if the very first character after the radix specifier is not a
 *     valid digit, a syntax error is raised immediately.
 *   - a later non-digit that is not a delimiter is still appended, so
 *     that the whole offending token shows up in the error message, and
 *     the number is marked invalid.
 *   - a delimiter ends the number: an error is raised if it was marked
 *     invalid, otherwise the token is wrapped up with the token type
 *     stored in the state.
 *
 * Errors are reported with the syntax error number stored in the state
 * rather than a fixed one, so that each literal kind set up through
 * init_flx_rn() reports the code it was configured with.
 *
 * Returns 1 if c has been consumed, 0 if c has not been consumed (the
 * delimiter that ended the number), -1 on a syntax error.
 */
static int flx_radixed_number (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_rn_t* rn = FLX_RN(hcl);

	if (CHAR_TO_NUM(c, rn->radix) < rn->radix)
	{
		/* a valid digit for the radix */
		HCL_ASSERT (hcl, !is_delimchar(c));
		ADD_TOKEN_CHAR (hcl, c);
		rn->digit_count++;
		return 1; /* consumed */
	}

	if (rn->digit_count == 0)
	{
		/* nothing usable follows the radix specifier */
		hcl_setsynerrbfmt (hcl, rn->synerr_code, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	if (!is_delimchar(c))
	{
		/* collect more characters to form a complete token for the
		 * error message raised when the delimiter is reached */
		ADD_TOKEN_CHAR (hcl, c);
		rn->digit_count++;
		rn->invalid = 1;
		return 1; /* consumed */
	}

	if (rn->invalid)
	{
		hcl_setsynerrbfmt (hcl, rn->synerr_code, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"invalid digit in radixed number in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	FEED_WRAP_UP (hcl, rn->tok_type);
	return 0; /* the delimiter itself is not consumed */
}
|
||||||
|
|
||||||
|
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
|
||||||
{
|
{
|
||||||
hcl_flx_qt_t* qt = FLX_QT(hcl);
|
hcl_flx_qt_t* qt = FLX_QT(hcl);
|
||||||
|
|
||||||
if (c == HCL_OOCI_EOF)
|
if (c == HCL_OOCI_EOF)
|
||||||
{
|
{
|
||||||
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*hcl->c->feed.lx.loc?*/, HCL_NULL);
|
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2881,15 +3074,13 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c)
|
|||||||
switch (FLX_STATE(hcl))
|
switch (FLX_STATE(hcl))
|
||||||
{
|
{
|
||||||
case HCL_FLX_START: return flx_start(hcl, c);
|
case HCL_FLX_START: return flx_start(hcl, c);
|
||||||
case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c);
|
|
||||||
case HCL_FLX_COMMENT: return flx_comment(hcl, c);
|
case HCL_FLX_COMMENT: return flx_comment(hcl, c);
|
||||||
case HCL_FLX_SHARP_TOKEN: return flx_sharp_token(hcl, c);
|
case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c);
|
||||||
|
case HCL_FLX_HASHED_TOKEN: return flx_hashed_token(hcl, c);
|
||||||
|
case HCL_FLX_HASHED_CHAR: return flx_hashed_char(hcl, c);
|
||||||
|
case HCL_FLX_RADIXED_NUMBER: return flx_radixed_number(hcl, c);
|
||||||
case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c);
|
case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
case HCL_FLX_SQSTR:
|
|
||||||
return flx_sqstr(hcl, c);
|
|
||||||
|
|
||||||
case HCL_FLX_DIRECTIVE:
|
case HCL_FLX_DIRECTIVE:
|
||||||
break;
|
break;
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user