From b8e0b3578b8cfc6929df742c893d67bdb05468ba Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Wed, 24 Jan 2024 16:26:30 +0900 Subject: [PATCH] WIP - implementing b or u prefix for a string or a character literal --- lang.txt | 3 +++ lib/hcl-prv.h | 10 ++++++++- lib/read.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++ t/feed-01.hcl | 10 ++++----- 4 files changed, 74 insertions(+), 6 deletions(-) diff --git a/lang.txt b/lang.txt index 116e27f..7e6f1b5 100644 --- a/lang.txt +++ b/lang.txt @@ -23,6 +23,9 @@ b"..." or B"..." for an byte string constant notation u"..." or U"..." for an explicit unicode string constant notation? + #b[ ] byte array?? + #[ ] normal array? + make basic branded types to an object if possible. for example (#[10 20]:at 1) diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index bd94d21..470bce0 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -632,6 +632,12 @@ struct hcl_flx_st_t }; +typedef struct hcl_flx_bu_t hcl_flx_bu_t; +struct hcl_flx_bu_t +{ + hcl_ooch_t start_c; /* the prefix character(B, b, U, or u) that began the token */ +}; + enum hcl_flx_state_t { HCL_FLX_START, @@ -645,7 +651,8 @@ enum hcl_flx_state_t HCL_FLX_PLAIN_IDENT, /* plain identifier */ HCL_FLX_PLAIN_NUMBER, /* plain number */ HCL_FLX_QUOTED_TOKEN, /* string, character */ - HCL_FLX_SIGNED_TOKEN /* prefixed with + or - */ + HCL_FLX_SIGNED_TOKEN, /* prefixed with + or - */ + HCL_FLX_BU /* beginning with B or U */ }; typedef enum hcl_flx_state_t hcl_flx_state_t; @@ -735,6 +742,7 @@ struct hcl_compiler_t hcl_flx_pn_t pn; /* plain number */ hcl_flx_qt_t qt; /* quoted token */ hcl_flx_st_t st; /* signed token */ + hcl_flx_bu_t bu; /* b or u prefix */ } u; } lx; diff --git a/lib/read.c b/lib/read.c index 8306df0..fbaeefe 100644 --- a/lib/read.c +++ b/lib/read.c @@ -1719,6 +1719,7 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st #define FLX_PN(hcl) (&((hcl)->c->feed.lx.u.pn)) #define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt)) #define FLX_ST(hcl) (&((hcl)->c->feed.lx.u.st)) +#define FLX_BU(hcl) 
(&((hcl)->c->feed.lx.u.bu)) static HCL_INLINE void init_flx_hc (hcl_flx_hc_t* hc) { @@ -1765,6 +1766,12 @@ static HCL_INLINE void init_flx_st (hcl_flx_st_t* st, hcl_ooch_t sign_c) st->sign_c = sign_c; } +static HCL_INLINE void init_flx_bu (hcl_flx_bu_t* bu, hcl_ooch_t start_c) +{ + HCL_MEMSET (bu, 0, HCL_SIZEOF(*bu)); + bu->start_c = start_c; +} + static void reset_flx_token (hcl_t* hcl) { /* clear the token name, reset its location */ @@ -1852,6 +1859,14 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c) FEED_CONTINUE (hcl, HCL_FLX_PLAIN_NUMBER); goto not_consumed; + case 'B': + case 'b': + case 'U': + case 'u': + init_flx_bu(FLX_BU(hcl), c); + FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_BU); + goto consumed; + default: /* TODO: limit the identifier characters and cause syntax error for other characters.. */ init_flx_pi (FLX_PI(hcl)); @@ -2564,6 +2579,47 @@ not_consumed: return 0; } +static int flx_bu (hcl_t* hcl, hcl_ooci_t c) +{ + hcl_flx_bu_t* bu = FLX_BU(hcl); + + if (c == '\"') + { +/* TODO: determine the token type based on bu->start_c. the prefix is currently ignored and a plain string literal is scanned */ + reset_flx_token (hcl); + init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t)); + FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote and move on */ + goto consumed; + } + else if (c == '\'') + { +/* TODO: determine the token type based on bu->start_c. the prefix is currently ignored and a plain character literal is scanned */ + reset_flx_token (hcl); + init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1); + FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote, and move on */ + goto consumed; + } + else + { + /* not followed by a quote. switch to the plain identifier */ + init_flx_pi (FLX_PI(hcl)); + + /* the prefix is already in the token buffer. 
just adjust state data */ + FLX_PI(hcl)->char_count++; + FLX_PI(hcl)->seg_len++; + + /* refeed c */ + FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT); + goto not_consumed; + } + +consumed: + return 1; + +not_consumed: + return 0; +} + /* ------------------------------------------------------------------------ */ static int feed_char (hcl_t* hcl, hcl_ooci_t c) @@ -2583,6 +2639,7 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c) case HCL_FLX_PLAIN_NUMBER: return flx_plain_number(hcl, c); case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c); case HCL_FLX_SIGNED_TOKEN: return flx_signed_token(hcl, c); + case HCL_FLX_BU: return flx_bu(hcl, c); default: /* unknown state */ diff --git a/t/feed-01.hcl b/t/feed-01.hcl index 2afbcf8..e53155b 100644 --- a/t/feed-01.hcl +++ b/t/feed-01.hcl @@ -1,4 +1,4 @@ -## this file is to test the reader/feeder againsst weirdly formatted input text. +## this file is to test the reader/feeder against weirdly formatted input text. { ## START @@ -7,7 +7,7 @@ defun xxx (x y z ::: r ) { - | k + | k b s | @@ -25,9 +25,9 @@ defun xxx (x y z [ j ] \ - := (xxx - 10 - 20 + := (xxx + 10 + 20 30) if (eqv? j 12960000) \