From 689e6e2bf811b5270b50019dca7fa1ff71e713ee Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sun, 5 Jun 2005 16:44:05 +0000 Subject: [PATCH] *** empty log message *** --- ase/stx/parser.c | 154 ++++++++++++++++++++++++--------- ase/stx/parser.h | 11 ++- ase/stx/syntax.bnf | 196 ++++++++++++++++++++++++++++++++++++++++++ ase/stx/token.h | 15 ++-- ase/test/stx/parser.c | 36 +++++--- 5 files changed, 346 insertions(+), 66 deletions(-) create mode 100644 ase/stx/syntax.bnf diff --git a/ase/stx/parser.c b/ase/stx/parser.c index ad278009..55e59f6c 100644 --- a/ase/stx/parser.c +++ b/ase/stx/parser.c @@ -1,5 +1,5 @@ /* - * $Id: parser.c,v 1.12 2005-06-05 05:26:24 bacon Exp $ + * $Id: parser.c,v 1.13 2005-06-05 16:44:05 bacon Exp $ */ #include @@ -7,6 +7,9 @@ #include static int __get_token (xp_stx_parser_t* parser); +static int __get_ident (xp_stx_parser_t* parser); +static int __get_charlit (xp_stx_parser_t* parser); +static int __get_strlit (xp_stx_parser_t* parser); static int __skip_spaces (xp_stx_parser_t* parser); static int __skip_comment (xp_stx_parser_t* parser); static int __get_char (xp_stx_parser_t* parser); @@ -50,12 +53,14 @@ void xp_stx_parser_close (xp_stx_parser_t* parser) do { if (__get_char(parser) == -1) return -1; } while (0) #define UNGET_CHAR(parser,c) \ do { if (__unget_char(parser,c) == -1) return -1; } while (0) +#define GET_TOKEN(parser) \ + do { if (__get_token(parser) == -1) return -1; } while (0) + int xp_stx_parser_parse_method ( xp_stx_parser_t* parser, xp_stx_word_t method_class, void* input) { - if (parser->input == XP_NULL || - parser->input_reset == XP_NULL || + if (parser->input_reset == XP_NULL || parser->input_consume == XP_NULL) { parser->error_code = XP_STX_PARSER_ERROR_INVALID; return -1; @@ -63,6 +68,9 @@ int xp_stx_parser_parse_method ( RESET_INPUT (parser, input); GET_CHAR (parser); + + GET_TOKEN (parser); +xp_printf (XP_TEXT("%d, [%s]\n"), parser->token.type, parser->token.buffer); return 0; } @@ -81,12 +89,18 @@ static int __get_token (xp_stx_parser_t* parser) } while (1); c = parser->curc; + xp_stx_token_clear (&parser->token); if (xp_stx_isalpha(c)) { + if (__get_ident(parser) == -1) return -1; } else if (xp_stx_isdigit(c)) { } + else if (c == XP_STX_CHAR('$')) { + if (__get_charlit(parser) == -1) return -1; + } else if (c == XP_STX_CHAR('\'')) { + if (__get_strlit(parser) == -1) return -1; } else { parser->error_code = XP_STX_PARSER_ERROR_CHAR; @@ -96,6 +110,91 @@ static int __get_token (xp_stx_parser_t* parser) return 0; } +static int __get_ident (xp_stx_parser_t* parser) +{ + /* + * identifier ::= letter (letter | digit)* + * keyword ::= identifier ':' + */ + + xp_cint_t c = parser->curc; + parser->token.type = XP_STX_TOKEN_IDENT; + + do { + if (xp_stx_token_addc (&parser->token, c) == -1) { + parser->error_code = XP_STX_PARSER_ERROR_MEMORY; + return -1; + } + GET_CHAR (parser); + c = parser->curc; + } while (xp_stx_isalnum(c)); + + if (c == XP_STX_CHAR(':')) { + parser->token.type = XP_STX_TOKEN_KEYWORD; + GET_CHAR (parser); + } + + return 0; +} + +static int __get_charlit (xp_stx_parser_t* parser) +{ + /* + * character_literal ::= '$' character + * character ::= "Any character in the implementation-defined character set" + */ + + xp_cint_t c = parser->curc; + if (c == XP_STX_CHAR_EOF) { + parser->error_code = XP_STX_PARSER_ERROR_CHARLIT; + return -1; + } + + parser->token.type = XP_STX_TOKEN_CHARLIT; + if (xp_stx_token_addc (&parser->token, c) == -1) { + parser->error_code = XP_STX_PARSER_ERROR_MEMORY; + return -1; + } + + GET_CHAR (parser); + return 0; +} + +static int __get_strlit (xp_stx_parser_t* parser) +{ + /* + * string_literal ::= stringDelimiter stringBody stringDelimiter + * stringBody ::= (nonStringDelimiter | (stringDelimiter stringDelimiter)*) + * stringDelimiter ::= ''' "a single quote" + */ + + /* TODO: C-like string */ + + xp_cint_t c = parser->curc; + parser->token.type = XP_STX_TOKEN_STRLIT; + + do { + do { + if (xp_stx_token_addc (&parser->token, c) == -1) { + parser->error_code = XP_STX_PARSER_ERROR_MEMORY; + return -1; + } + GET_CHAR (parser); + c = parser->curc; + + if (c == XP_STX_CHAR_EOF) { + parser->error_code = XP_STX_PARSER_ERROR_STRLIT; + return -1; + } + } while (c != XP_STX_CHAR('\'')); + + GET_CHAR (parser); + c = parser->curc; + } while (c == XP_STX_CHAR('\'')); + + return 0; +} + static int __skip_spaces (xp_stx_parser_t* parser) { while (xp_stx_isspace(parser->curc)) GET_CHAR (parser); @@ -112,11 +211,17 @@ static int __skip_comment (xp_stx_parser_t* parser) static int __get_char (xp_stx_parser_t* parser) { xp_cint_t c; - if (parser->input_consume (parser, &c) == -1) { - parser->error_code = XP_STX_PARSER_ERROR_INPUT; - return -1; + + if (parser->ungotc_count > 0) { + parser->curc = parser->ungotc[parser->ungotc_count--]; + } + else { + if (parser->input_consume (parser, &c) == -1) { + parser->error_code = XP_STX_PARSER_ERROR_INPUT; + return -1; + } + parser->curc = c; } - parser->curc = c; return 0; } @@ -138,38 +243,3 @@ static int __reset_input (xp_stx_parser_t* parser, void* input) return 0; } -/* -static int __get_token (xp_stx_parser_t* parser) -{ - xp_cint_t c = parser->curc; - - __skip_spaces (parser); - __skip_comment (parser); - - switch (c) { - case - } - - return -1; -} - -static int __get_char (xp_stx_parser_t* parser) -{ - xp_cint_t c = parser->curp; - - if (c == XP_STX_CHAR('\0')) { - parser->curc = XP_EOF_CHAR; - } - else { - parser->curc = c; - parser->curp++; - } - - return 0; -} - -static int __skip_spaces (xp_stx_parser_t* parser) -{ - while (xp_stx_isspace(parser->curc)) __get_char -} -*/ diff --git a/ase/stx/parser.h b/ase/stx/parser.h index c6348e6f..babeb57b 100644 --- a/ase/stx/parser.h +++ b/ase/stx/parser.h @@ -1,5 +1,5 @@ /* - * $Id: parser.h,v 1.9 2005-06-05 05:26:24 bacon Exp $ + * $Id: parser.h,v 1.10 2005-06-05 16:44:05 bacon Exp $ */ #ifndef _XP_STX_PARSER_H_ @@ -12,8 +12,11 @@ enum { XP_STX_PARSER_ERROR_NONE = 0, XP_STX_PARSER_ERROR_INPUT, + XP_STX_PARSER_ERROR_MEMORY, XP_STX_PARSER_ERROR_INVALID, - XP_STX_PARSER_ERROR_CHAR + XP_STX_PARSER_ERROR_CHAR, + XP_STX_PARSER_ERROR_CHARLIT, + XP_STX_PARSER_ERROR_STRLIT }; typedef struct xp_stx_parser_t xp_stx_parser_t; @@ -41,8 +44,8 @@ extern "C" { xp_stx_parser_t* xp_stx_parser_open (xp_stx_parser_t* parser); void xp_stx_parser_close (xp_stx_parser_t* parser); -int xp_stx_parse_method (xp_stx_parser_t* parser, - xp_stx_word_t method_class, xp_stx_char_t* method_text); +int xp_stx_parser_parse_method ( + xp_stx_parser_t* parser, xp_stx_word_t method_class, void* input); #ifdef __cplusplus } diff --git a/ase/stx/syntax.bnf b/ase/stx/syntax.bnf new file mode 100644 index 00000000..81112540 --- /dev/null +++ b/ase/stx/syntax.bnf @@ -0,0 +1,196 @@ +~~~ method grammar ~~~ + + ::= + + [ ] + [] + + ::= | + | + + + ::= unarySelector + + ::= binarySelector + + ::= (keyword )+ + + ::= '|' '|' + + ::= identifier* + + ::= '[' ']' + + ::= [* '|'] + [] [] + + ::= ':' identifier + + ::= + ( ['.'] ) | + ( ['.' []]) + + ::= returnOperator + + ::= + | + + + ::= assignmentOperator + + ::= + [ ] + + := identifier + + ::= + identifier | + | + | + ( '(' ')' ) + + + ::= + (+ * [] ) | + (+ [] ) | + + + ::= unarySelector + + ::= binarySelector + + ::= * + + ::= (keyword )+ + + ::= * * + + ::= (';' )* + + ::= + | + | + | + | + | + + + ::= ['-'] + + ::= integer | float | scaledDecimal + + ::= quotedCharacter + + ::= quotedString + + ::= hashedString + + ::= quotedSelector + + ::= '#(' * ')' + + ::= | identifier + +reserved identifiers -> nil true false self super + + +~~~ lexical grammar ~~~ + +character ::= + "Any character in the implementation-defined character set" + +whitespace ::= + "Any non-printing character interpreted as white space + including spaces, tabs, and line breaks" + +digit ::= + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + +uppercaseAlphabetic ::= + 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | + 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | + 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' + +lowercaseAlphabetic ::= + 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | + 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | + 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' + +nonCaseLetter ::= '_' + +letter ::= + uppercaseAlphabetic | + lowercaseAlphabetic | + nonCaseLetter | + "implementation defined letters" + +commentDelimiter ::= '"' + +nonCommentDelimiter::= +"any character that is not a commentDelimiter " + + comment := + commentDelimiter nonCommentDelimiter * commentDelimiter + +identifier ::= letter (letter | digit)* + +keyword ::= identifier ':' + +binaryCharacter ::= + '!' | '%' | '&' | '*' | '+' | ',' | + '/' | '<' | '=' | '>' | '?' | '@' | + '\' | '~' | '|' | '-' + +binarySelector ::= binaryCharacter+ + +returnOperator ::= '^' + +assignmentOperator ::= ':=' + + +integer ::= decimalInteger | radixInteger + +decimalInteger ::= digits + +digits ::= digit+ + +radixInteger ::= radixSpecifier 'r' radixDigits + +radixSpecifier := digits + +radixDigits ::= (digit | uppercaseAlphabetic)+ + + +float ::= mantissa [exponentLetter exponent] + +mantissa ::= digits'.' digits + +exponent ::= ['-']decimalInteger + +exponentLetter ::= 'e' | 'd' | 'q' + +scaledDecimal ::= scaledMantissa 's' [fractionalDigits] + +scaledMantissa ::= decimalInteger | mantissa + +fractionalDigits ::= decimalInteger + +quotedCharacter ::= '$' character + +quotedString ::= stringDelimiter stringBody stringDelimiter + +stringBody ::= (nonStringDelimiter | (stringDelimiter stringDelimiter)*) + +stringDelimiter ::= ''' "a single quote" + +nonStringDelimiter ::= "any character except stringDelimiter" + +hashedString ::= '#' quotedString + +quotedSelector ::= '#' (unarySelector | binarySelector | keywordSelector) + +keywordSelector ::= keyword+ + +separator ::= (whitespace | comment)* + + diff --git a/ase/stx/token.h b/ase/stx/token.h index 2cca34e6..0a0f75ca 100644 --- a/ase/stx/token.h +++ b/ase/stx/token.h @@ -1,5 +1,5 @@ /* - * $Id: token.h,v 1.3 2005-06-02 16:14:58 bacon Exp $ + * $Id: token.h,v 1.4 2005-06-05 16:44:05 bacon Exp $ */ #ifndef _XP_STX_TOKEN_H_ @@ -9,14 +9,11 @@ enum { - XP_STX_TOKEN_END = 0, - XP_STX_TOKEN_STRING = 1, - XP_STX_TOKEN_IDENT = 2, - XP_STX_TOKEN_SELF = 3, - XP_STX_TOKEN_SUPER = 4, - XP_STX_TOKEN_NIL = 5, - XP_STX_TOKEN_TRUE = 6, - XP_STX_TOKEN_FALSE = 7 + XP_STX_TOKEN_END = 0, + XP_STX_TOKEN_CHARLIT, + XP_STX_TOKEN_STRLIT, + XP_STX_TOKEN_IDENT, + XP_STX_TOKEN_KEYWORD }; struct xp_stx_token_t diff --git a/ase/test/stx/parser.c b/ase/test/stx/parser.c index 9e163d00..f5f5064a 100644 --- a/ase/test/stx/parser.c +++ b/ase/test/stx/parser.c @@ -12,23 +12,27 @@ struct ss_t { - xp_stx_char_t* text; + const xp_stx_char_t* text; xp_size_t size; xp_size_t index; }; typedef struct ss_t ss_t; -int ss_reset (xp_stx_parser_t* parser) +int ss_input (void* owner, int cmd, void* arg) { - return 0; -} + ss_t* ss = (ss_t*)owner; -int ss_consume (xp_stx_parser_t* parser, xp_stx_cint_t* c) -{ - ss_t* ss = (ss_t*)parser->input; - if (ss->index < ss->size) *c = ss->text[ss->index++]; - else *c = XP_STX_CHAR_EOF; + if (cmd == XP_STX_PARSER_INPUT_OPEN) { + return 0; + } + else if (cmd == XP_STX_PARSER_INPUT_CLOSE) { + return 0; + } + else if (cmd == XP_STX_PARSER_INPUT_CONSUME) { + if (ss->index < ss->size) *c = ss->text[ss->index++]; + else *c = XP_STX_CHAR_EOF; + } return 0; } @@ -49,8 +53,18 @@ int xp_main (int argc, xp_char_t* argv[]) return -1; } - parser.input_reset = ss_reset; - parser.input_consume = ss_consume; + parser.input_func = ss_func; + + { + /* + ss_t ss = { + XP_STX_TEXT("isNil\n^true"), + 11, + 0 + }; + */ + xp_stx_parser_parse_method (&parser, 0, &ss); + } xp_stx_parser_close (&parser); xp_printf (XP_TEXT("== End of program ==\n"));