/* * $Id: read.c,v 1.29 2006-11-02 10:12:01 bacon Exp $ */ #include #define IS_IDENT(c) \ ((c) == ASE_T('+') || (c) == ASE_T('-') || \ (c) == ASE_T('*') || (c) == ASE_T('/') || \ (c) == ASE_T('%') || (c) == ASE_T('&') || \ (c) == ASE_T('<') || (c) == ASE_T('>') || \ (c) == ASE_T('=') || (c) == ASE_T('_') || \ (c) == ASE_T('?')) #define TOKEN_CLEAR(lsp) ase_lsp_name_clear (&(lsp)->token.name) #define TOKEN_TYPE(lsp) (lsp)->token.type #define TOKEN_IVAL(lsp) (lsp)->token.ival #define TOKEN_RVAL(lsp) (lsp)->token.rval #define TOKEN_SVAL(lsp) (lsp)->token.name.buf #define TOKEN_SLEN(lsp) (lsp)->token.name.size #define TOKEN_ADD_CHAR(lsp,ch) do { \ if (ase_lsp_name_addc(&(lsp)->token.name, ch) == -1) { \ lsp->errnum = ASE_LSP_ENOMEM; \ return -1; \ } \ } while (0) #define TOKEN_COMPARE(lsp,str) ase_lsp_name_compare (&(lsp)->token.name, str) #define TOKEN_END 0 #define TOKEN_INT 1 #define TOKEN_REAL 2 #define TOKEN_STRING 3 #define TOKEN_LPAREN 4 #define TOKEN_RPAREN 5 #define TOKEN_IDENT 6 #define TOKEN_QUOTE 7 #define TOKEN_DOT 8 #define TOKEN_INVALID 50 #define TOKEN_UNTERM_STRING 51 #define NEXT_CHAR(lsp) \ do { if (read_char(lsp) == -1) return -1;} while (0) #define NEXT_TOKEN(lsp) \ do { if (read_token(lsp) == -1) return ASE_NULL; } while (0) static ase_lsp_obj_t* read_obj (ase_lsp_t* lsp); static ase_lsp_obj_t* read_list (ase_lsp_t* lsp); static ase_lsp_obj_t* read_quote (ase_lsp_t* lsp); static int read_char (ase_lsp_t* lsp); static int read_token (ase_lsp_t* lsp); static int read_number (ase_lsp_t* lsp, int negative); static int read_ident (ase_lsp_t* lsp); static int read_string (ase_lsp_t* lsp); ase_lsp_obj_t* ase_lsp_read (ase_lsp_t* lsp) { if (lsp->curc == ASE_CHAR_EOF && read_char(lsp) == -1) return ASE_NULL; lsp->errnum = ASE_LSP_ENOERR; NEXT_TOKEN (lsp); lsp->mem->read = read_obj (lsp); if (lsp->mem->read != ASE_NULL) ase_lsp_deepunlockobj (lsp, lsp->mem->read); return lsp->mem->read; } static ase_lsp_obj_t* read_obj (ase_lsp_t* lsp) { ase_lsp_obj_t* obj; switch (TOKEN_TYPE(lsp)) { case TOKEN_END: lsp->errnum = ASE_LSP_ERR_END; return ASE_NULL; case TOKEN_LPAREN: NEXT_TOKEN (lsp); return read_list (lsp); case TOKEN_QUOTE: NEXT_TOKEN (lsp); return read_quote (lsp); case TOKEN_INT: obj = ase_lsp_makeintobj (lsp->mem, TOKEN_IVAL(lsp)); if (obj == ASE_NULL) lsp->errnum = ASE_LSP_ENOMEM; ase_lsp_lockobj (lsp, obj); return obj; case TOKEN_REAL: obj = ase_lsp_makerealobj (lsp->mem, TOKEN_RVAL(lsp)); if (obj == ASE_NULL) lsp->errnum = ASE_LSP_ENOMEM; ase_lsp_lockobj (lsp, obj); return obj; case TOKEN_STRING: obj = ase_lsp_makestr ( lsp->mem, TOKEN_SVAL(lsp), TOKEN_SLEN(lsp)); if (obj == ASE_NULL) lsp->errnum = ASE_LSP_ENOMEM; ase_lsp_lockobj (lsp, obj); return obj; case TOKEN_IDENT: ASE_LSP_ASSERT (lsp, lsp->mem->nil != ASE_NULL && lsp->mem->t != ASE_NULL); if (TOKEN_COMPARE(lsp,ASE_T("nil")) == 0) { obj = lsp->mem->nil; } else if (TOKEN_COMPARE(lsp,ASE_T("t")) == 0) { obj = lsp->mem->t; } else { obj = ase_lsp_makesym ( lsp->mem, TOKEN_SVAL(lsp), TOKEN_SLEN(lsp)); if (obj == ASE_NULL) lsp->errnum = ASE_LSP_ENOMEM; } ase_lsp_lockobj (lsp, obj); return obj; } lsp->errnum = ASE_LSP_ERR_SYNTAX; return ASE_NULL; } static ase_lsp_obj_t* read_list (ase_lsp_t* lsp) { ase_lsp_obj_t* obj; ase_lsp_obj_cons_t* p, * first = ASE_NULL, * prev = ASE_NULL; while (TOKEN_TYPE(lsp) != TOKEN_RPAREN) { if (TOKEN_TYPE(lsp) == TOKEN_END) { lsp->errnum = ASE_LSP_ERR_SYNTAX; /* unexpected end of input */ return ASE_NULL; } if (TOKEN_TYPE(lsp) == TOKEN_DOT) { if (prev == ASE_NULL) { lsp->errnum = ASE_LSP_ERR_SYNTAX; /* unexpected dot */ return ASE_NULL; } NEXT_TOKEN (lsp); obj = read_obj (lsp); if (obj == ASE_NULL) { if (lsp->errnum == ASE_LSP_ERR_END) { /* unexpected end of input */ lsp->errnum = ASE_LSP_ERR_SYNTAX; } return ASE_NULL; } prev->cdr = obj; NEXT_TOKEN (lsp); if (TOKEN_TYPE(lsp) != TOKEN_RPAREN) { lsp->errnum = ASE_LSP_ERR_SYNTAX; /* ) expected */ return ASE_NULL; } break; } obj = read_obj (lsp); if (obj == ASE_NULL) { if (lsp->errnum == ASE_LSP_ERR_END) { /* unexpected end of input */ lsp->errnum = ASE_LSP_ERR_SYNTAX; } return ASE_NULL; } p = (ase_lsp_obj_cons_t*)ase_lsp_makecons ( lsp->mem, lsp->mem->nil, lsp->mem->nil); if (p == ASE_NULL) { lsp->errnum = ASE_LSP_ENOMEM; return ASE_NULL; } ase_lsp_lockobj (lsp, (ase_lsp_obj_t*)p); if (first == ASE_NULL) first = p; if (prev != ASE_NULL) prev->cdr = (ase_lsp_obj_t*)p; p->car = obj; prev = p; NEXT_TOKEN (lsp); } return (first == ASE_NULL)? lsp->mem->nil: (ase_lsp_obj_t*)first; } static ase_lsp_obj_t* read_quote (ase_lsp_t* lsp) { ase_lsp_obj_t* cons, * tmp; tmp = read_obj (lsp); if (tmp == ASE_NULL) { if (lsp->errnum == ASE_LSP_ERR_END) { // unexpected end of input lsp->errnum = ASE_LSP_ERR_SYNTAX; } return ASE_NULL; } cons = ase_lsp_makecons (lsp->mem, tmp, lsp->mem->nil); if (cons == ASE_NULL) { lsp->errnum = ASE_LSP_ENOMEM; return ASE_NULL; } ase_lsp_lockobj (lsp, cons); cons = ase_lsp_makecons (lsp->mem, lsp->mem->quote, cons); if (cons == ASE_NULL) { lsp->errnum = ASE_LSP_ENOMEM; return ASE_NULL; } ase_lsp_lockobj (lsp, cons); return cons; } static int read_char (ase_lsp_t* lsp) { ase_ssize_t n; ase_char_t c; if (lsp->input_func == ASE_NULL) { lsp->errnum = ASE_LSP_ERR_INPUT_NOT_ATTACHED; return -1; } n = lsp->input_func(ASE_LSP_IO_READ, lsp->input_arg, &c, 1); if (n == -1) { lsp->errnum = ASE_LSP_ERR_INPUT; return -1; } if (n == 0) lsp->curc = ASE_CHAR_EOF; else lsp->curc = c; return 0; } static int read_token (ase_lsp_t* lsp) { ASE_LSP_ASSERT (lsp, lsp->input_func != ASE_NULL); TOKEN_CLEAR (lsp); while (1) { // skip white spaces while (ASE_LSP_ISSPACE(lsp, lsp->curc)) NEXT_CHAR (lsp); // skip the comments here if (lsp->curc == ASE_T(';')) { do { NEXT_CHAR (lsp); } while (lsp->curc != ASE_T('\n') && lsp->curc != ASE_CHAR_EOF); } else break; } if (lsp->curc == ASE_CHAR_EOF) { TOKEN_TYPE(lsp) = TOKEN_END; return 0; } else if (lsp->curc == ASE_T('(')) { TOKEN_ADD_CHAR (lsp, lsp->curc); TOKEN_TYPE(lsp) = TOKEN_LPAREN; NEXT_CHAR (lsp); return 0; } else if (lsp->curc == ASE_T(')')) { TOKEN_ADD_CHAR (lsp, lsp->curc); TOKEN_TYPE(lsp) = TOKEN_RPAREN; NEXT_CHAR (lsp); return 0; } else if (lsp->curc == ASE_T('\'')) { TOKEN_ADD_CHAR (lsp, lsp->curc); TOKEN_TYPE(lsp) = TOKEN_QUOTE; NEXT_CHAR (lsp); return 0; } else if (lsp->curc == ASE_T('.')) { TOKEN_ADD_CHAR (lsp, lsp->curc); TOKEN_TYPE(lsp) = TOKEN_DOT; NEXT_CHAR (lsp); return 0; } else if (lsp->curc == ASE_T('-')) { TOKEN_ADD_CHAR (lsp, lsp->curc); NEXT_CHAR (lsp); if (ASE_LSP_ISDIGIT(lsp,lsp->curc)) { return read_number (lsp, 1); } else if (IS_IDENT(lsp->curc)) { return read_ident (lsp); } else { TOKEN_TYPE(lsp) = TOKEN_IDENT; return 0; } } else if (ASE_LSP_ISDIGIT(lsp,lsp->curc)) { return read_number (lsp, 0); } else if (ASE_LSP_ISALPHA(lsp,lsp->curc) || IS_IDENT(lsp->curc)) { return read_ident (lsp); } else if (lsp->curc == ASE_T('\"')) { NEXT_CHAR (lsp); return read_string (lsp); } TOKEN_TYPE(lsp) = TOKEN_INVALID; NEXT_CHAR (lsp); // consume return 0; } static int read_number (ase_lsp_t* lsp, int negative) { ase_long_t ival = 0; ase_real_t rval = 0.; do { ival = ival * 10 + (lsp->curc - ASE_T('0')); TOKEN_ADD_CHAR (lsp, lsp->curc); NEXT_CHAR (lsp); } while (ASE_LSP_ISDIGIT(lsp,lsp->curc)); /* TODO: extend parsing floating point number */ if (lsp->curc == ASE_T('.')) { ase_real_t fraction = 0.1; NEXT_CHAR (lsp); rval = (ase_real_t)ival; while (ASE_LSP_ISDIGIT(lsp, lsp->curc)) { rval += (ase_real_t)(lsp->curc - ASE_T('0')) * fraction; fraction *= 0.1; NEXT_CHAR (lsp); } TOKEN_RVAL(lsp) = rval; TOKEN_TYPE(lsp) = TOKEN_REAL; if (negative) rval *= -1; } else { TOKEN_IVAL(lsp) = ival; TOKEN_TYPE(lsp) = TOKEN_INT; if (negative) ival *= -1; } return 0; } #if 0 static int __read_number (ase_lsp_t* lsp, int negative) { ase_cint_t c; ASE_LSP_ASSERT (lsp, ASE_LSP_STR_LEN(&lsp->token.name) == 0); SET_TOKEN_TYPE (lsp, TOKEN_INT); c = lsp->src.lex.curc; if (c == ASE_T('0')) { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); if (c == ASE_T('x') || c == ASE_T('X')) { /* hexadecimal number */ do { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } while (ASE_LSP_ISXDIGIT (lsp, c)); return 0; } else if (c == ASE_T('b') || c == ASE_T('B')) { /* binary number */ do { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } while (c == ASE_T('0') || c == ASE_T('1')); return 0; } else if (c != '.') { /* octal number */ while (c >= ASE_T('0') && c <= ASE_T('7')) { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } return 0; } } while (ASE_LSP_ISDIGIT (lsp, c)) { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } if (c == ASE_T('.')) { /* floating-point number */ SET_TOKEN_TYPE (lsp, TOKEN_REAL); ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); while (ASE_LSP_ISDIGIT (lsp, c)) { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } } if (c == ASE_T('E') || c == ASE_T('e')) { SET_TOKEN_TYPE (lsp, TOKEN_REAL); ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); if (c == ASE_T('+') || c == ASE_T('-')) { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } while (ASE_LSP_ISDIGIT (lsp, c)) { ADD_TOKEN_CHAR (lsp, c); GET_CHAR_TO (lsp, c); } } return 0; } #endif static int read_ident (ase_lsp_t* lsp) { do { TOKEN_ADD_CHAR (lsp, lsp->curc); NEXT_CHAR (lsp); } while (ASE_LSP_ISALNUM(lsp,lsp->curc) || IS_IDENT(lsp->curc)); TOKEN_TYPE(lsp) = TOKEN_IDENT; return 0; } static int read_string (ase_lsp_t* lsp) { int escaped = 0; ase_cint_t code = 0; do { if (lsp->curc == ASE_CHAR_EOF) { TOKEN_TYPE(lsp) = TOKEN_UNTERM_STRING; return 0; } // TODO: if (escaped == 3) { /* \xNN */ } else if (escaped == 2) { /* \000 */ } else if (escaped == 1) { /* backslash + character */ if (lsp->curc == ASE_T('a')) lsp->curc = ASE_T('\a'); else if (lsp->curc == ASE_T('b')) lsp->curc = ASE_T('\b'); else if (lsp->curc == ASE_T('f')) lsp->curc = ASE_T('\f'); else if (lsp->curc == ASE_T('n')) lsp->curc = ASE_T('\n'); else if (lsp->curc == ASE_T('r')) lsp->curc = ASE_T('\r'); else if (lsp->curc == ASE_T('t')) lsp->curc = ASE_T('\t'); else if (lsp->curc == ASE_T('v')) lsp->curc = ASE_T('\v'); else if (lsp->curc == ASE_T('0')) { escaped = 2; code = 0; NEXT_CHAR (lsp); continue; } else if (lsp->curc == ASE_T('x')) { escaped = 3; code = 0; NEXT_CHAR (lsp); continue; } } else if (lsp->curc == ASE_T('\\')) { escaped = 1; NEXT_CHAR (lsp); continue; } TOKEN_ADD_CHAR (lsp, lsp->curc); NEXT_CHAR (lsp); } while (lsp->curc != ASE_T('\"')); TOKEN_TYPE(lsp) = TOKEN_STRING; NEXT_CHAR (lsp); return 0; }