package interp

import (
	"unicode"
)

// Sentinel values returned by the feed state handlers.
var (
	// err_refeed is not a real error; it is a control value telling FeedRune
	// to dispatch the same rune again after a handler changed the feed state.
	err_refeed        *error_t = &error_t{msg: "refeed"}
	err_sudden_eof    *error_t = &error_t{msg: "unexpected end of input"}
	err_right_bracket *error_t = &error_t{msg: "unbalanced right bracket"}
	err_right_brace   *error_t = &error_t{msg: "unbalanced right brace"}
	err_feed_state    *error_t = &error_t{msg: "internal error - invalid feed state"}
)

// is_delim reports whether c terminates a bare word: end of input, a bracket,
// a brace, a double quote, a semicolon, or any Unicode white space.
func is_delim(c rune) bool {
	switch c {
	case EOF_RUNE, '[', ']', '{', '}', '"', ';':
		return true
	}
	return unicode.IsSpace(c)
}

// is_xdigit reports whether c is an ASCII hexadecimal digit.
func is_xdigit(c rune) bool {
	return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
}

// digit_to_code converts an ASCII digit or letter to its numeric value
// ('0'-'9' -> 0-9, 'A'-'Z' and 'a'-'z' -> 10-35). Any other rune yields 0;
// callers are expected to validate the rune first (see handle_escape).
func digit_to_code(c rune) int32 {
	switch {
	case c >= '0' && c <= '9':
		return int32(c) - int32('0')
	case c >= 'A' && c <= 'Z':
		return int32(c) - int32('A') + 10
	case c >= 'a' && c <= 'z':
		return int32(c) - int32('a') + 10
	default:
		return 0 // this is an error
	}
}

// escape_max_to_rune maps the maximum digit count of a numeric escape back to
// the rune that introduced it. handle_escape uses it to emit that rune
// literally when no digit followed. Unknown counts yield '\x00'.
func escape_max_to_rune(max int) rune {
	switch max {
	case 8:
		return 'U'
	case 4:
		return 'u'
	case 3:
		return '0'
	case 2:
		return 'x'
	default:
		return '\x00'
	}
}

// escape_single_rune returns the rune denoted by a single-character escape
// (the rune following a backslash). An escaped newline becomes a space and
// any unrecognized rune stands for itself.
func escape_single_rune(c rune) rune {
	switch c {
	case 'a':
		return '\a'
	case 'b':
		return '\b'
	case 'f':
		return '\f'
	case 'n':
		return '\n'
	case 'r':
		return '\r'
	case 't':
		return '\t'
	case 'v':
		return '\v'
	case '\n':
		return ' '
	default:
		return c
	}
}

// init_or_cmd handles a rune seen at the start of an element inside a
// container state. mode selects the container: 0 for FEED_INIT (do_init),
// 1 for FEED_BRACKET (do_bracket) and 2 for FEED_BRACE (do_brace).
//
// Depending on c it pushes the feed state for the element that starts here,
// pops the container when its closing rune arrives, or returns an error:
// err_sudden_eof on end of input inside a bracket/brace, and
// err_right_bracket / err_right_brace for an unbalanced closer when
// interp.strict is set.
func (interp *Interp) init_or_cmd(c rune, mode int) error {
	var err error

	// word_start is set when c is an ordinary rune that begins a new word.
	word_start := false

	switch {
	case c == EOF_RUNE:
		if mode >= 1 {
			/* not in INIT */
			return err_sudden_eof
		}
		interp.pop_feed_struct()

	case c == '\\':
		interp.push_feed_struct(FEED_WORD)
		interp.start_escape()

	case c == '"':
		interp.push_feed_struct(FEED_DQUOTE)

	case c == '[':
		interp.push_feed_struct(FEED_BRACKET)

	case c == ']':
		if mode == 1 && interp.feed.state == FEED_BRACKET {
			interp.pop_feed_struct()
		} else if interp.strict {
			err = err_right_bracket
		} else {
			word_start = true
		}

	case c == '{':
		interp.push_feed_struct(FEED_BRACE)

	case c == '}':
		if mode == 2 && interp.feed.state == FEED_BRACE {
			interp.pop_feed_struct()
		} else if interp.strict {
			err = err_right_brace
		} else {
			word_start = true
		}

	case c == '$':
		interp.push_feed_struct(FEED_DOLLAR)

	case c == '\n' || c == ';':
		interp.push_feed_struct(FEED_EOL)
		interp.add_rune_to_token(c)

	case unicode.IsSpace(c):
		interp.push_feed_struct(FEED_SEP)
		interp.add_rune_to_token(c)

	default:
		word_start = true
	}

	if word_start {
		interp.push_feed_struct(FEED_WORD)
		interp.add_rune_to_token(c)
	}
	return err
}

// do_init handles a rune in the FEED_INIT state.
func (interp *Interp) do_init(c rune) error {
	return interp.init_or_cmd(c, 0)
}

// do_bracket handles a rune in the FEED_BRACKET state.
func (interp *Interp) do_bracket(c rune) error {
	return interp.init_or_cmd(c, 1)
}

// do_brace handles a rune in the FEED_BRACE state.
//
// An earlier variant (previously kept here as commented-out code) popped on
// '}' and otherwise pushed FEED_BRACED_TEXT and refed the rune; the current
// variant parses the brace body with the same rules as the other containers.
func (interp *Interp) do_brace(c rune) error {
	return interp.init_or_cmd(c, 2)
}

// do_braced_text handles a rune in the FEED_BRACED_TEXT state, i.e. raw text
// enclosed in {}. Nested braces are counted so only the matching '}' ends the
// text; at that point the state is popped and the rune is refed to the parent.
//
// NOTE(review): nothing in the visible code pushes FEED_BRACED_TEXT any more
// (see do_brace) - confirm whether this handler is still reachable.
func (interp *Interp) do_braced_text(c rune) error {
	var err error

	switch {
	case c == EOF_RUNE:
		err = err_sudden_eof

	case interp.feed.extra.escaped:
		// escaping in braces is simpler than the regular escaping (handle_escape())
		if c == '\n' {
			c = ' '
		}
		interp.add_rune_to_token(c)
		interp.feed.extra.escaped = false

	case c == '\\':
		// escaping is supported inside {}, but it is a bit different:
		// {abc\}} produces abc\}, not abc}. The backslash is kept in the
		// token, which is why start_escape() is not used here.
		interp.feed.extra.escaped = true
		interp.add_rune_to_token(c)

	case c == '{':
		interp.feed.extra.brace_count++
		interp.add_rune_to_token(c)

	case c == '}':
		if interp.feed.extra.brace_count > 0 {
			interp.add_rune_to_token(c)
			interp.feed.extra.brace_count--
		} else {
			interp.pop_feed_struct()
			err = err_refeed // back to FEED_BRACE
		}

	default:
		interp.add_rune_to_token(c)
	}
	return err
}

// do_dquote handles a rune in the FEED_DQUOTE state, i.e. at an element
// boundary inside a double-quoted string. It closes the string on '"', starts
// a bracket or variable element on '[' or '$', and otherwise starts a
// FEED_DQUOTED_TEXT run. End of input yields err_sudden_eof.
func (interp *Interp) do_dquote(c rune) error {
	var err error

	// text_start is set when c is an ordinary rune that begins a text run.
	text_start := false

	switch {
	case c == EOF_RUNE:
		return err_sudden_eof

	case c == '\\':
		interp.push_feed_struct(FEED_DQUOTED_TEXT)
		interp.start_escape()

	case c == '"':
		interp.pop_feed_struct()

	case c == '[':
		interp.push_feed_struct(FEED_BRACKET)

	case c == ']':
		// NOTE(review): FeedRune only dispatches here while the state is
		// FEED_DQUOTE, so the FEED_BRACKET test looks unreachable from the
		// visible callers - confirm before removing.
		if interp.feed.state == FEED_BRACKET {
			interp.pop_feed_struct()
		} else if interp.strict {
			err = err_right_bracket
		} else {
			text_start = true
		}

	case c == '$':
		interp.push_feed_struct(FEED_DOLLAR)

	default:
		text_start = true
	}

	if text_start {
		interp.push_feed_struct(FEED_DQUOTED_TEXT)
		interp.add_rune_to_token(c)
	}
	return err
}

// do_dquoted_text handles a rune in the FEED_DQUOTED_TEXT state, i.e. inside a
// run of literal text within a double-quoted string. Runes that do_dquote
// treats specially end the run: the state is popped and the rune is refed so
// that do_dquote handles it.
func (interp *Interp) do_dquoted_text(c rune) error {
	var err error

	switch {
	case interp.feed.extra.escaped:
		err = interp.handle_escape(c)

	case c == '\\':
		interp.start_escape()

	case c == EOF_RUNE || c == '"' || c == '[' || c == '$':
		// '$' must end the text run as well. do_dquote starts a variable
		// on '$', so without this "x$a" would keep the dollar sign as
		// literal text while "$a" is parsed as a variable.
		interp.pop_feed_struct()
		err = err_refeed

	default:
		interp.add_rune_to_token(c)
	}
	return err
}

// do_dollar handles a rune in the FEED_DOLLAR state, i.e. after a '$'.
// Letters, digits and '_' accumulate into the variable name. A '{' directly
// after the '$' switches to the braced form, which accepts any rune up to the
// closing '}'. Any other rune ends the name; it is refed to the parent state.
// End of input inside the braced form yields err_sudden_eof.
func (interp *Interp) do_dollar(c rune) error {
	var err error

	if c == EOF_RUNE && interp.feed.extra.var_braced {
		// "${name" without the closing brace. Report it here rather than
		// storing the EOF marker in the token as if it were a name rune.
		return err_sudden_eof
	}

	switch {
	case unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c):
		interp.add_rune_to_token(c)

	case c == '{':
		if len(interp.feed.token) == 0 {
			interp.feed.extra.var_braced = true
		} else if interp.feed.extra.var_braced {
			// inside ${...} a brace is an ordinary name rune like any other
			interp.add_rune_to_token(c)
		} else {
			// "$name{": the name ends here. The state must be popped
			// before refeeding; otherwise FeedRune dispatches the same
			// rune to this handler again and never terminates.
			interp.pop_feed_struct()
			err = err_refeed
		}

	case c == '}' && interp.feed.extra.var_braced:
		interp.pop_feed_struct()

	case interp.feed.extra.var_braced:
		interp.add_rune_to_token(c)

	default:
		if len(interp.feed.token) == 0 {
			// $ is followed by an invalid variable letter.
			// switch to a normal word mode by hack and add a dollar sign as a token
			interp.feed.state = FEED_WORD
			interp.add_rune_to_token('$')
		} else {
			interp.pop_feed_struct()
		}
		err = err_refeed
	}
	return err
}

// do_word handles a rune in the FEED_WORD state. A delimiter (see is_delim)
// ends the word: the state is popped and the rune is refed to the parent.
func (interp *Interp) do_word(c rune) error {
	var err error

	switch {
	case interp.feed.extra.escaped:
		err = interp.handle_escape(c)

	case c == '\\':
		interp.start_escape()

	case is_delim(c): // TODO: if not str characters
		interp.pop_feed_struct()
		err = err_refeed

	default:
		interp.add_rune_to_token(c)
	}
	return err
}

// do_sep handles a rune in the FEED_SEP state. It collects white space other
// than newline; a newline or any non-space rune ends the separator and is
// refed to the parent state.
func (interp *Interp) do_sep(c rune) error {
	if c == '\n' || !unicode.IsSpace(c) {
		interp.pop_feed_struct()
		return err_refeed
	}

	interp.add_rune_to_token(c)
	return nil
}

// do_eol handles a rune in the FEED_EOL state. Consecutive newlines are
// absorbed into the same end-of-line token; any other rune ends it and is
// refed to the parent state.
func (interp *Interp) do_eol(c rune) error {
	if c != '\n' {
		interp.pop_feed_struct()
		return err_refeed
	}

	interp.add_rune_to_token(c)
	return nil
}

// push_feed_struct pushes a new feed struct in the given state on top of the
// feed stack and returns it. Its level is one more than its parent's, or 0
// when the stack was empty.
func (interp *Interp) push_feed_struct(state feed_state_t) *feed_struct_t {
	var feed *feed_struct_t = &feed_struct_t{state: state}

	feed.parent = interp.feed
	if feed.parent == nil {
		feed.level = 0
	} else {
		feed.level = feed.parent.level + 1
	}

	interp.feed = feed
	return feed
}

// finalize_feed_struct moves the pending (tmp) cnode list of feed to the end
// of its finished cnode list and clears the pending list. When is_stmt is
// true the pending nodes are first wrapped as children of a single CNODE_STMT
// node. It does nothing when there are no pending nodes.
func (interp *Interp) finalize_feed_struct(feed *feed_struct_t, is_stmt bool) {
	if feed.cnode_tmp_last == nil {
		return
	}

	first, last := feed.cnode_tmp_first, feed.cnode_tmp_last
	if is_stmt {
		// wrap the list of cnodes as a single statement node. token is empty
		stmt_cnode := &Cnode_t{code: CNODE_STMT, child: feed.cnode_tmp_first}
		first, last = stmt_cnode, stmt_cnode
	}

	if feed.cnode_last == nil {
		feed.cnode_first = first
	} else {
		feed.cnode_last.next = first
	}
	feed.cnode_last = last

	feed.cnode_tmp_first = nil
	feed.cnode_tmp_last = nil
}

// add_cnode_to_feed_struct appends cnode to the pending (tmp) cnode list of feed.
func (interp *Interp) add_cnode_to_feed_struct(feed *feed_struct_t, cnode *Cnode_t) {
	if feed.cnode_tmp_last == nil {
		feed.cnode_tmp_first = cnode
	} else {
		feed.cnode_tmp_last.next = cnode
	}
	feed.cnode_tmp_last = cnode
}

// feed_to_cnode_code_tab maps a feed state to the code of the cnode created
// when a feed struct in that state is popped (see pop_feed_struct).
var feed_to_cnode_code_tab [11]cnode_code_t = [11]cnode_code_t{
	FEED_TOP:          CNODE_EOL, // this must never be used
	FEED_INIT:         CNODE_INIT,
	FEED_BRACKET:      CNODE_BRACKET,
	FEED_BRACE:        CNODE_BRACE,
	FEED_BRACED_TEXT:  CNODE_TEXT,
	FEED_DQUOTE:       CNODE_DQUOTE,
	FEED_DQUOTED_TEXT: CNODE_TEXT,
	FEED_DOLLAR:       CNODE_VAR,
	FEED_SEP:          CNODE_SEP,
	FEED_WORD:         CNODE_TEXT,
	FEED_EOL:          CNODE_EOL,
}

// pop_feed_struct pops the top feed struct off the feed stack and returns it.
//
// Unless the popped struct is an end-of-line or a separator, a cnode built
// from its state and token is appended to the parent's pending list; a
// CNODE_JOIN node is appended after it when the parent already holds an
// element that was not followed by a separator or end-of-line. Popping
// FEED_EOL finalizes the parent's pending nodes as a statement.
//
// It panics on broken internal invariants: popping an empty stack, a bottom
// struct that is not FEED_TOP, or a FEED_INIT whose parent is not FEED_TOP.
func (interp *Interp) pop_feed_struct() *feed_struct_t {
	var (
		feed   *feed_struct_t
		parent *feed_struct_t
		cnode  *Cnode_t
	)

	feed = interp.feed // to pop off
	if feed == nil {
		// excessive pop request. it's an internal implementation error
		// TODO: return error???
		panic("excessive pop off feed stack")
	}

	parent = feed.parent
	if parent == nil {
		// this must be FEED_TOP being popped off.
		if feed.state != FEED_TOP {
			panic("invalid internal state - top feed struct not FEED_TOP")
		}
		interp.feed = nil // indicate that the feed stack is empty
		return feed       // return the old feed struct
	}

	if feed.state == FEED_DOLLAR && len(feed.token) == 0 {
		// switch the dollar sign not followed by name to a literal dollar sign text
		feed.state = FEED_WORD
		interp.add_rune_to_token('$')
	}

	switch feed.state {
	case FEED_EOL:
		interp.finalize_feed_struct(parent, true)
		parent.cnode_cont = false

	case FEED_SEP:
		parent.cnode_cont = false

	default:
		cnode = &Cnode_t{code: feed_to_cnode_code_tab[feed.state], token: feed.token}

		switch cnode.code {
		case CNODE_BRACKET, CNODE_BRACE, CNODE_INIT:
			// popping a container feed struct
			interp.finalize_feed_struct(feed, true)
			cnode.child = feed.cnode_first
		case CNODE_DQUOTE:
			interp.finalize_feed_struct(feed, false)
			cnode.child = feed.cnode_first
		}

		// add the current cnode to the parent feed struct
		interp.add_cnode_to_feed_struct(parent, cnode)
		if parent.cnode_cont {
			interp.add_cnode_to_feed_struct(parent, &Cnode_t{code: CNODE_JOIN})
		} else {
			parent.cnode_cont = true
		}

		if feed.state == FEED_INIT {
			if parent.state != FEED_TOP {
				panic("internal error - parent struct not FEED_TOP")
			}
			interp.finalize_feed_struct(parent, false)
		}
	}

	interp.feed = parent
	return feed
}

// add_rune_to_token appends c to the token of the current feed struct.
func (interp *Interp) add_rune_to_token(c rune) {
	interp.feed.token = append(interp.feed.token, c)
}

// start_escape marks the current feed struct as being inside a backslash
// escape and resets the escape bookkeeping.
func (interp *Interp) start_escape() {
	interp.feed.extra.escaped = true
	interp.feed.extra.escape_len = 0
	interp.feed.extra.escape_max = 0
	interp.feed.extra.escape_val = 0
}

// end_escape marks the current feed struct as no longer inside an escape.
func (interp *Interp) end_escape() {
	interp.feed.extra.escaped = false
}

// handle_escape processes a rune that follows a backslash in the current feed
// struct. It supports \xXX, \uXXXX, \UXXXXXXXX, up to three octal digits, and
// the single-rune escapes of escape_single_rune.
//
// It returns nil when c was consumed and err_refeed when c must be dispatched
// again: either c is the first octal digit (refed into the octal state), or c
// does not belong to the escape and ended it. A numeric escape that ends
// early emits the value collected so far; if no digit was collected, the
// introducing rune (see escape_max_to_rune) is emitted literally.
//
// NOTE(review): the collected value is converted with rune() unchecked, so an
// out-of-range \UXXXXXXXX produces an invalid rune - confirm whether callers
// need validation.
func (interp *Interp) handle_escape(c rune) error {
	if c == EOF_RUNE {
		goto stop_escaping
	}

	switch interp.feed.extra.escape_max {
	case 8, 4, 2: // \UXXXXXXXX, \uXXXX, \xXX
		if is_xdigit(c) {
			interp.feed.extra.escape_val = interp.feed.extra.escape_val*16 + digit_to_code(c)
			interp.feed.extra.escape_len++
		} else {
			goto stop_escaping
		}

	case 3: // \ooo
		if c >= '0' && c <= '7' {
			interp.feed.extra.escape_val = interp.feed.extra.escape_val*8 + digit_to_code(c)
			interp.feed.extra.escape_len++
		} else {
			goto stop_escaping
		}

	case 0: // the rune right after the backslash
		if c == 'x' {
			interp.feed.extra.escape_max = 2
		} else if c == 'u' {
			interp.feed.extra.escape_max = 4
		} else if c == 'U' {
			interp.feed.extra.escape_max = 8
		} else if c >= '0' && c <= '7' {
			// the digit itself is part of the value; refeed it so that
			// the octal case above accumulates it
			interp.feed.extra.escape_max = 3
			return err_refeed
		} else {
			interp.add_rune_to_token(escape_single_rune(c))
			interp.end_escape()
			return nil
		}

	default:
		goto stop_escaping
	}

	if interp.feed.extra.escape_len == interp.feed.extra.escape_max {
		interp.add_rune_to_token(rune(interp.feed.extra.escape_val))
		interp.end_escape()
	}
	return nil

stop_escaping:
	if interp.feed.extra.escape_len == 0 {
		c = escape_max_to_rune(interp.feed.extra.escape_max)
		if c != '\x00' {
			interp.add_rune_to_token(c)
		}
	} else {
		interp.add_rune_to_token(rune(interp.feed.extra.escape_val))
	}
	interp.end_escape()
	return err_refeed
}

// -------------------------------------------------------------------------------

// BeginFeed resets the feed stack and prepares the interpreter to receive
// runes: any existing feed structs are popped, then FEED_TOP and FEED_INIT
// are pushed.
func (interp *Interp) BeginFeed() {
	for interp.feed != nil {
		interp.pop_feed_struct()
	}

	interp.push_feed_struct(FEED_TOP)
	interp.push_feed_struct(FEED_INIT)
}

// FeedRune feeds a single rune to the state machine. The rune is dispatched
// to the handler of the current feed state and dispatched again for as long
// as a handler answers with err_refeed. It returns err_feed_state when the
// feed stack is empty (BeginFeed has not been called) or the current state
// has no handler.
func (interp *Interp) FeedRune(c rune) error {
	for {
		if interp.feed == nil {
			// Nothing to dispatch on. Report it the same way EndFeed
			// reports an empty feed stack.
			return err_feed_state
		}

		var err error
		switch interp.feed.state {
		case FEED_INIT:
			err = interp.do_init(c)
		case FEED_BRACKET:
			err = interp.do_bracket(c)
		case FEED_BRACE:
			err = interp.do_brace(c)
		case FEED_BRACED_TEXT:
			err = interp.do_braced_text(c)
		case FEED_DQUOTE:
			err = interp.do_dquote(c)
		case FEED_DQUOTED_TEXT:
			err = interp.do_dquoted_text(c)
		case FEED_DOLLAR:
			err = interp.do_dollar(c)
		case FEED_SEP:
			err = interp.do_sep(c)
		case FEED_WORD:
			err = interp.do_word(c)
		case FEED_EOL:
			err = interp.do_eol(c)
		default:
			err = err_feed_state
		}

		if err != err_refeed {
			return err
		}
	}
}

// FeedRunes feeds every rune of text in order and stops at the first error.
func (interp *Interp) FeedRunes(text []rune) error {
	for _, c := range text {
		if err := interp.FeedRune(c); err != nil {
			return err
		}
	}
	return nil
}

// EndFeed signals the end of input by feeding EOF_RUNE and returns the first
// cnode of the finished list held by the FEED_TOP struct. It returns
// err_feed_state if the feed stack is not back at FEED_TOP afterwards.
func (interp *Interp) EndFeed() (*Cnode_t, error) {
	var err error = interp.FeedRune(EOF_RUNE)
	if err != nil {
		return nil, err
	}

	if interp.feed == nil || interp.feed.state != FEED_TOP {
		return nil, err_feed_state
	}
	return interp.feed.cnode_first, nil
}