645 lines
14 KiB
Go
645 lines
14 KiB
Go
package interp
|
|
|
|
import (
|
|
"unicode"
|
|
)
|
|
|
|
var err_refeed *error_t = &error_t{msg: "refeed"} // this is not a real error and is used as a control element
|
|
|
|
var err_sudden_eof *error_t = &error_t{msg: "unexpected end of input"}
|
|
var err_right_bracket *error_t = &error_t{msg: "unbalanced right bracket"}
|
|
var err_right_brace *error_t = &error_t{msg: "unbalanced right brace"}
|
|
var err_feed_state *error_t = &error_t{msg: "internal error - invalid feed state"}
|
|
|
|
func is_delim(c rune) bool {
|
|
return c == EOF_RUNE || c == '[' || c == ']' || c == '{' || c == '}' || c == '"' || c == ';' || unicode.IsSpace(c)
|
|
}
|
|
|
|
// is_xdigit reports whether c is an ASCII hexadecimal digit (0-9, a-f, A-F).
func is_xdigit(c rune) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
|
|
|
|
// digit_to_code converts an ASCII digit or letter to its numeric value:
// '0'-'9' map to 0-9 and letters of either case map to 10-35.
// Any other rune yields 0, which callers cannot distinguish from '0', so
// validate the rune first (e.g. with is_xdigit).
func digit_to_code(c rune) int32 {
	switch {
	case '0' <= c && c <= '9':
		return int32(c - '0')
	case 'A' <= c && c <= 'Z':
		return int32(c-'A') + 10
	case 'a' <= c && c <= 'z':
		return int32(c-'a') + 10
	}
	return 0 // this is an error
}
|
|
|
|
// escape_max_to_rune maps the maximum digit count of a numeric escape back to
// the character that introduces it: 8 -> 'U', 4 -> 'u', 3 -> '0', 2 -> 'x'.
// Every other value yields '\x00'.
func escape_max_to_rune(max int) rune {
	// Indexed by digit count; unlisted slots hold the zero rune.
	intro := [...]rune{2: 'x', 3: '0', 4: 'u', 8: 'U'}
	if max < 0 || max >= len(intro) {
		return '\x00'
	}
	return intro[max]
}
|
|
|
|
// escape_single_rune returns the rune denoted by a one-character backslash
// escape: the usual C-style letters (a, b, f, n, r, t, v) become their control
// characters and an escaped newline becomes a space. Any other rune stands for
// itself.
func escape_single_rune(c rune) rune {
	// Parallel tables: letters[i] after a backslash produces values[i].
	const (
		letters = "abfnrtv\n"
		values  = "\a\b\f\n\r\t\v "
	)
	for i := 0; i < len(letters); i++ {
		if c == rune(letters[i]) {
			return rune(values[i])
		}
	}
	return c
}
|
|
|
|
func (interp *Interp) init_or_cmd(c rune, mode int) error {
|
|
var err error
|
|
|
|
if c == EOF_RUNE {
|
|
if mode >= 1 {
|
|
/* not in INIT */
|
|
return err_sudden_eof
|
|
}
|
|
interp.pop_feed_struct()
|
|
} else if c == '\\' {
|
|
interp.push_feed_struct(FEED_WORD)
|
|
interp.start_escape()
|
|
} else if c == '"' {
|
|
interp.push_feed_struct(FEED_DQUOTE)
|
|
} else if c == '[' {
|
|
interp.push_feed_struct(FEED_BRACKET)
|
|
} else if c == ']' {
|
|
if mode == 1 && interp.feed.state == FEED_BRACKET {
|
|
interp.pop_feed_struct()
|
|
} else if interp.strict {
|
|
err = err_right_bracket
|
|
} else {
|
|
goto normal_rune
|
|
}
|
|
} else if c == '{' {
|
|
interp.push_feed_struct(FEED_BRACE)
|
|
} else if c == '}' {
|
|
if mode == 2 && interp.feed.state == FEED_BRACE {
|
|
interp.pop_feed_struct()
|
|
} else if interp.strict {
|
|
err = err_right_brace
|
|
} else {
|
|
goto normal_rune
|
|
}
|
|
} else if c == '$' {
|
|
interp.push_feed_struct(FEED_DOLLAR)
|
|
} else if c == '\n' || c == ';' {
|
|
interp.push_feed_struct(FEED_EOL)
|
|
interp.add_rune_to_token(c)
|
|
} else if unicode.IsSpace(c) {
|
|
interp.push_feed_struct(FEED_SEP)
|
|
interp.add_rune_to_token(c)
|
|
} else {
|
|
goto normal_rune
|
|
}
|
|
|
|
done:
|
|
return err
|
|
|
|
normal_rune:
|
|
interp.push_feed_struct(FEED_WORD)
|
|
interp.add_rune_to_token(c)
|
|
goto done
|
|
}
|
|
|
|
func (interp *Interp) do_init(c rune) error {
|
|
return interp.init_or_cmd(c, 0)
|
|
}
|
|
|
|
func (interp *Interp) do_bracket(c rune) error {
|
|
return interp.init_or_cmd(c, 1)
|
|
}
|
|
|
|
func (interp *Interp) do_brace(c rune) error {
|
|
/*
|
|
var err error
|
|
|
|
if c == '}' {
|
|
interp.pop_feed_struct()
|
|
} else {
|
|
interp.push_feed_struct(FEED_BRACED_TEXT)
|
|
err = err_refeed
|
|
}
|
|
return err
|
|
*/
|
|
return interp.init_or_cmd(c, 2)
|
|
}
|
|
|
|
func (interp *Interp) do_braced_text(c rune) error {
|
|
var err error
|
|
|
|
/* text enclosed in {} */
|
|
if c == EOF_RUNE {
|
|
err = err_sudden_eof
|
|
} else if interp.feed.extra.escaped {
|
|
// escaping in braces are simpler than the regular escaping(handle_escape())
|
|
if c == '\n' {
|
|
c = ' '
|
|
}
|
|
interp.add_rune_to_token(c)
|
|
interp.feed.extra.escaped = false
|
|
} else if c == '\\' {
|
|
// escaping is supported inside {}. but escaping is a bit different
|
|
// {abc\}} produces abc\}, not abc}
|
|
interp.feed.extra.escaped = true /* so not using start_escape() */
|
|
interp.add_rune_to_token(c)
|
|
} else if c == '{' {
|
|
interp.feed.extra.brace_count++
|
|
interp.add_rune_to_token(c)
|
|
} else if c == '}' {
|
|
if interp.feed.extra.brace_count > 0 {
|
|
interp.add_rune_to_token(c)
|
|
interp.feed.extra.brace_count--
|
|
} else {
|
|
interp.pop_feed_struct()
|
|
err = err_refeed // back to FEED_BRACE
|
|
}
|
|
} else {
|
|
interp.add_rune_to_token(c)
|
|
}
|
|
return err
|
|
}
|
|
|
|
func (interp *Interp) do_dquote(c rune) error {
|
|
var err error
|
|
/*
|
|
if c == '"' {
|
|
interp.pop_feed_struct()
|
|
|
|
// } else if c == '[' {
|
|
// interp.push_feed_struct(FEED_BRACKET)
|
|
// } else if c == ']' && interp.strict {
|
|
// err = err_right_bracket
|
|
|
|
} else {
|
|
interp.push_feed_struct(FEED_DQUOTED_TEXT)
|
|
err = err_refeed
|
|
}
|
|
return err
|
|
*/
|
|
|
|
if c == EOF_RUNE {
|
|
return err_sudden_eof
|
|
} else if c == '\\' {
|
|
interp.push_feed_struct(FEED_DQUOTED_TEXT)
|
|
interp.start_escape()
|
|
} else if c == '"' {
|
|
interp.pop_feed_struct()
|
|
} else if c == '[' {
|
|
interp.push_feed_struct(FEED_BRACKET)
|
|
} else if c == ']' {
|
|
if interp.feed.state == FEED_BRACKET {
|
|
interp.pop_feed_struct()
|
|
} else if interp.strict {
|
|
err = err_right_bracket
|
|
} else {
|
|
goto normal_rune
|
|
}
|
|
} else if c == '$' {
|
|
interp.push_feed_struct(FEED_DOLLAR)
|
|
} else {
|
|
goto normal_rune
|
|
}
|
|
|
|
done:
|
|
return err
|
|
|
|
normal_rune:
|
|
interp.push_feed_struct(FEED_DQUOTED_TEXT)
|
|
interp.add_rune_to_token(c)
|
|
goto done
|
|
}
|
|
|
|
func (interp *Interp) do_dquoted_text(c rune) error {
|
|
var err error
|
|
|
|
/*
|
|
if c == EOF_RUNE {
|
|
err = err_sudden_eof
|
|
} else if interp.feed.extra.escaped {
|
|
err = interp.handle_escape(c)
|
|
} else if c == '\\' {
|
|
interp.start_escape()
|
|
} else if c == '"' {
|
|
interp.pop_feed_struct()
|
|
err = err_refeed
|
|
} else if c == '[' {
|
|
interp.pop_feed_struct()
|
|
err = err_refeed
|
|
} else if c == ']' && interp.strict {
|
|
err = err_right_bracket
|
|
} else if c == '$' {
|
|
// a variable inside a double-quoted string
|
|
interp.push_feed_struct(FEED_DOLLAR)
|
|
} else {
|
|
interp.add_rune_to_token(c)
|
|
}*/
|
|
|
|
if interp.feed.extra.escaped {
|
|
err = interp.handle_escape(c)
|
|
} else if c == '\\' {
|
|
interp.start_escape()
|
|
} else if c == EOF_RUNE || c == '"' || c == '[' {
|
|
interp.pop_feed_struct()
|
|
err = err_refeed
|
|
} else {
|
|
interp.add_rune_to_token(c)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (interp *Interp) do_dollar(c rune) error {
|
|
var err error
|
|
|
|
if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) {
|
|
interp.add_rune_to_token(c)
|
|
} else if c == '{' {
|
|
if len(interp.feed.token) > 0 {
|
|
err = err_refeed
|
|
} else {
|
|
interp.feed.extra.var_braced = true
|
|
}
|
|
} else if c == '}' && interp.feed.extra.var_braced {
|
|
interp.pop_feed_struct()
|
|
} else if interp.feed.extra.var_braced {
|
|
interp.add_rune_to_token(c)
|
|
} else {
|
|
if len(interp.feed.token) == 0 {
|
|
// $ is followed by an invalid variable letter.
|
|
// switch to a normal word mode by hack and add a dollar sign as a token
|
|
interp.feed.state = FEED_WORD
|
|
interp.add_rune_to_token('$')
|
|
} else {
|
|
interp.pop_feed_struct()
|
|
}
|
|
err = err_refeed
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (interp *Interp) do_word(c rune) error {
|
|
var err error
|
|
|
|
if interp.feed.extra.escaped {
|
|
err = interp.handle_escape(c)
|
|
} else if c == '\\' {
|
|
interp.start_escape()
|
|
} else if is_delim(c) { // TODO: if not str characters
|
|
interp.pop_feed_struct()
|
|
err = err_refeed
|
|
} else {
|
|
interp.add_rune_to_token(c)
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (interp *Interp) do_sep(c rune) error {
|
|
if c == '\n' || !unicode.IsSpace(c) {
|
|
interp.pop_feed_struct()
|
|
return err_refeed
|
|
}
|
|
|
|
interp.add_rune_to_token(c)
|
|
return nil
|
|
}
|
|
|
|
func (interp *Interp) do_eol(c rune) error {
|
|
|
|
if c != '\n' {
|
|
interp.pop_feed_struct()
|
|
return err_refeed
|
|
}
|
|
|
|
interp.add_rune_to_token(c)
|
|
return nil
|
|
}
|
|
|
|
func (interp *Interp) push_feed_struct(state feed_state_t) *feed_struct_t {
|
|
var feed *feed_struct_t = &feed_struct_t{state: state}
|
|
feed.parent = interp.feed
|
|
if feed.parent == nil {
|
|
feed.level = 0
|
|
} else {
|
|
feed.level = feed.parent.level + 1
|
|
}
|
|
|
|
interp.feed = feed
|
|
return feed
|
|
}
|
|
|
|
func (interp *Interp) finalize_feed_struct(feed *feed_struct_t, is_stmt bool) {
|
|
|
|
var stmt_cnode *Cnode_t
|
|
|
|
if feed.cnode_tmp_last != nil {
|
|
if is_stmt {
|
|
// wrap the list of cnodes as a single statement node
|
|
stmt_cnode = &Cnode_t{code: CNODE_STMT, child: feed.cnode_tmp_first} // token is empty
|
|
|
|
if feed.cnode_last == nil {
|
|
feed.cnode_first = stmt_cnode
|
|
} else {
|
|
feed.cnode_last.next = stmt_cnode
|
|
}
|
|
feed.cnode_last = stmt_cnode
|
|
} else {
|
|
if feed.cnode_last == nil {
|
|
feed.cnode_first = feed.cnode_tmp_first
|
|
|
|
} else {
|
|
feed.cnode_last.next = feed.cnode_tmp_first
|
|
}
|
|
feed.cnode_last = feed.cnode_tmp_last
|
|
}
|
|
|
|
feed.cnode_tmp_first = nil
|
|
feed.cnode_tmp_last = nil
|
|
}
|
|
}
|
|
|
|
func (inter *Interp) add_cnode_to_feed_struct(feed *feed_struct_t, cnode *Cnode_t) {
|
|
if feed.cnode_tmp_last == nil {
|
|
feed.cnode_tmp_first = cnode
|
|
} else {
|
|
feed.cnode_tmp_last.next = cnode
|
|
}
|
|
feed.cnode_tmp_last = cnode
|
|
}
|
|
|
|
var feed_to_cnode_code_tab [11]cnode_code_t = [11]cnode_code_t{
|
|
FEED_TOP: CNODE_INVALID, // this must never be used
|
|
FEED_INIT: CNODE_INIT,
|
|
FEED_BRACKET: CNODE_BRACKET,
|
|
FEED_BRACE: CNODE_BRACE,
|
|
FEED_BRACED_TEXT: CNODE_TEXT,
|
|
FEED_DQUOTE: CNODE_DQUOTE,
|
|
FEED_DQUOTED_TEXT: CNODE_TEXT,
|
|
FEED_DOLLAR: CNODE_VAR,
|
|
FEED_SEP: CNODE_INVALID,
|
|
FEED_WORD: CNODE_TEXT,
|
|
FEED_EOL: CNODE_INVALID,
|
|
}
|
|
|
|
func (interp *Interp) pop_feed_struct() *feed_struct_t {
|
|
var (
|
|
feed *feed_struct_t
|
|
parent *feed_struct_t
|
|
cnode *Cnode_t
|
|
)
|
|
|
|
feed = interp.feed // to pop off
|
|
if feed == nil {
|
|
// excessive pop request. it's an internal implementation error
|
|
// TODO: return error???
|
|
panic("excessive pop off feed stack")
|
|
return nil // TODO: return failure???
|
|
}
|
|
|
|
parent = feed.parent
|
|
|
|
//fmt.Printf("parent %p feed [%p]>>\n", parent, feed)
|
|
if parent == nil {
|
|
// this must be FEED_TOP being popped off.
|
|
if feed.state != FEED_TOP {
|
|
panic("invalid internal state - top feed struct not FEED_TOP")
|
|
}
|
|
interp.feed = nil // indicate that the feed stack is empty
|
|
return feed // return the old feed struct
|
|
}
|
|
|
|
if feed.state == FEED_DOLLAR && len(feed.token) == 0 {
|
|
// switch the dollar sign not followed by name to a literal dollar sign text
|
|
feed.state = FEED_WORD
|
|
interp.add_rune_to_token('$')
|
|
}
|
|
|
|
if feed.state == FEED_EOL {
|
|
interp.finalize_feed_struct(parent, true)
|
|
parent.cnode_cont = false
|
|
} else if feed.state == FEED_SEP {
|
|
parent.cnode_cont = false
|
|
} else {
|
|
cnode = &Cnode_t{code: feed_to_cnode_code_tab[feed.state], token: feed.token}
|
|
if cnode.code == CNODE_INVALID {
|
|
panic("internal error - CNODE INVALID encountered")
|
|
}
|
|
if cnode.code == CNODE_BRACKET || cnode.code == CNODE_BRACE || cnode.code == CNODE_INIT {
|
|
// popping a container feed struct
|
|
interp.finalize_feed_struct(feed, true)
|
|
cnode.child = feed.cnode_first
|
|
} else if cnode.code == CNODE_DQUOTE {
|
|
interp.finalize_feed_struct(feed, false)
|
|
cnode.child = feed.cnode_first
|
|
}
|
|
|
|
// add the current cnode to the parent feed struct
|
|
interp.add_cnode_to_feed_struct(parent, cnode)
|
|
|
|
if parent.cnode_cont {
|
|
interp.add_cnode_to_feed_struct(parent, &Cnode_t{code: CNODE_JOIN})
|
|
} else {
|
|
parent.cnode_cont = true
|
|
}
|
|
|
|
if feed.state == FEED_INIT {
|
|
if parent.state != FEED_TOP {
|
|
panic("internal error - parent struct not FEED_TOP")
|
|
}
|
|
interp.finalize_feed_struct(parent, false)
|
|
}
|
|
}
|
|
|
|
interp.feed = parent
|
|
return feed
|
|
}
|
|
|
|
func (interp *Interp) add_rune_to_token(c rune) {
|
|
interp.feed.token = append(interp.feed.token, c)
|
|
}
|
|
|
|
func (interp *Interp) start_escape() {
|
|
interp.feed.extra.escaped = true
|
|
interp.feed.extra.escape_len = 0
|
|
interp.feed.extra.escape_max = 0
|
|
interp.feed.extra.escape_val = 0
|
|
}
|
|
|
|
func (interp *Interp) end_escape() {
|
|
interp.feed.extra.escaped = false
|
|
}
|
|
|
|
func (interp *Interp) handle_escape(c rune) error {
|
|
if c == EOF_RUNE {
|
|
goto stop_escaping
|
|
}
|
|
|
|
switch interp.feed.extra.escape_max {
|
|
case 8: // \UXXXXXXXX
|
|
fallthrough
|
|
case 4: // \uXXXX
|
|
fallthrough
|
|
case 2: // \xXX
|
|
if is_xdigit(c) {
|
|
interp.feed.extra.escape_val = interp.feed.extra.escape_val*16 + digit_to_code(c)
|
|
interp.feed.extra.escape_len++
|
|
} else {
|
|
goto stop_escaping
|
|
}
|
|
case 3: // \ooo
|
|
if c >= '0' && c <= '7' {
|
|
interp.feed.extra.escape_val = interp.feed.extra.escape_val*8 + digit_to_code(c)
|
|
interp.feed.extra.escape_len++
|
|
} else {
|
|
goto stop_escaping
|
|
}
|
|
|
|
case 0:
|
|
if c == 'x' {
|
|
interp.feed.extra.escape_max = 2
|
|
} else if c == 'u' {
|
|
interp.feed.extra.escape_max = 4
|
|
} else if c == 'U' {
|
|
interp.feed.extra.escape_max = 8
|
|
} else if c >= '0' && c <= '7' {
|
|
interp.feed.extra.escape_max = 3
|
|
return err_refeed
|
|
} else {
|
|
interp.add_rune_to_token(escape_single_rune(c))
|
|
interp.end_escape()
|
|
return nil
|
|
}
|
|
|
|
default:
|
|
goto stop_escaping
|
|
}
|
|
|
|
if interp.feed.extra.escape_len == interp.feed.extra.escape_max {
|
|
interp.add_rune_to_token(rune(interp.feed.extra.escape_val))
|
|
interp.end_escape()
|
|
}
|
|
return nil
|
|
|
|
stop_escaping:
|
|
if interp.feed.extra.escape_len == 0 {
|
|
c = escape_max_to_rune(interp.feed.extra.escape_max)
|
|
if c != '\x00' {
|
|
interp.add_rune_to_token(c)
|
|
}
|
|
} else {
|
|
interp.add_rune_to_token(rune(interp.feed.extra.escape_val))
|
|
}
|
|
interp.end_escape()
|
|
return err_refeed
|
|
}
|
|
|
|
// -------------------------------------------------------------------------------
|
|
|
|
func (interp *Interp) BeginFeed() {
|
|
for interp.feed != nil {
|
|
interp.pop_feed_struct()
|
|
}
|
|
interp.push_feed_struct(FEED_TOP)
|
|
interp.push_feed_struct(FEED_INIT)
|
|
}
|
|
|
|
func (interp *Interp) FeedRune(c rune) error {
|
|
var err error
|
|
|
|
start_over:
|
|
switch interp.feed.state {
|
|
|
|
case FEED_INIT:
|
|
err = interp.do_init(c)
|
|
|
|
case FEED_BRACKET:
|
|
err = interp.do_bracket(c)
|
|
|
|
case FEED_BRACE:
|
|
err = interp.do_brace(c)
|
|
|
|
case FEED_BRACED_TEXT:
|
|
err = interp.do_braced_text(c)
|
|
|
|
case FEED_DQUOTE:
|
|
err = interp.do_dquote(c)
|
|
|
|
case FEED_DQUOTED_TEXT:
|
|
err = interp.do_dquoted_text(c)
|
|
|
|
case FEED_DOLLAR:
|
|
err = interp.do_dollar(c)
|
|
|
|
case FEED_SEP:
|
|
err = interp.do_sep(c)
|
|
|
|
case FEED_WORD:
|
|
err = interp.do_word(c)
|
|
|
|
case FEED_EOL:
|
|
err = interp.do_eol(c)
|
|
|
|
default:
|
|
err = err_feed_state
|
|
}
|
|
|
|
if err == err_refeed {
|
|
goto start_over
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (interp *Interp) FeedRunes(text []rune) error {
|
|
var (
|
|
c rune
|
|
err error
|
|
)
|
|
|
|
for _, c = range text {
|
|
err = interp.FeedRune(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (interp *Interp) EndFeed() (*Cnode_t, error) {
|
|
var err error = interp.FeedRune(EOF_RUNE)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if interp.feed == nil || interp.feed.state != FEED_TOP {
|
|
return nil, err_feed_state
|
|
}
|
|
|
|
return interp.feed.cnode_first, nil
|
|
}
|