pcl/interp/feed.go
2023-07-21 18:32:51 +09:00

645 lines
14 KiB
Go

package interp
import (
"unicode"
)
// Sentinel values returned by the feed state handlers.
var (
	// err_refeed is not a real error. It is a control value: FeedRune
	// dispatches the same rune again whenever a handler returns it.
	err_refeed = &error_t{msg: "refeed"}

	// err_sudden_eof reports that the input ended inside an open construct.
	err_sudden_eof = &error_t{msg: "unexpected end of input"}

	// err_right_bracket reports a ']' with no matching '[' (strict mode).
	err_right_bracket = &error_t{msg: "unbalanced right bracket"}

	// err_right_brace reports a '}' with no matching '{' (strict mode).
	err_right_brace = &error_t{msg: "unbalanced right brace"}

	// err_feed_state reports a feed stack state no handler exists for.
	err_feed_state = &error_t{msg: "internal error - invalid feed state"}
)
// is_delim reports whether c terminates a bare word: end of input, any
// bracket or brace, a double quote, a semicolon, or Unicode white space.
func is_delim(c rune) bool {
	if c == EOF_RUNE || unicode.IsSpace(c) {
		return true
	}
	switch c {
	case '[', ']', '{', '}', '"', ';':
		return true
	}
	return false
}
// is_xdigit reports whether c is an ASCII hexadecimal digit (0-9, a-f, A-F).
func is_xdigit(c rune) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
// digit_to_code converts an ASCII digit or letter to its numeric value:
// '0'-'9' map to 0-9, and 'A'-'Z' / 'a'-'z' map to 10-35.
// Any other rune yields 0, which callers cannot tell apart from '0', so
// they are expected to validate the rune first.
func digit_to_code(c rune) int32 {
	switch {
	case '0' <= c && c <= '9':
		return int32(c - '0')
	case 'A' <= c && c <= 'Z':
		return int32(c-'A') + 10
	case 'a' <= c && c <= 'z':
		return int32(c-'a') + 10
	}
	return 0 // this is an error
}
// escape_max_to_rune maps the maximum digit count of a numeric escape back
// to the letter that introduced it: 8 -> 'U', 4 -> 'u', 3 -> '0', 2 -> 'x'.
// Any other value yields the NUL rune.
func escape_max_to_rune(max int) rune {
	if max == 8 {
		return 'U'
	}
	if max == 4 {
		return 'u'
	}
	if max == 3 {
		return '0'
	}
	if max == 2 {
		return 'x'
	}
	return '\x00'
}
// escape_single_rune translates the rune following a backslash in a
// single-character escape. The letters a, b, f, n, r, t and v yield the
// matching control character, an escaped newline yields a space, and every
// other rune stands for itself.
func escape_single_rune(c rune) rune {
	if c == '\n' {
		return ' '
	}
	// Parallel tables: letters[i] escapes to controls[i].
	const letters = "abfnrtv"
	const controls = "\a\b\f\n\r\t\v"
	for i := 0; i < len(letters); i++ {
		if rune(letters[i]) == c {
			return rune(controls[i])
		}
	}
	return c
}
// init_or_cmd is the shared handler for the three "command text" states.
// mode is 0 for FEED_INIT, 1 for FEED_BRACKET and 2 for FEED_BRACE; these
// are the values passed by do_init, do_bracket and do_brace.
//
// Depending on c it closes the current container, opens a nested state
// (word, quote, bracket, brace, variable, separator, end of line) or starts
// a plain word. It returns err_sudden_eof when the input ends inside a
// bracket or brace, and err_right_bracket / err_right_brace for an
// unmatched closer in strict mode.
func (interp *Interp) init_or_cmd(c rune, mode int) error {
	// begins_word is set when c has no special meaning here and must open
	// a new FEED_WORD.
	begins_word := false

	switch {
	case c == EOF_RUNE:
		if mode >= 1 {
			/* not in INIT */
			return err_sudden_eof
		}
		interp.pop_feed_struct()

	case c == '\\':
		// a word that starts with an escape sequence
		interp.push_feed_struct(FEED_WORD)
		interp.start_escape()

	case c == '"':
		interp.push_feed_struct(FEED_DQUOTE)

	case c == '[':
		interp.push_feed_struct(FEED_BRACKET)

	case c == ']':
		switch {
		case mode == 1 && interp.feed.state == FEED_BRACKET:
			interp.pop_feed_struct()
		case interp.strict:
			return err_right_bracket
		default:
			begins_word = true // tolerated as literal text
		}

	case c == '{':
		interp.push_feed_struct(FEED_BRACE)

	case c == '}':
		switch {
		case mode == 2 && interp.feed.state == FEED_BRACE:
			interp.pop_feed_struct()
		case interp.strict:
			return err_right_brace
		default:
			begins_word = true // tolerated as literal text
		}

	case c == '$':
		interp.push_feed_struct(FEED_DOLLAR)

	case c == '\n' || c == ';':
		// must be tested before the generic white-space case below
		interp.push_feed_struct(FEED_EOL)
		interp.add_rune_to_token(c)

	case unicode.IsSpace(c):
		interp.push_feed_struct(FEED_SEP)
		interp.add_rune_to_token(c)

	default:
		begins_word = true
	}

	if begins_word {
		interp.push_feed_struct(FEED_WORD)
		interp.add_rune_to_token(c)
	}
	return nil
}
// do_init handles a rune while the current feed state is FEED_INIT.
// It delegates to init_or_cmd with mode 0, the only mode in which
// EOF_RUNE is accepted and pops the current feed struct.
func (interp *Interp) do_init(c rune) error {
return interp.init_or_cmd(c, 0)
}
// do_bracket handles a rune while the current feed state is FEED_BRACKET.
// It delegates to init_or_cmd with mode 1, in which ']' closes the bracket
// and EOF_RUNE yields err_sudden_eof.
func (interp *Interp) do_bracket(c rune) error {
return interp.init_or_cmd(c, 1)
}
// do_brace handles a rune while the current feed state is FEED_BRACE.
// It delegates to init_or_cmd with mode 2, so the content between braces is
// tokenized with the same rules as command text; '}' closes the brace and
// EOF_RUNE yields err_sudden_eof.
func (interp *Interp) do_brace(c rune) error {
/*
Disabled earlier implementation, kept for reference. It closed the brace on
'}' and otherwise switched to FEED_BRACED_TEXT and re-fed the rune.

var err error
if c == '}' {
interp.pop_feed_struct()
} else {
interp.push_feed_struct(FEED_BRACED_TEXT)
err = err_refeed
}
return err
*/
return interp.init_or_cmd(c, 2)
}
// do_braced_text handles a rune of literal text enclosed in {}.
//
// Nested braces are counted so that only the unbalanced '}' ends the text;
// at that point the state is popped and the rune is re-fed to the parent.
// A backslash is kept in the token and merely protects the next rune
// ({abc\}} produces abc\}, not abc}); an escaped newline becomes a space.
// Returns err_sudden_eof when the input ends inside the text.
//
// NOTE(review): in the code visible here FEED_BRACED_TEXT is only pushed
// from the disabled body of do_brace - confirm whether this handler is
// still reachable.
func (interp *Interp) do_braced_text(c rune) error {
	switch {
	case c == EOF_RUNE:
		return err_sudden_eof

	case interp.feed.extra.escaped:
		// escaping in braces is simpler than the regular escaping (handle_escape())
		if c == '\n' {
			c = ' '
		}
		interp.add_rune_to_token(c)
		interp.feed.extra.escaped = false

	case c == '\\':
		// the backslash itself stays in the token, so start_escape() is not used
		interp.feed.extra.escaped = true
		interp.add_rune_to_token(c)

	case c == '{':
		interp.feed.extra.brace_count++
		interp.add_rune_to_token(c)

	case c == '}':
		if interp.feed.extra.brace_count <= 0 {
			// the closing brace of the text itself
			interp.pop_feed_struct()
			return err_refeed // back to FEED_BRACE
		}
		interp.add_rune_to_token(c)
		interp.feed.extra.brace_count--

	default:
		interp.add_rune_to_token(c)
	}
	return nil
}
// do_dquote handles a rune while the current feed state is FEED_DQUOTE,
// i.e. directly inside a double-quoted string and not inside one of its
// text runs.
//
// '"' closes the string, '[' opens a bracket, '$' opens a variable
// reference and a backslash opens a text run that starts with an escape.
// Any other rune opens a FEED_DQUOTED_TEXT run. Returns err_sudden_eof on
// end of input and err_right_bracket for ']' in strict mode.
func (interp *Interp) do_dquote(c rune) error {
	// begins_text is set when c is ordinary text that opens a new run.
	begins_text := false

	switch {
	case c == EOF_RUNE:
		return err_sudden_eof

	case c == '\\':
		interp.push_feed_struct(FEED_DQUOTED_TEXT)
		interp.start_escape()

	case c == '"':
		interp.pop_feed_struct()

	case c == '[':
		interp.push_feed_struct(FEED_BRACKET)

	case c == ']':
		switch {
		case interp.feed.state == FEED_BRACKET:
			// NOTE(review): FeedRune dispatches here only for
			// FEED_DQUOTE, so this branch looks unreachable - confirm.
			interp.pop_feed_struct()
		case interp.strict:
			return err_right_bracket
		default:
			begins_text = true
		}

	case c == '$':
		interp.push_feed_struct(FEED_DOLLAR)

	default:
		begins_text = true
	}

	if begins_text {
		interp.push_feed_struct(FEED_DQUOTED_TEXT)
		interp.add_rune_to_token(c)
	}
	return nil
}
// do_dquoted_text handles a rune inside a text run of a double-quoted
// string (FEED_DQUOTED_TEXT).
//
// While an escape is pending the rune goes to handle_escape. A backslash
// starts a new escape. End of input, '"' and '[' end the run: the state is
// popped and the rune is re-fed to the enclosing FEED_DQUOTE handler. Every
// other rune, including '$' and ']', is appended to the token as is.
func (interp *Interp) do_dquoted_text(c rune) error {
	if interp.feed.extra.escaped {
		return interp.handle_escape(c)
	}

	if c == '\\' {
		interp.start_escape()
		return nil
	}

	if c == EOF_RUNE || c == '"' || c == '[' {
		// let do_dquote decide what this rune means
		interp.pop_feed_struct()
		return err_refeed
	}

	interp.add_rune_to_token(c)
	return nil
}
// do_dollar handles a rune while a variable reference is being collected
// (FEED_DOLLAR). Two forms are recognized:
//
//	$name    - name is made of letters, digits and underscores
//	${name}  - name is everything up to the closing brace
//
// In the plain form the first rune that cannot belong to the name ends the
// reference: the state is popped and the rune is re-fed to the parent. A
// '$' followed by no name at all turns into a literal "$" word.
//
// Returns err_sudden_eof when the input ends inside ${...}, and err_refeed
// when the rune must be dispatched again.
func (interp *Interp) do_dollar(c rune) error {
	braced := interp.feed.extra.var_braced

	switch {
	case c == EOF_RUNE && braced:
		// "${name" cut short: report it here rather than storing the EOF
		// marker in the name and failing later with an internal state error.
		return err_sudden_eof

	case unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c):
		interp.add_rune_to_token(c)

	case c == '{':
		switch {
		case len(interp.feed.token) == 0:
			// "${": switch to the braced form
			interp.feed.extra.var_braced = true
		case braced:
			// inside ${...} a '{' is an ordinary name rune
			interp.add_rune_to_token(c)
		default:
			// "$name{": the name is complete. Pop before re-feeding;
			// re-feeding without popping would dispatch the same rune
			// to this handler forever.
			interp.pop_feed_struct()
			return err_refeed
		}

	case c == '}' && braced:
		interp.pop_feed_struct()

	case braced:
		interp.add_rune_to_token(c)

	default:
		if len(interp.feed.token) == 0 {
			// $ is followed by an invalid variable letter.
			// switch to a normal word mode by hack and add a dollar sign as a token
			interp.feed.state = FEED_WORD
			interp.add_rune_to_token('$')
		} else {
			interp.pop_feed_struct()
		}
		return err_refeed
	}
	return nil
}
// do_word handles a rune inside a bare word (FEED_WORD).
//
// A pending escape is continued through handle_escape and a backslash
// starts a new one. A delimiter (see is_delim) ends the word: the state is
// popped and the rune is re-fed to the parent. Anything else is appended
// to the token.
func (interp *Interp) do_word(c rune) error {
	if interp.feed.extra.escaped {
		return interp.handle_escape(c)
	}

	if c == '\\' {
		interp.start_escape()
		return nil
	}

	if is_delim(c) { // TODO: if not str characters
		interp.pop_feed_struct()
		return err_refeed
	}

	interp.add_rune_to_token(c)
	return nil
}
// do_sep handles a rune inside a run of separating white space (FEED_SEP).
// White space other than a newline extends the run. A newline or any
// non-space rune ends it: the state is popped and the rune is re-fed.
func (interp *Interp) do_sep(c rune) error {
	if c != '\n' && unicode.IsSpace(c) {
		interp.add_rune_to_token(c)
		return nil
	}
	interp.pop_feed_struct()
	return err_refeed
}
// do_eol handles a rune after a statement terminator (FEED_EOL).
// Further newlines are folded into the same terminator. Any other rune
// ends it: the state is popped and the rune is re-fed.
func (interp *Interp) do_eol(c rune) error {
	if c == '\n' {
		interp.add_rune_to_token(c)
		return nil
	}
	interp.pop_feed_struct()
	return err_refeed
}
// push_feed_struct pushes a new feed struct in the given state on top of
// the feed stack and returns it. The new struct records the previous top
// as its parent; its level is 0 at the bottom of the stack and one more
// than its parent's otherwise.
func (interp *Interp) push_feed_struct(state feed_state_t) *feed_struct_t {
	top := &feed_struct_t{state: state, parent: interp.feed}
	if top.parent != nil {
		top.level = top.parent.level + 1
	}
	interp.feed = top
	return top
}
// finalize_feed_struct moves the pending (tmp) cnode list of feed to the
// end of its finished list and clears the pending list. When is_stmt is
// true the pending nodes are first wrapped as the children of one
// CNODE_STMT node; otherwise they are appended as they are. It does
// nothing when there are no pending nodes.
func (interp *Interp) finalize_feed_struct(feed *feed_struct_t, is_stmt bool) {
	if feed.cnode_tmp_last == nil {
		return
	}

	// head..tail is the chain to splice onto the finished list
	head, tail := feed.cnode_tmp_first, feed.cnode_tmp_last
	if is_stmt {
		// wrap the list of cnodes as a single statement node; token is empty
		stmt := &Cnode_t{code: CNODE_STMT, child: head}
		head, tail = stmt, stmt
	}

	if feed.cnode_last == nil {
		feed.cnode_first = head
	} else {
		feed.cnode_last.next = head
	}
	feed.cnode_last = tail

	feed.cnode_tmp_first = nil
	feed.cnode_tmp_last = nil
}
// add_cnode_to_feed_struct appends cnode to the pending (tmp) cnode list
// of feed. The receiver is not used.
func (interp *Interp) add_cnode_to_feed_struct(feed *feed_struct_t, cnode *Cnode_t) {
	if tail := feed.cnode_tmp_last; tail != nil {
		tail.next = cnode
	} else {
		feed.cnode_tmp_first = cnode
	}
	feed.cnode_tmp_last = cnode
}
// feed_to_cnode_code_tab maps a feed state to the code of the cnode that
// pop_feed_struct creates when a struct in that state is popped.
// CNODE_INVALID marks states that never produce a node of their own.
var feed_to_cnode_code_tab = [11]cnode_code_t{
	// containers
	FEED_INIT:    CNODE_INIT,
	FEED_BRACKET: CNODE_BRACKET,
	FEED_BRACE:   CNODE_BRACE,
	FEED_DQUOTE:  CNODE_DQUOTE,

	// leaves
	FEED_BRACED_TEXT:  CNODE_TEXT,
	FEED_DQUOTED_TEXT: CNODE_TEXT,
	FEED_WORD:         CNODE_TEXT,
	FEED_DOLLAR:       CNODE_VAR,

	// no node of their own
	FEED_TOP: CNODE_INVALID, // this must never be used
	FEED_SEP: CNODE_INVALID,
	FEED_EOL: CNODE_INVALID,
}
// pop_feed_struct pops the top feed struct, folds what it collected into
// its parent and returns the popped struct.
//
//   - FEED_EOL closes the parent's pending nodes as one statement.
//   - FEED_SEP only breaks word continuation in the parent.
//   - Every other state becomes a cnode appended to the parent's pending
//     list. Container states carry their finished child list. When the
//     parent is still continuing a word, a CNODE_JOIN node is appended
//     after the new node.
//
// It panics on internal inconsistencies: popping an empty stack, a bottom
// struct that is not FEED_TOP, a state that maps to CNODE_INVALID, or a
// FEED_INIT whose parent is not FEED_TOP.
func (interp *Interp) pop_feed_struct() *feed_struct_t {
	popped := interp.feed // to pop off
	if popped == nil {
		// excessive pop request. it's an internal implementation error
		// TODO: return error???
		panic("excessive pop off feed stack")
	}

	owner := popped.parent
	if owner == nil {
		// this must be FEED_TOP being popped off.
		if popped.state != FEED_TOP {
			panic("invalid internal state - top feed struct not FEED_TOP")
		}
		interp.feed = nil // indicate that the feed stack is empty
		return popped     // return the old feed struct
	}

	if popped.state == FEED_DOLLAR && len(popped.token) == 0 {
		// switch the dollar sign not followed by name to a literal dollar sign text
		popped.state = FEED_WORD
		interp.add_rune_to_token('$')
	}

	switch popped.state {
	case FEED_EOL:
		interp.finalize_feed_struct(owner, true)
		owner.cnode_cont = false

	case FEED_SEP:
		owner.cnode_cont = false

	default:
		node := &Cnode_t{code: feed_to_cnode_code_tab[popped.state], token: popped.token}
		if node.code == CNODE_INVALID {
			panic("internal error - CNODE INVALID encountered")
		}

		if node.code == CNODE_BRACKET || node.code == CNODE_BRACE || node.code == CNODE_INIT {
			// popping a container feed struct
			interp.finalize_feed_struct(popped, true)
			node.child = popped.cnode_first
		} else if node.code == CNODE_DQUOTE {
			interp.finalize_feed_struct(popped, false)
			node.child = popped.cnode_first
		}

		// add the current cnode to the parent feed struct
		interp.add_cnode_to_feed_struct(owner, node)
		if owner.cnode_cont {
			interp.add_cnode_to_feed_struct(owner, &Cnode_t{code: CNODE_JOIN})
		} else {
			owner.cnode_cont = true
		}

		if popped.state == FEED_INIT {
			if owner.state != FEED_TOP {
				panic("internal error - parent struct not FEED_TOP")
			}
			interp.finalize_feed_struct(owner, false)
		}
	}

	interp.feed = owner
	return popped
}
// add_rune_to_token appends c to the token of the feed struct currently on
// top of the stack.
func (interp *Interp) add_rune_to_token(c rune) {
	top := interp.feed
	top.token = append(top.token, c)
}
// start_escape puts the current feed struct into escape mode and resets
// the escape bookkeeping: digits seen, digits allowed and accumulated
// value.
func (interp *Interp) start_escape() {
	top := interp.feed
	top.extra.escape_val = 0
	top.extra.escape_max = 0
	top.extra.escape_len = 0
	top.extra.escaped = true
}
// end_escape takes the current feed struct out of escape mode. The other
// escape bookkeeping fields are left as they are; start_escape resets them.
func (interp *Interp) end_escape() {
interp.feed.extra.escaped = false
}
// handle_escape consumes one rune of a backslash escape started with
// start_escape.
//
// The first rune selects the kind: 'x', 'u' and 'U' open a hexadecimal
// escape of up to 2, 4 and 8 digits, an octal digit opens an octal escape
// of up to 3 digits (the digit is re-fed so it counts as the first digit),
// and any other rune is a single-character escape translated by
// escape_single_rune. Numeric escapes end when the maximum digit count is
// reached or a non-digit arrives. In the latter case, or at end of input,
// the rune is re-fed after the escape is closed; an escape with no digits
// degrades to the letter that opened it.
//
// The accumulated value is converted with rune() without any range check.
//
// Returns err_refeed when c must be dispatched again, nil otherwise.
func (interp *Interp) handle_escape(c rune) error {
	top := interp.feed

	// give_up is set when c cannot continue the escape in progress.
	give_up := c == EOF_RUNE

	if !give_up {
		switch top.extra.escape_max {
		case 2, 4, 8: // \xXX, \uXXXX, \UXXXXXXXX
			if is_xdigit(c) {
				top.extra.escape_val = top.extra.escape_val*16 + digit_to_code(c)
				top.extra.escape_len++
			} else {
				give_up = true
			}

		case 3: // \ooo
			if c >= '0' && c <= '7' {
				top.extra.escape_val = top.extra.escape_val*8 + digit_to_code(c)
				top.extra.escape_len++
			} else {
				give_up = true
			}

		case 0: // first rune after the backslash
			switch {
			case c == 'x':
				top.extra.escape_max = 2
			case c == 'u':
				top.extra.escape_max = 4
			case c == 'U':
				top.extra.escape_max = 8
			case c >= '0' && c <= '7':
				// process the same rune again, now as an octal digit
				top.extra.escape_max = 3
				return err_refeed
			default:
				interp.add_rune_to_token(escape_single_rune(c))
				interp.end_escape()
				return nil
			}

		default:
			give_up = true
		}
	}

	if !give_up {
		// emit the value once all the digits have been collected
		if top.extra.escape_len == top.extra.escape_max {
			interp.add_rune_to_token(rune(top.extra.escape_val))
			interp.end_escape()
		}
		return nil
	}

	// close the escape early and let the caller see c again
	if top.extra.escape_len == 0 {
		// no digit at all: keep the introducing letter, if any
		if intro := escape_max_to_rune(top.extra.escape_max); intro != '\x00' {
			interp.add_rune_to_token(intro)
		}
	} else {
		interp.add_rune_to_token(rune(top.extra.escape_val))
	}
	interp.end_escape()
	return err_refeed
}
// -------------------------------------------------------------------------------
// BeginFeed prepares the interpreter for a new feed session. It pops
// whatever is left on the feed stack and then pushes a fresh FEED_TOP
// struct with a FEED_INIT struct on top of it.
func (interp *Interp) BeginFeed() {
	// unwind the remains of a previous session, if any
	for interp.feed != nil {
		interp.pop_feed_struct()
	}

	for _, state := range [...]feed_state_t{FEED_TOP, FEED_INIT} {
		interp.push_feed_struct(state)
	}
}
// FeedRune feeds a single rune to the handler of the state on top of the
// feed stack. When a handler returns err_refeed the rune is dispatched
// again, normally to a different state because the handler changed the
// stack.
//
// It returns err_feed_state when the feed stack is empty (for example when
// BeginFeed has not been called) or when the top state has no handler, and
// otherwise whatever the handler returned.
func (interp *Interp) FeedRune(c rune) error {
	for {
		if interp.feed == nil {
			// nothing to dispatch to; report it instead of dereferencing nil
			return err_feed_state
		}

		var err error
		switch interp.feed.state {
		case FEED_INIT:
			err = interp.do_init(c)
		case FEED_BRACKET:
			err = interp.do_bracket(c)
		case FEED_BRACE:
			err = interp.do_brace(c)
		case FEED_BRACED_TEXT:
			err = interp.do_braced_text(c)
		case FEED_DQUOTE:
			err = interp.do_dquote(c)
		case FEED_DQUOTED_TEXT:
			err = interp.do_dquoted_text(c)
		case FEED_DOLLAR:
			err = interp.do_dollar(c)
		case FEED_SEP:
			err = interp.do_sep(c)
		case FEED_WORD:
			err = interp.do_word(c)
		case FEED_EOL:
			err = interp.do_eol(c)
		default:
			return err_feed_state
		}

		if err != err_refeed {
			return err
		}
		// err_refeed: run the same rune through the new top state
	}
}
// FeedRunes feeds the runes of text one by one through FeedRune. It stops
// at the first error and returns it; the remaining runes are not fed.
func (interp *Interp) FeedRunes(text []rune) error {
	for _, r := range text {
		if err := interp.FeedRune(r); err != nil {
			return err
		}
	}
	return nil
}
// EndFeed finishes the feed session by feeding EOF_RUNE and returns the
// first cnode collected by the FEED_TOP struct.
//
// It returns the error from feeding EOF_RUNE if there is one, and
// err_feed_state when the stack has not unwound to FEED_TOP afterwards.
func (interp *Interp) EndFeed() (*Cnode_t, error) {
	if err := interp.FeedRune(EOF_RUNE); err != nil {
		return nil, err
	}

	top := interp.feed
	if top == nil || top.state != FEED_TOP {
		return nil, err_feed_state
	}
	return top.cnode_first, nil
}