From a8ea8fbf94b631f23d0b3e18838f56c691343993 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 21 Jul 2023 18:32:51 +0900 Subject: [PATCH] starting the project --- Makefile | 5 + bin/main.go | 119 +++++++++ go.mod | 3 + interp/eval.go | 305 +++++++++++++++++++++++ interp/feed.go | 649 +++++++++++++++++++++++++++++++++++++++++++++++++ interp/pcl.go | 366 ++++++++++++++++++++++++++++ interp/proc.go | 76 ++++++ 7 files changed, 1523 insertions(+) create mode 100644 Makefile create mode 100644 bin/main.go create mode 100644 go.mod create mode 100644 interp/eval.go create mode 100644 interp/feed.go create mode 100644 interp/pcl.go create mode 100644 interp/proc.go diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b77b0d7 --- /dev/null +++ b/Makefile @@ -0,0 +1,5 @@ +all: + cd bin && CGO_ENABLED=0 go build -x -o pcl main.go + +clean: + cd bin && go clean -x && rm -f pcl diff --git a/bin/main.go b/bin/main.go new file mode 100644 index 0000000..4c43635 --- /dev/null +++ b/bin/main.go @@ -0,0 +1,119 @@ +package main + +import ( + "bufio" + "fmt" + "io" + "os" + "path" + pcl "pcl/interp" +) + +func main() { + var ( + interp *pcl.Interp + f *os.File + r *bufio.Reader + c rune + node *pcl.Cnode_t + v *string + err error + ) + + if len(os.Args) != 2 { + fmt.Printf("Usage: %s filename\n", path.Base(os.Args[0])) + goto oops + } + + interp, err = pcl.NewInterp(1000, true) + if err != nil { + fmt.Printf("Error %s\n", err.Error()) + goto oops + } + + f, err = os.Open(os.Args[1]) + if err != nil { + fmt.Printf("Error %s\n", err.Error()) + goto oops + } + + interp.BeginFeed() + + r = bufio.NewReader(f) + for { + c, _, err = r.ReadRune() + if err != nil { + if err == io.EOF { + break + } + fmt.Printf("Error in ReadRune - %s\n", err.Error()) + goto oops + } + + err = interp.FeedRune(c) + if err != nil { + fmt.Printf("Error in FeedRune - %s\n", err.Error()) + goto oops + } + } + node, err = interp.EndFeed() + if err != nil { + fmt.Printf("Error in EndFeed %s\n", err.Error()) + goto oops + } + + interp.Dump(node) + fmt.Printf("------------- Executing ----------------\n") + v, err = interp.Execute(node) + if err != nil { + fmt.Printf("Error in Execute - %s\n", err.Error()) + goto oops + } + + if v == nil { + panic("return value mut not be nil") + } + fmt.Printf("RETURN VALUE = [%s]\n", *v) + + /* + err = interp.FeedRunes([]rune(` + proc inc{x} { + puts {10 20} + return [expr $x + 1] + } + \{abc 11 2\ \1011 \ 2\x65 \uBc29\uaD6cdefg\uZZ\xZZ\U0000BC29\UAD6cZZ \ + [donkey 1 [expr [expr 2 + 3] + 3] ] + hello { world { }man} + "command { l a n g }" + set a [puts "1111" "22 22" "3333 [expr "123" + 2] 4444"] + abc [expr [expr 2 + "4[expr 2 * 6]"] + 9] + puts $a ${ kkk qqq } + puts "\x65\ubc29\n" + {}`)) + */ + //err = interp.FeedRunes([]rune(`hello [world [1 9] 2] + //`)) + /* + if err != nil { + fmt.Printf("ERROR %s\n", err) + } else { + err = interp.EndFeed() + if err != nil { + fmt.Printf("ERROR %s\n", err) + } + }*/ + + interp.Close() + f.Close() + os.Exit(0) + +oops: + if interp != nil { + interp.Close() + + } + if f != nil { + f.Close() + } + os.Exit(1) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..9362af6 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module pcl + +go 1.20 diff --git a/interp/eval.go b/interp/eval.go new file mode 100644 index 0000000..41bb2d7 --- /dev/null +++ b/interp/eval.go @@ -0,0 +1,305 @@ +package interp + +import ( + "fmt" + "unsafe" +) + +var err_num_args *error_t = &error_t{msg: "wrong number of arguments"} + +/* + value stack (p.stack) + + <--- SP + ARG1 + ARG0 + NAME + RET + + evaluation stack (p.ctx) +*/ + +func (p *process_t) push_cnode_value(val *Cnode_t) error { + if p.sp >= cap(p.stack) { + return fmt.Errorf("stack full") + } + + p.stack[p.sp] = unsafe.Pointer(uintptr(unsafe.Pointer(val)) | 1) + p.sp++ + p.ctx.count++ + + return nil +} + +func (p *process_t) push_string_value(val string) error { + if p.sp >= cap(p.stack) { + return fmt.Errorf("stack full") + } + + p.stack[p.sp] = unsafe.Pointer(&val) + p.sp++ + p.ctx.count++ + + return nil +} + +func (p *process_t) merge_top_values() error { + var new_val string + + if p.sp < 2 { + return fmt.Errorf("stack corrupt") + } + new_val = *(*string)(p.stack[p.sp-2]) + *(*string)(p.stack[p.sp-1]) + p.sp-- + p.stack[p.sp] = nil + p.stack[p.sp-1] = unsafe.Pointer(&new_val) + p.ctx.count-- + return nil +} + +func (p *process_t) pop_value() unsafe.Pointer { + var v unsafe.Pointer + p.sp-- + v = p.stack[p.sp] + p.stack[p.sp] = nil + return v +} + +func (p *process_t) call() error { + var ( + proc func(*process_t) error + callee *string + ) + + callee = p.GetCalleeName() + // TODO: use a map + switch *callee { + case "if": + proc = proc_if + case "puts": + proc = proc_puts + default: + proc = proc_unknown + } + return proc(p) +} + +func (p *process_t) GetCalleeName() *string { + return (*string)(p.stack[p.sp-p.ctx.count+1]) +} + +func (p *process_t) GetArg(idx int) unsafe.Pointer { + return (p.stack[p.sp-p.ctx.count+2+idx]) +} + +func (p *process_t) GetNumArgs() int { + return p.ctx.count - 2 +} + +func (p *process_t) Return(val string) { + p.stack[p.sp-p.ctx.count] = unsafe.Pointer(&val) +} + +func (p *process_t) push_context(node *Cnode_t) { + p.ctx = &context_t{count: 0, parent_ctx: p.ctx, parent_node: node} +} + +func (p *process_t) pop_context() (node *Cnode_t) { + var i int + + node = p.ctx.parent_node + + // clean up the unused part of the stack + for i = 1; i < p.ctx.count; i++ { + p.stack[p.sp-p.ctx.count+i] = nil + } + + // pop off the cleaned arguments + p.sp -= p.ctx.count - 1 // keep the return value in the stack + p.ctx = p.ctx.parent_ctx + + if p.ctx != nil { + p.ctx.count++ // let the return value be the argument to the caller + } + return +} + +func (interp *Interp) eval_atom_node(node *Cnode_t) (*string, error) { + + var ( + p process_t + v *string + err error + ) + + p.interp = interp + p.push_context(nil) + p.push_string_value("") // placeholder for return value + + for node != nil { + switch node.code { + case CNODE_BRACKET: // this is a container + if node.child != nil { + p.push_context(node) + err = p.push_string_value("") // placeholder for return value + if err != nil { + goto oops + } + node = node.child + continue + } else { + err = p.push_string_value("") + if err != nil { + goto oops + } + } + + case CNODE_BRACE: // this is a container + err = p.push_cnode_value(node) + if err != nil { + goto oops + } + + case CNODE_DQUOTE: // this is a container + // TODO: at the feed layer, recompose CNODE_DQUOTE item to + // successive ATOM item + // "abc $ddd [xx 11]" must be TEXT("abc ") + VAR("ddd") + BRACKET["xx 11"] without SEP in between + + // this is not right..... + if node.child != nil { + // TODO: something is not right here. handle inner stuffs + //fmt.Printf("pushing [%s]\n", string(node.child.token)) + err = p.push_string_value(string(node.child.token)) + } else { + err = p.push_string_value("") + } + if err != nil { + goto oops + } + + case CNODE_VAR: + + case CNODE_TEXT: + //fmt.Printf("pushing [%s]\n", string(node.token)) + err = p.push_string_value(string(node.token)) + if err != nil { + goto oops + } + + case CNODE_SEP: + // skip + case CNODE_EOL: + // skip + + default: + err = fmt.Errorf("internal error - non-atom node - %d", node.code) + goto oops + } + + node = node.next + check_end: + if node == nil { // reached the last node + err = p.call() + if err != nil { + goto oops + } + if p.ctx.parent_ctx != nil { + node = p.pop_context() + if node.seqno > 0 { + // merge the top 2 values (return from [] + previous argument) + // for instance, the expression `aa[qq 11]` must product a single word + // `aa` concatenated of the result of `qq 11` + err = p.merge_top_values() + if err != nil { + goto oops + } + } + node = node.next + + // take `aa 11 22 [dd [bb cc]]` as an example + // after [bb cc] is called, it must call `[dd]`` followed by `aa`. + // this goto loop is to handle successive calls when a nested call is the + // last argument + goto check_end + } + } + } + + v = (*string)(p.pop_value()) + p.pop_context() + if p.ctx != nil { + err = fmt.Errorf("internal error - dangling process context") + goto oops + } + + return v, nil + +oops: + return nil, err +} + +func (interp *Interp) eval_arg(p *process_t, pos int) (*string, error) { + var ( + ptr uintptr + ) + + ptr = uintptr(p.GetArg(pos)) + if ptr&1 == 1 { // cnode + ptr &= ^uintptr(1) + interp.dump_cnodes((*Cnode_t)(unsafe.Pointer(ptr)), true) + return interp.eval_atom_node((*Cnode_t)(unsafe.Pointer(ptr)).child) + } else { + return (*string)(unsafe.Pointer(ptr)), nil + } +} + +func (interp *Interp) eval_arg_literally(p *process_t, pos int) (*string, error) { + var ( + ptr uintptr + //cnode *Cnode_t + ) + + ptr = uintptr(p.GetArg(pos)) + if ptr&1 == 1 { // cnode + ptr &= ^uintptr(1) + //cnode = (*Cnode_t)(unsafe.Pointer(ptr)) + //cnode.child i hate this portion.... + return nil, fmt.Errorf("not supported - unable to evaluate {} literally") + } else { + return (*string)(unsafe.Pointer(ptr)), nil + } +} + +func (interp *Interp) EvalText(text []rune) (*string, error) { + var ( + v *string + node *Cnode_t + err error + ) + + interp.BeginFeed() // this resets the feed stack to the initial state + + err = interp.FeedRunes(text) + if err != nil { + goto oops + } + + node, err = interp.EndFeed() + if err != nil { + goto oops + } + + //fmt.Printf("--------------------\n") + //interp.dump_cnodes(node, true) + //fmt.Printf("--------------------\n") + + v, err = interp.Execute(node) + if err != nil { + goto oops + } + + return v, nil + +oops: + return nil, err +} diff --git a/interp/feed.go b/interp/feed.go new file mode 100644 index 0000000..e18f823 --- /dev/null +++ b/interp/feed.go @@ -0,0 +1,649 @@ +package interp + +import ( + "unicode" +) + +var err_refeed *error_t = &error_t{msg: "refeed"} // this is not a real error and is used as a control element + +var err_sudden_eof *error_t = &error_t{msg: "unexpected end of input"} +var err_right_bracket *error_t = &error_t{msg: "unbalanced right bracket"} +var err_right_brace *error_t = &error_t{msg: "unbalanced right brace"} +var err_feed_state *error_t = &error_t{msg: "internal error - invalid feed state"} + +func is_delim(c rune) bool { + return c == EOF_RUNE || c == '[' || c == ']' || c == '{' || c == '}' || c == '"' || c == ';' || unicode.IsSpace(c) +} + +func is_xdigit(c rune) bool { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +} + +func digit_to_code(c rune) int32 { + if c >= '0' && c <= '9' { + return int32(c) - int32('0') + } else if c >= 'A' && c <= 'Z' { + return int32(c) - int32('A') + 10 + } else if c >= 'a' && c <= 'z' { + return int32(c) - int32('a') + 10 + } else { + return 0 // this is an error + } +} + +func escape_max_to_rune(max int) rune { + switch max { + case 8: + return 'U' + case 4: + return 'u' + case 3: + return '0' + case 2: + return 'x' + default: + return '\x00' + } +} + +func escape_single_rune(c rune) rune { + switch c { + case 'a': + return '\a' + case 'b': + return '\b' + case 'f': + return '\f' + case 'n': + return '\n' + case 'r': + return '\r' + case 't': + return '\t' + case 'v': + return '\v' + case '\n': + return ' ' + default: + return c + } +} + +func (interp *Interp) init_or_cmd(c rune, mode int) error { + var err error + + if c == EOF_RUNE { + if mode >= 1 { + /* not in INIT */ + return err_sudden_eof + } + interp.pop_feed_struct() + } else if c == '\\' { + interp.push_feed_struct(FEED_WORD) + interp.start_escape() + } else if c == '"' { + interp.push_feed_struct(FEED_DQUOTE) + } else if c == '[' { + interp.push_feed_struct(FEED_BRACKET) + } else if c == ']' { + if mode == 1 && interp.feed.state == FEED_BRACKET { + interp.pop_feed_struct() + } else if interp.strict { + err = err_right_bracket + } else { + goto normal_rune + } + } else if c == '{' { + interp.push_feed_struct(FEED_BRACE) + } else if c == '}' { + if mode == 2 && interp.feed.state == FEED_BRACE { + interp.pop_feed_struct() + } else if interp.strict { + err = err_right_brace + } else { + goto normal_rune + } + } else if c == '$' { + interp.push_feed_struct(FEED_DOLLAR) + } else if c == '\n' || c == ';' { + interp.push_feed_struct(FEED_EOL) + interp.add_rune_to_token(c) + } else if unicode.IsSpace(c) { + interp.push_feed_struct(FEED_SEP) + interp.add_rune_to_token(c) + } else { + goto normal_rune + } + +done: + return err + +normal_rune: + interp.push_feed_struct(FEED_WORD) + interp.add_rune_to_token(c) + goto done +} + +func (interp *Interp) do_init(c rune) error { + return interp.init_or_cmd(c, 0) +} + +func (interp *Interp) do_bracket(c rune) error { + return interp.init_or_cmd(c, 1) +} + +func (interp *Interp) do_brace(c rune) error { + /* + var err error + + if c == '}' { + interp.pop_feed_struct() + } else { + interp.push_feed_struct(FEED_BRACED_TEXT) + err = err_refeed + } + return err + */ + return interp.init_or_cmd(c, 2) +} + +func (interp *Interp) do_braced_text(c rune) error { + var err error + + /* text enclosed in {} */ + if c == EOF_RUNE { + err = err_sudden_eof + } else if interp.feed.extra.escaped { + // escaping in braces are simpler than the regular escaping(handle_escape()) + if c == '\n' { + c = ' ' + } + interp.add_rune_to_token(c) + interp.feed.extra.escaped = false + } else if c == '\\' { + // escaping is supported inside {}. but escaping is a bit different + // {abc\}} produces abc\}, not abc} + interp.feed.extra.escaped = true /* so not using start_escape() */ + interp.add_rune_to_token(c) + } else if c == '{' { + interp.feed.extra.brace_count++ + interp.add_rune_to_token(c) + } else if c == '}' { + if interp.feed.extra.brace_count > 0 { + interp.add_rune_to_token(c) + interp.feed.extra.brace_count-- + } else { + interp.pop_feed_struct() + err = err_refeed // back to FEED_BRACE + } + } else { + interp.add_rune_to_token(c) + } + return err +} + +func (interp *Interp) do_dquote(c rune) error { + var err error + /* + if c == '"' { + interp.pop_feed_struct() + + // } else if c == '[' { + // interp.push_feed_struct(FEED_BRACKET) + // } else if c == ']' && interp.strict { + // err = err_right_bracket + + } else { + interp.push_feed_struct(FEED_DQUOTED_TEXT) + err = err_refeed + } + return err + */ + + if c == EOF_RUNE { + return err_sudden_eof + } else if c == '\\' { + interp.push_feed_struct(FEED_DQUOTED_TEXT) + interp.start_escape() + } else if c == '"' { + interp.pop_feed_struct() + } else if c == '[' { + interp.push_feed_struct(FEED_BRACKET) + } else if c == ']' { + if interp.feed.state == FEED_BRACKET { + interp.pop_feed_struct() + } else if interp.strict { + err = err_right_bracket + } else { + goto normal_rune + } + } else if c == '$' { + interp.push_feed_struct(FEED_DOLLAR) + } else { + goto normal_rune + } + +done: + return err + +normal_rune: + interp.push_feed_struct(FEED_DQUOTED_TEXT) + interp.add_rune_to_token(c) + goto done +} + +func (interp *Interp) do_dquoted_text(c rune) error { + var err error + + /* + if c == EOF_RUNE { + err = err_sudden_eof + } else if interp.feed.extra.escaped { + err = interp.handle_escape(c) + } else if c == '\\' { + interp.start_escape() + } else if c == '"' { + interp.pop_feed_struct() + err = err_refeed + } else if c == '[' { + interp.pop_feed_struct() + err = err_refeed + } else if c == ']' && interp.strict { + err = err_right_bracket + } else if c == '$' { + // a variable inside a double-quoted string + interp.push_feed_struct(FEED_DOLLAR) + } else { + interp.add_rune_to_token(c) + }*/ + + if interp.feed.extra.escaped { + err = interp.handle_escape(c) + } else if c == '\\' { + interp.start_escape() + } else if c == EOF_RUNE || c == '"' || c == '[' { + interp.pop_feed_struct() + err = err_refeed + } else { + interp.add_rune_to_token(c) + } + + return err +} + +func (interp *Interp) do_dollar(c rune) error { + var err error + + if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) { + interp.add_rune_to_token(c) + } else if c == '{' { + if len(interp.feed.token) > 0 { + err = err_refeed + } else { + interp.feed.extra.var_braced = true + } + } else if c == '}' && interp.feed.extra.var_braced { + interp.pop_feed_struct() + } else if interp.feed.extra.var_braced { + interp.add_rune_to_token(c) + } else { + if len(interp.feed.token) == 0 { + // $ is followed by an invalid variable letter. + // switch to a normal word mode by hack and add a dollar sign as a token + interp.feed.state = FEED_WORD + interp.add_rune_to_token('$') + } else { + interp.pop_feed_struct() + } + err = err_refeed + } + + return err +} + +func (interp *Interp) do_word(c rune) error { + var err error + + if interp.feed.extra.escaped { + err = interp.handle_escape(c) + } else if c == '\\' { + interp.start_escape() + } else if is_delim(c) { // TODO: if not str characters + interp.pop_feed_struct() + err = err_refeed + } else { + interp.add_rune_to_token(c) + } + + return err +} + +func (interp *Interp) do_sep(c rune) error { + if c == '\n' || !unicode.IsSpace(c) { + interp.pop_feed_struct() + return err_refeed + } + + interp.add_rune_to_token(c) + return nil +} + +func (interp *Interp) do_eol(c rune) error { + + if c != '\n' { + interp.pop_feed_struct() + return err_refeed + } + + interp.add_rune_to_token(c) + return nil +} + +func (interp *Interp) push_feed_struct(state feed_state_t) *feed_struct_t { + var feed *feed_struct_t = &feed_struct_t{state: state} + feed.parent = interp.feed + if feed.parent == nil { + feed.level = 0 + } else { + feed.level = feed.parent.level + 1 + } + + interp.feed = feed + return feed +} + +func (interp *Interp) finalize_feed_struct(feed *feed_struct_t, is_stmt bool) { + + var stmt_cnode *Cnode_t + + if feed.cnode_tmp_last != nil { + if is_stmt { + // wrap the list of cnodes as a single statement node + stmt_cnode = &Cnode_t{code: CNODE_STMT, child: feed.cnode_tmp_first} // token is empty + + if feed.cnode_last == nil { + feed.cnode_first = stmt_cnode + } else { + feed.cnode_last.next = stmt_cnode + } + feed.cnode_last = stmt_cnode + } else { + if feed.cnode_last == nil { + feed.cnode_first = feed.cnode_tmp_first + + } else { + feed.cnode_last.next = feed.cnode_tmp_first + } + feed.cnode_last = feed.cnode_tmp_last + } + + feed.cnode_tmp_first = nil + feed.cnode_tmp_last = nil + } +} + +func (inter *Interp) add_cnode_to_feed_struct(feed *feed_struct_t, cnode *Cnode_t) { + if feed.cnode_tmp_last == nil { + feed.cnode_tmp_first = cnode + } else { + feed.cnode_tmp_last.next = cnode + } + feed.cnode_tmp_last = cnode +} + +var feed_to_cnode_code_tab [11]cnode_code_t = [11]cnode_code_t{ + FEED_TOP: CNODE_EOL, // this must never be used + FEED_INIT: CNODE_INIT, + FEED_BRACKET: CNODE_BRACKET, + FEED_BRACE: CNODE_BRACE, + FEED_BRACED_TEXT: CNODE_TEXT, + FEED_DQUOTE: CNODE_DQUOTE, + FEED_DQUOTED_TEXT: CNODE_TEXT, + FEED_DOLLAR: CNODE_VAR, + FEED_SEP: CNODE_SEP, + FEED_WORD: CNODE_TEXT, + FEED_EOL: CNODE_EOL, +} + +func (interp *Interp) pop_feed_struct() *feed_struct_t { + var ( + feed *feed_struct_t + parent *feed_struct_t + cnode *Cnode_t + ) + + feed = interp.feed // to pop off + if feed == nil { + // excessive pop request. it's an internal implementation error + // TODO: return error??? + panic("excessive pop off feed stack") + return nil // TODO: return failure??? + } + + parent = feed.parent + + //fmt.Printf("parent %p feed [%p]>>\n", parent, feed) + if parent == nil { + // this must be FEED_TOP being popped off. + if feed.state != FEED_TOP { + panic("invalid internal state - top feed struct not FEED_TOP") + } + interp.feed = nil // indicate that the feed stack is empty + return feed // return the old feed struct + } + + if feed.state == FEED_DOLLAR && len(feed.token) == 0 { + // switch the dollar sign not followed by name to a literal dollar sign text + feed.state = FEED_WORD + interp.add_rune_to_token('$') + } + + if feed.state == FEED_EOL { + interp.finalize_feed_struct(parent, true) + parent.cnode_cont = false + } else if feed.state == FEED_SEP { + parent.cnode_cont = false + } else { + cnode = &Cnode_t{code: feed_to_cnode_code_tab[feed.state], token: feed.token} + if cnode.code == CNODE_BRACKET || cnode.code == CNODE_BRACE || cnode.code == CNODE_INIT { + // popping a container feed struct + interp.finalize_feed_struct(feed, true) + cnode.child = feed.cnode_first + } else if cnode.code == CNODE_DQUOTE { + interp.finalize_feed_struct(feed, false) + cnode.child = feed.cnode_first + } /*else if cnode.code == CNODE_VAR { + fmt.Printf(">>>>\n") + interp.dump_cnodes(parent.cnode_first, true) + fmt.Printf(">>>>\n") + interp.dump_cnodes(parent.cnode_tmp_first, true) + fmt.Printf(">>>>\n") + interp.dump_cnodes(cnode, true) + + }*/ + + // add the current cnode to the parent feed struct + interp.add_cnode_to_feed_struct(parent, cnode) + + if parent.cnode_cont { + interp.add_cnode_to_feed_struct(parent, &Cnode_t{code: CNODE_JOIN}) + } else { + parent.cnode_cont = true + } + + if feed.state == FEED_INIT { + if parent.state != FEED_TOP { + panic("internal error - parent struct not FEED_TOP") + } + interp.finalize_feed_struct(parent, false) + } + } + + interp.feed = parent + return feed +} + +func (interp *Interp) add_rune_to_token(c rune) { + interp.feed.token = append(interp.feed.token, c) +} + +func (interp *Interp) start_escape() { + interp.feed.extra.escaped = true + interp.feed.extra.escape_len = 0 + interp.feed.extra.escape_max = 0 + interp.feed.extra.escape_val = 0 +} + +func (interp *Interp) end_escape() { + interp.feed.extra.escaped = false +} + +func (interp *Interp) handle_escape(c rune) error { + if c == EOF_RUNE { + goto stop_escaping + } + + switch interp.feed.extra.escape_max { + case 8: // \UXXXXXXXX + fallthrough + case 4: // \uXXXX + fallthrough + case 2: // \xXX + if is_xdigit(c) { + interp.feed.extra.escape_val = interp.feed.extra.escape_val*16 + digit_to_code(c) + interp.feed.extra.escape_len++ + } else { + goto stop_escaping + } + case 3: // \ooo + if c >= '0' && c <= '7' { + interp.feed.extra.escape_val = interp.feed.extra.escape_val*8 + digit_to_code(c) + interp.feed.extra.escape_len++ + } else { + goto stop_escaping + } + + case 0: + if c == 'x' { + interp.feed.extra.escape_max = 2 + } else if c == 'u' { + interp.feed.extra.escape_max = 4 + } else if c == 'U' { + interp.feed.extra.escape_max = 8 + } else if c >= '0' && c <= '7' { + interp.feed.extra.escape_max = 3 + return err_refeed + } else { + interp.add_rune_to_token(escape_single_rune(c)) + interp.end_escape() + return nil + } + + default: + goto stop_escaping + } + + if interp.feed.extra.escape_len == interp.feed.extra.escape_max { + interp.add_rune_to_token(rune(interp.feed.extra.escape_val)) + interp.end_escape() + } + return nil + +stop_escaping: + if interp.feed.extra.escape_len == 0 { + c = escape_max_to_rune(interp.feed.extra.escape_max) + if c != '\x00' { + interp.add_rune_to_token(c) + } + } else { + interp.add_rune_to_token(rune(interp.feed.extra.escape_val)) + } + interp.end_escape() + return err_refeed +} + +// ------------------------------------------------------------------------------- + +func (interp *Interp) BeginFeed() { + for interp.feed != nil { + interp.pop_feed_struct() + } + interp.push_feed_struct(FEED_TOP) + interp.push_feed_struct(FEED_INIT) +} + +func (interp *Interp) FeedRune(c rune) error { + var err error + +start_over: + switch interp.feed.state { + + case FEED_INIT: + err = interp.do_init(c) + + case FEED_BRACKET: + err = interp.do_bracket(c) + + case FEED_BRACE: + err = interp.do_brace(c) + + case FEED_BRACED_TEXT: + err = interp.do_braced_text(c) + + case FEED_DQUOTE: + err = interp.do_dquote(c) + + case FEED_DQUOTED_TEXT: + err = interp.do_dquoted_text(c) + + case FEED_DOLLAR: + err = interp.do_dollar(c) + + case FEED_SEP: + err = interp.do_sep(c) + + case FEED_WORD: + err = interp.do_word(c) + + case FEED_EOL: + err = interp.do_eol(c) + + default: + err = err_feed_state + } + + if err == err_refeed { + goto start_over + } + + return err +} + +func (interp *Interp) FeedRunes(text []rune) error { + var ( + c rune + err error + ) + + for _, c = range text { + err = interp.FeedRune(c) + if err != nil { + return err + } + } + + return nil +} + +func (interp *Interp) EndFeed() (*Cnode_t, error) { + var err error = interp.FeedRune(EOF_RUNE) + if err != nil { + return nil, err + } + + if interp.feed == nil || interp.feed.state != FEED_TOP { + return nil, err_feed_state + } + + return interp.feed.cnode_first, nil +} diff --git a/interp/pcl.go b/interp/pcl.go new file mode 100644 index 0000000..3ce0bd3 --- /dev/null +++ b/interp/pcl.go @@ -0,0 +1,366 @@ +package interp + +import ( + "fmt" + "runtime" + "unsafe" +) + +type error_t struct { + msg string +} + +func (m *error_t) Error() string { + return m.msg +} + +type Var struct { + next *Var + name string + val string +} + +type rcode_t int + +const ( + R_OK rcode_t = iota + R_ERR + R_RETURN + R_BREAK + R_CONTINUE +) + +const NULL_RUNE rune = '\u0000' +const EOF_RUNE rune = rune(^0) + +type context_t struct { + count int + parent_ctx *context_t + parent_node *Cnode_t +} + +type process_t struct { + interp *Interp + stack [16]unsafe.Pointer // value stack - TODO: change size + sp int + ctx *context_t +} + +type call_frame_t struct { + vars *Var + parent *call_frame_t +} + +type cnode_code_t int + +const ( + CNODE_INIT cnode_code_t = iota + CNODE_BRACKET + CNODE_BRACE + CNODE_DQUOTE + CNODE_VAR + CNODE_TEXT + CNODE_SEP + CNODE_EOL + CNODE_STMT + CNODE_JOIN // merge the two top elements off the stack +) + +type feed_state_t int + +const ( + FEED_TOP feed_state_t = iota + FEED_INIT + FEED_BRACKET // container for [] + FEED_BRACE // container for {} + FEED_BRACED_TEXT // uninterpreted text enclosed in {} + FEED_DQUOTE // container for "" + FEED_DQUOTED_TEXT // literal text inside "" + FEED_DOLLAR // variable reference + FEED_SEP // separator + FEED_WORD // unquoted word + FEED_EOL // end of line +) + +type feed_struct_t struct { + parent *feed_struct_t + level int + state feed_state_t + token []rune + cnode_first *Cnode_t + cnode_last *Cnode_t + cnode_tmp_first *Cnode_t + cnode_tmp_last *Cnode_t + cnode_cont bool + extra struct { + brace_count int + var_braced bool + escaped bool + escape_len int + escape_max int + escape_val int32 + } +} + +type Cnode_t struct { + next *Cnode_t + child *Cnode_t // for container nodes + code cnode_code_t + seqno int + token []rune +} + +type Interp struct { + strict bool + level int + max_level int + + feed *feed_struct_t + call_frame *call_frame_t + result string +} + +func NewInterp(max_level int, strict bool) (*Interp, error) { + + var ( + interp *Interp + ) + + interp = &Interp{ + strict: strict, + level: 0, + max_level: max_level, + } + runtime.SetFinalizer(interp, func(interp *Interp) { + interp.Close() + }) + + interp.push_feed_struct(FEED_TOP) + interp.push_feed_struct(FEED_INIT) + + return interp, nil +} + +func (interp *Interp) Close() { + for interp.feed != nil { + interp.pop_feed_struct() + } +} + +/* +func (interp *Interp) GetVar(name string, global bool) *Var { + var ( + v *Var + f *CallFrame + ) + + if global || is_clone_prefixed(name) { + f = interp.call_frame + for f.parent != nil { + f = f.parent + } + + v = f.vars + } else { + v = interp.call_frame.vars + } + + // TODO: handle array + + for v != nil { + // TODO: make the variable list can be hash map... + if v.name == name { + return v + } + } + + return nil +} + +func (interp *Interp) SetVar(name string, val string, global bool) { + var ( + f *CallFrame + v *Var + ) + + f = interp.call_frame + v = interp.GetVar(name, global) + + if v != nil { + //if v->val = nil { // IS THIS POSSIBLE + // + //} + } else { + // TODO: handle aray + + if global || is_clone_prefixed((name)) { + // TODD avand name by two charactes if clone_prefixed + f = get_top_call_frame(f) + } + + v = &Var{name: name} + v.next = f.vars + f.vars = v + } + + v.val = val +} + +func (interp *Interp) SetGlobalVar(name string, val string) { + +} + +func (interp *Interp) SetIntVar(name string, val int) { + +} + + +func is_clone_prefixed(name string) bool { + return len(name) > 2 && name[0] == ':' && name[1] == ':' +} + +func get_top_call_frame(f *CallFrame) *CallFrame { + for f.parent != nil { + f = f.parent + } + return f +} +*/ + +func (interp *Interp) Execute(node_head *Cnode_t) (*string, error) { + + var ( + node *Cnode_t + v *string + err error + ) + + v = new(string) // if there is no code the execute, the return value is an empty string + + for node = node_head; node != nil; node = node.next { + if node.code != CNODE_INIT { + return nil, fmt.Errorf("non-init node") + } + + if node.child == nil { // TODO: optmize the cnode tree that this check is not needed. the reader must not create an INIT node with empty + break + } + + v, err = interp.eval_atom_node(node.child) + if err != nil { + return nil, err + } + } + + return v, nil +} + +func (interp *Interp) dump_cnodes(node *Cnode_t, nl bool) { + interp.__dump_cnodes(node, nl, 0) +} + +func (interp *Interp) print_tabs(tabs int) { + var i int + for i = 0; i < tabs; i++ { + fmt.Printf(" ") + } +} + +func (interp *Interp) __dump_cnodes(node *Cnode_t, nl bool, tabs int) { + for node != nil { + + switch node.code { + case CNODE_INIT: + //fmt.Printf("[I]") + interp.__dump_cnodes(node.child, false, tabs) + //fmt.Printf("[E]") + fmt.Printf("\n") + case CNODE_STMT: + //fmt.Printf("[S]") + interp.print_tabs(tabs) + interp.__dump_cnodes(node.child, false, tabs) + //fmt.Printf("[E]") + if node.next != nil { + fmt.Printf("\n") + } + + case CNODE_BRACKET: // this is a container + if node.child == nil { + fmt.Printf("[]") + } else if node.child.next == nil { + // only 1 item inside brackets + fmt.Printf("[") + interp.__dump_cnodes(node.child, false, 0) + fmt.Printf("]") + } else { + fmt.Printf("[\n") + interp.__dump_cnodes(node.child, false, tabs+1) + fmt.Printf("\n") + interp.print_tabs(tabs) + fmt.Printf("]") + } + + case CNODE_BRACE: // this is a container + if node.child == nil { + fmt.Printf("{}") + } else if node.child.next == nil { + // only 1 item inside braces + fmt.Printf("{") + interp.__dump_cnodes(node.child, false, 0) + fmt.Printf("}") + } else { + fmt.Printf("{\n") + interp.__dump_cnodes(node.child, false, tabs+1) + fmt.Printf("\n") + interp.print_tabs(tabs) + fmt.Printf("}") + } + + case CNODE_DQUOTE: // this is a container + fmt.Printf("\"") + interp.__dump_cnodes(node.child, false, 0) + fmt.Printf("\"") + + case CNODE_VAR: + fmt.Printf("${%s}", string(node.token)) + + case CNODE_TEXT: + // TODO if text contains some specical characters while not + // contained inside other containers, some escaping is required + fmt.Printf("%s", string(node.token)) + + case CNODE_SEP: + fmt.Printf(" ") + + case CNODE_EOL: + fmt.Printf("\n") + + case CNODE_JOIN: + // do nothing + //fmt.Printf("") + + default: + fmt.Printf("UNKNOWN") + } + + if node.code != CNODE_STMT { + if node.next != nil && node.next.next != nil && node.next.next.code == CNODE_JOIN { + // do nothing + } else if node.next != nil && node.next.code == CNODE_JOIN { + // do nothing + } else if node.next != nil { + fmt.Printf(" ") + } + } + + node = node.next + + } + //if nl { + // fmt.Printf("\n") + //} +} + +func (interp *Interp) Dump(node *Cnode_t) { + interp.dump_cnodes(node, true) +} diff --git a/interp/proc.go b/interp/proc.go new file mode 100644 index 0000000..333a6d9 --- /dev/null +++ b/interp/proc.go @@ -0,0 +1,76 @@ +package interp + +import "fmt" + +func proc_expr(p *process_t) error { + + return nil +} + +func proc_if(p *process_t) error { + var ( + v *string + err error + ) + + if p.GetNumArgs() < 2 { + err = err_num_args + goto done + } + + //v, err = p.interp.EvalText([]rune(*p.GetArg(0))) + v, err = p.interp.eval_arg(p, 0) + if err != nil { + goto done + } + + if *v != "" { + //v, err = p.interp.eval_atom_node((*Cnode_t)(p.GetArg(1))) + v, err = p.interp.eval_arg(p, 1) + if err != nil { + goto done + } + + p.Return(*v) + } else { + // TODO: if elseif else + } + +done: + return err +} + +func proc_puts(p *process_t) error { + var ( + i int + nargs int + v *string + err error + ) + + //fmt.Printf("callee=%s\n", *(p.GetCalleeName())) + //fmt.Printf("callee=%s\n", *(p.GetArg(-1))) + //fmt.Printf("arg count = %d\n", p.GetNumArgs()) + + nargs = p.GetNumArgs() + for i = 0; i < nargs; i++ { + //fmt.Printf("%s", *p.GetArg(i)) + v, err = p.interp.eval_arg_literally(p, i) + if err != nil { + return err + } + fmt.Printf("%s", *v) + } + + if nargs >= 1 { + p.Return(*v) + } else { + p.Return("hello") + } + return nil +} + +func proc_unknown(p *process_t) error { + fmt.Printf("Unknown command - %s\n", *(p.GetCalleeName())) + return nil +}