diff --git a/ase/test/awk/lisp/awklisp.org b/ase/test/awk/lisp/awklisp.org new file mode 100644 index 00000000..c4b9d6c0 --- /dev/null +++ b/ase/test/awk/lisp/awklisp.org @@ -0,0 +1,500 @@ +#!/usr/bin/gawk -f + +# --- Representation of Lisp data + +BEGIN { + a_number = 0 + pair_ptr = a_pair = 1 + symbol_ptr = a_symbol = 2 + + type_name[a_number] = "number" + type_name[a_pair] = "pair" + type_name[a_symbol] = "symbol" +} + +function is(type, expr) +{ + if (expr % 4 != type) + error("Expected a " type_name[type] ", not a " type_name[expr % 4]) + return expr +} + +function is_number(expr) { return expr % 4 == 0 } +function is_pair(expr) { return expr % 4 == 1 } +function is_symbol(expr) { return expr % 4 == 2 } +function is_atom(expr) { return expr % 4 != 1 } + +function make_number(n) { return n * 4 } + +function numeric_value(expr) +{ + if (expr % 4 != 0) error("Not a number") + return expr / 4 +} + +# Return the symbol :string names. +function string_to_symbol(string) +{ + if (string in intern) + return intern[string] + symbol_ptr += 4 + intern[string] = symbol_ptr + printname[symbol_ptr] = string + return symbol_ptr +} + +# Define a primitive procedure, with :nparams parameters, +# bound to the symbol named :name. +function def_prim(name, nparams, sym) +{ + sym = string_to_symbol(name) + value[sym] = string_to_symbol(sprintf("#", name)) + if (nparams != "") + num_params[value[sym]] = nparams + return value[sym] +} + +# --- Garbage collection + +# Make a new pair. +function cons(the_car, the_cdr) +{ + while (pair_ptr in marks) { + delete marks[pair_ptr] + pair_ptr += 4 + } + if (pair_ptr == pair_limit) + gc(the_car, the_cdr) + car[pair_ptr] = the_car + cdr[pair_ptr] = the_cdr + pair_ptr += 4 + return pair_ptr - 4 +} + +function protect(object) { protected[++protected_ptr] = object } +function unprotect() { --protected_ptr } + +function mark(object) +{ + while (is_pair(object) && !(object in marks)) { #** speed + marks[object] = 1 + mark(car[object]) + object = cdr[object] + } +} + +function gc(the_car, the_cdr, p, i) +{ + if (loud_gc) + printf("\nGC...") >"/dev/stderr" + mark(the_car); mark(the_cdr) + for (p in protected) + mark(protected[p]) + for (p in stack) + mark(stack[p]) + for (p in value) + mark(value[p]) + for (p in property) { + i = index(SUBSEP, p) + mark(substr(p, 1, i-1)) + mark(substr(p, i+1)) + mark(property[p]) + } + pair_ptr = a_pair + while (pair_ptr in marks) { + delete marks[pair_ptr] + pair_ptr += 4 + } + if (pair_ptr == pair_limit) { + if (loud_gc) + printf("Expanding heap...") >"/dev/stderr" + pair_limit += 4 * heap_increment + } +} + +# --- Set up + +BEGIN { + srand() + + frame_ptr = stack_ptr = 0 + + if (heap_increment == "") heap_increment = 1500 + pair_limit = a_pair + 4 * heap_increment + + NIL = string_to_symbol("nil") + T = string_to_symbol("t") + value[NIL] = NIL + value[T] = T + car[NIL] = cdr[NIL] = NIL # this is convenient in a couple places... + + THE_EOF_OBJECT = string_to_symbol("#eof") + value[string_to_symbol("the-eof-object")] = THE_EOF_OBJECT + eof = "(eof)" + + QUOTE = string_to_symbol("quote"); is_special[QUOTE] = 1 + LAMBDA = string_to_symbol("lambda"); is_special[LAMBDA] = 1 + IF = string_to_symbol("if"); is_special[IF] = 1 + SETQ = string_to_symbol("set!"); is_special[SETQ] = 1 + DEFINE = string_to_symbol("define"); is_special[DEFINE] = 1 + PROGN = string_to_symbol("begin"); is_special[PROGN] = 1 + WHILE = string_to_symbol("while"); is_special[WHILE] = 1 + + EQ = def_prim("eq?", 2) + NULL = def_prim("null?", 1) + CAR = def_prim("car", 1) + CDR = def_prim("cdr", 1) + CADR = def_prim("cadr", 1) + CDDR = def_prim("cddr", 1) + CONS = def_prim("cons", 2) + LIST = def_prim("list") + EVAL = def_prim("eval", 1) + APPLY = def_prim("apply", 2) + READ = def_prim("read", 0) + WRITE = def_prim("write", 1) + NEWLINE = def_prim("newline", 0) + ADD = def_prim("+", 2) + SUB = def_prim("-", 2) + MUL = def_prim("*", 2) + DIV = def_prim("quotient", 2) + MOD = def_prim("remainder", 2) + LT = def_prim("<", 2) + GET = def_prim("get", 2) + PUT = def_prim("put", 3) + ATOMP = def_prim("atom?", 1) + PAIRP = def_prim("pair?", 1) + SYMBOLP = def_prim("symbol?", 1) + NUMBERP = def_prim("number?", 1) + SETCAR = def_prim("set-car!", 2) + SETCDR = def_prim("set-cdr!", 2) + NREV = def_prim("reverse!", 1) + GENSYM = def_prim("gensym", 0) + RANDOM = def_prim("random", 1) + ERROR = def_prim("error") + + DRIVER = string_to_symbol("top-level-driver") +} + +# --- The interpreter + +BEGIN { + for (;;) { + if (DRIVER in value && value[DRIVER] != NIL) + apply(value[DRIVER]) + else { + expr = read() + if (expr == THE_EOF_OBJECT) + break; + protect(expr) + print_expr(eval(expr)) + unprotect() + } + } + + if (profiling) + for (proc in call_count) { + printf("%5d ", call_count[proc]) + print_expr(proc) + } +} + +# All the interpretation routines have the precondition that their +# arguments are protected from garbage collection. + +function eval(expr, old_frame_ptr) +{ + if (is_atom(expr)) #** speed + if (is_symbol(expr)) { + if (!(expr in value)) error("Unbound variable: " printname[expr]) + return value[expr] + } else + return expr + + op = car[expr] # op is global to save awk stack space + + if (!(op in is_special)) { + old_frame_ptr = frame_ptr + frame_ptr = stack_ptr + + eval_rands(cdr[expr]) + protect(proc = eval(car[expr])) + result = apply(proc) + unprotect() + + stack_ptr = frame_ptr + frame_ptr = old_frame_ptr + return result + } + + if (op == QUOTE) return car[cdr[expr]] + if (op == LAMBDA) return expr + if (op == IF) return eval(car[cdr[expr]]) != NIL \ + ? eval(car[cdr[cdr[expr]]]) \ + : eval(car[cdr[cdr[cdr[expr]]]]) + if (op == PROGN) return progn(cdr[expr]) + if (op == SETQ) { + if (!(car[cdr[expr]] in value)) + error("Unbound variable: " printname[car[cdr[expr]]]) + return value[car[cdr[expr]]] = eval(car[cdr[cdr[expr]]]) + } + if (op == WHILE) { + while (eval(car[cdr[expr]]) != NIL) + progn(cdr[cdr[expr]]) + return NIL + } + if (op == DEFINE) { + value[car[cdr[expr]]] = eval(car[cdr[cdr[expr]]]) + return car[cdr[expr]] + } + + error("BUG: Unknown special form") +} + +# Evaluate a sequence of expressions, returning the last value. +function progn(exprs) +{ + for (; cdr[exprs] != NIL; exprs = cdr[exprs]) + eval(car[exprs]) + return eval(car[exprs]) +} + +# Evaluate the operands of a procedure, pushing the results on the stack. +function eval_rands(rands) +{ + for (; rands != NIL; rands = cdr[rands]) + stack[stack_ptr++] = eval(car[rands]) +} + +# Call the procedure :proc, with args stack[frame_ptr]..stack[stack_ptr-1] +# (in that order). +function apply(proc) +{ + if (profiling) + ++call_count[proc] + if (car[proc] == LAMBDA) { + extend_env(car[cdr[proc]]) + result = progn(cdr[cdr[proc]]) # result is global to save stack space + unwind_env(car[cdr[proc]]) + return result + } + if (proc in num_params && num_params[proc] != stack_ptr - frame_ptr) + error("Wrong number of arguments to " printname[cdr[proc]]) + + if (proc == CAR) return car[is(a_pair, stack[frame_ptr])] + if (proc == CDR) return cdr[is(a_pair, stack[frame_ptr])] + if (proc == CONS) return cons(stack[frame_ptr], stack[frame_ptr+1]) + if (proc == NULL) return stack[frame_ptr] == NIL ? T : NIL + if (proc == EQ) return stack[frame_ptr] == stack[frame_ptr+1] ? T : NIL + if (proc == ATOMP) return is_atom(stack[frame_ptr]) ? T : NIL + if (proc == ADD) return is(a_number, stack[frame_ptr]) + is(a_number, stack[frame_ptr+1]) + if (proc == SUB) return is(a_number, stack[frame_ptr]) - is(a_number, stack[frame_ptr+1]) + if (proc == MUL) return make_number(numeric_value(stack[frame_ptr]) * numeric_value(stack[frame_ptr+1])) + if (proc == DIV) return make_number(int(numeric_value(stack[frame_ptr]) / numeric_value(stack[frame_ptr+1]))) + if (proc == MOD) return make_number(numeric_value(stack[frame_ptr]) % numeric_value(stack[frame_ptr+1])) + if (proc == LT) return (stack[frame_ptr] + 0 < stack[frame_ptr+1] + 0) ? T : NIL + if (proc == GET) return (stack[frame_ptr], stack[frame_ptr+1]) in property ? property[stack[frame_ptr], stack[frame_ptr+1]] : NIL + if (proc == PUT) return property[stack[frame_ptr], stack[frame_ptr+1]] = stack[frame_ptr+2] + if (proc == CADR) return car[is(a_pair, cdr[is(a_pair, stack[frame_ptr])])] + if (proc == CDDR) return cdr[is(a_pair, cdr[is(a_pair, stack[frame_ptr])])] + if (proc == LIST) return listify_args() + if (proc == SYMBOLP)return is_symbol(stack[frame_ptr]) ? T : NIL + if (proc == PAIRP) return is_pair(stack[frame_ptr]) ? T : NIL + if (proc == NUMBERP)return is_number(stack[frame_ptr]) ? T : NIL + if (proc == SETCAR) return car[is(a_pair, stack[frame_ptr])] = stack[frame_ptr+1] + if (proc == SETCDR) return cdr[is(a_pair, stack[frame_ptr])] = stack[frame_ptr+1] + if (proc == APPLY) return do_apply(stack[frame_ptr], stack[frame_ptr+1]) + if (proc == EVAL) return eval(stack[frame_ptr]) + if (proc == NREV) return nreverse(stack[frame_ptr], NIL) + if (proc == WRITE) { write_expr(stack[frame_ptr]); printf(" "); return NIL } + if (proc == NEWLINE){ printf("\n"); return NIL } + if (proc == READ) return read() + if (proc == RANDOM) return make_number(int(rand() * numeric_value(stack[frame_ptr]))) + if (proc == GENSYM) return string_to_symbol("#G" ++gensym_counter) + if (proc == ERROR) { printf("Error!\n"); print_expr(listify_args()); exit(1) } + error("Unknown procedure type") +} + +function do_apply(proc, args, old_frame_ptr) +{ + old_frame_ptr = frame_ptr + frame_ptr = stack_ptr + + for (; is_pair(args); args = cdr[args]) + stack[stack_ptr++] = car[args] + if (args != NIL) + error("Bad argument to APPLY: not a proper list") + result = apply(proc) + + stack_ptr = frame_ptr + frame_ptr = old_frame_ptr + return result +} + +function listify_args( p, result) +{ + result = NIL + for (p = stack_ptr - 1; frame_ptr <= p; --p) + result = cons(stack[p], result) + return result +} + +# --- The environment +# The current environment is represented by the set of values +# value[sym] where sym is a symbol. extend_env(vars) adds a new +# set of bindings for vars, saving the old values; unwind_env(vars) +# restores those old values. The new value for the nth member of +# vars is frame_ptr[n]; coincidentally, that's also where we +# store away the old value, since that stack frame's not needed +# for anything else after the extend_env() call. + +function extend_env(vars, p, temp) +{ + for (p = frame_ptr; vars != NIL; vars = cdr[vars]) { + if (p == stack_ptr) + error("Too many arguments to procedure") + temp = value[car[vars]] + value[car[vars]] = stack[p] + stack[p] = temp + ++p + } + if (p != stack_ptr) + error("Not enough arguments to procedure") +} + +function unwind_env(vars, p) +{ + for (p = frame_ptr; vars != NIL; vars = cdr[vars]) { + if (stack[p] == "") + delete value[car[vars]] + else + value[car[vars]] = stack[p] + ++p + } +} + +# --- Output + +function print_expr(expr) +{ + write_expr(expr) + print "" +} + +function write_expr(expr) +{ + if (is_atom(expr)) { + if (!is_symbol(expr)) + printf("%d", numeric_value(expr)) + else { + if (!(expr in printname)) + error("BUG: " expr " has no printname") + printf("%s", printname[expr]) + } + } else { + printf("(") + write_expr(car[expr]) + for (expr = cdr[expr]; is_pair(expr); expr = cdr[expr]) { + printf(" ") + write_expr(car[expr]) + } + if (expr != NIL) { + printf(" . ") + write_expr(expr) + } + printf(")") + } +} + +# --- Input + +function read( committed, result) +{ + skip_blanks() + if (token == eof) + if (committed) + error("Unexpected EOF") + else + return THE_EOF_OBJECT + if (token == "(") { # read a list + advance() + result = NIL + for (;;) { + skip_blanks() + if (token == ".") { + advance() + after_dot = read(1) + skip_blanks() + if (token != ")") + error("')' expected") + advance() + return nreverse(result, after_dot) + } else if (token == ")") { + advance() + return nreverse(result, NIL) + } else { + protect(result) + result = cons(read(1), result) + unprotect() + } + } + } else if (token == "'") { # a quoted expression + advance() + return cons(QUOTE, cons(read(1), NIL)) + } else if (token ~ /^-?[0-9]+$/) { # a number + result = make_number(token) + advance() + return result + } else { # a symbol + result = string_to_symbol(token) + advance() + return result + } +} + +function skip_blanks() +{ + while (token ~ /^[ \t]*$/) + advance() +} + +function advance() +{ + if (token == eof) return eof + if (token == "") { + if (getline line <= 0) { + token = eof + return + } + } + if (match(line, "^[()'.]") || + match(line, "^[_A-Za-z0-9=!@$%&*<>?+\\-*/:]+") || + match(line, "^[ \\t]+")) { + token = substr(line, RSTART, RLENGTH) + line = substr(line, RLENGTH+1) + } else if (line == "" || substr(line, 1, 1) == ";") + token = "" # this kludge permits interactive use + else + error("Lexical error starting at " line) +} + +# --- Miscellany + +# Destructively reverse :list and append :reversed_head. +function nreverse(list, reversed_head, tail) +{ + while (is_pair(list)) { #** speed? + tail = cdr[list] + cdr[list] = reversed_head + reversed_head = list + list = tail + } + if (list != NIL) + error("Not a proper list - reverse!") + return reversed_head +} + +function error(reason) +{ + print "ERROR: " reason >"/dev/stderr" + exit(1) +}