;;; parser construction (printf "parse-utils.s Mon Apr 26 11:55:14 1993~%") ;;; Using List-of-frames representation ;;; **************************************************************** ;;; **************************************************************** ;;; **************************************************************** ;;; include scan-ds.s: ;;; scanner using data structure representations ;;; **************************************************************** ;;; streams -- same as in functional version (define stream-get (lambda (stream rcvr) (let ((the-pair (stream))) (rcvr (car the-pair) (cdr the-pair))))) (define make-constant-stream (lambda (c) (lambda () (cons c (make-constant-stream c))))) (define stream->list (lambda (stream end-of-stream?) (stream-get stream (lambda (val newstream) (if (end-of-stream? val) '() (cons val (stream->list newstream end-of-stream?))))))) (define list->stream (lambda (l) (if (null? l) (make-constant-stream '()) (lambda () (cons (car l) (list->stream (cdr l))))))) (define string->stream (lambda (str) (let ((length (string-length str))) (letrec ((chars-from (lambda (i) (lambda () (cons (if (>= i length) #\^ (string-ref str i)) (chars-from (+ i 1))))))) (chars-from 0))))) ;;; **************************************************************** ;;; Data Structures (define-record scanner-result (token char stream)) (define-record token (class data)) ;;; **************************************************************** ;;; Cookers (define cook-identifier (lambda (buffer) (let ((sym (string->symbol (list->string buffer)))) (if (memq sym **keywords-list**) (make-token sym #f) (make-token 'identifier sym))))) (define cook-number (lambda (buffer) (make-token 'number (string->number (list->string buffer))))) (define apply-cooker (lambda (cooker char-list) (case cooker ((cook-identifier) (cook-identifier char-list)) ((cook-number) (cook-number char-list)) (else (if (symbol? cooker) (make-token cooker #f) ; make delimiter token (else (error 'apply-cooker "unknown cooker ~s" cooker))))))) ;;; **************************************************************** ;;; Testers (define apply-tester (lambda (tester ch) (cond ((char? tester) (char=? tester ch)) ((eq? tester 'else) #t) (else (case tester ((whitespace) (char-whitespace? ch)) ((alphabetic) (char-alphabetic? ch)) ((numeric) (char-numeric? ch)) (else (error 'apply-tester "unknown tester ~s" tester))))))) ;;; **************************************************************** ;;; Main loop (define apply-automaton (lambda (automaton state buf c stream) (let ((opcode (car state)) (next-state (cadr state))) '(printf "apply-state: opcode = ~s c = ~s~%" opcode c) (case opcode ((shift) (stream-get stream (lambda (char1 new-stream) (apply-automaton automaton next-state (cons c buf) char1 new-stream)))) ((drop) (stream-get stream (lambda (char1 new-stream) (apply-automaton automaton next-state buf char1 new-stream)))) ((emit) (let ((cooker (cadr state))) (let ((token (apply-cooker cooker (reverse buf)))) ;(printf "emitting token ~s~n" token) (make-scanner-result token c stream)))) ((cond) (apply-automaton automaton (apply-scanner-cond (cdr state) c) buf c stream)) ((goto) (apply-automaton automaton (cadr (assq next-state automaton)) buf c stream)) ((fail) (let ((msg (cadr state))) (error 'apply-automaton "scanner failed in state ~s on input ~s" msg c))) ((debug-state) (let ((msg (cadr state)) (next-state (caddr state))) (printf "~s ~s ~s~%" msg buf c) (apply-automaton automaton next-state buf c stream))) (else (error 'apply-automaton "bad opcode in state ~s" state)))))) (define apply-scanner-cond (lambda (alternatives c) (letrec ((loop (lambda (alts) (if (null? alts) (error 'apply-state "couldn't match character ~s in state ~%~s" c alternatives) (let ((alt1 (car alts))) ; (printf "apply-scanner-cond: c = ~s alternative = ~s~%" ; c alt1) (if (apply-tester (car alt1) c) (cadr alt1) (loop (cdr alts)))))))) (loop alternatives)))) ;;; **************************************************************** ;;; Scaffolding (define automaton->stream-transducer (lambda (automaton) (letrec ((loop (lambda (char stream) (let ((next-result (apply-automaton automaton (automaton->start-state automaton) '() char stream))) (let ((next-token (scanner-result->token next-result)) (next-char (scanner-result->char next-result)) (next-stream (scanner-result->stream next-result))) (cons next-token (lambda () (loop next-char next-stream)))))))) (lambda (stream) (lambda () (stream-get stream loop)))))) (define automaton->start-state (lambda (automaton) (cadr (car automaton)))) (define driver-1 (lambda (automaton string) (stream->list (let ((transducer (automaton->stream-transducer automaton)) (stream (string->stream string))) (transducer stream)) (lambda (token) (eq? (token->class token) 'end-marker))))) ;;; **************************************************************** ;;; Example automaton (define automaton-1 '((start-state (cond (whitespace (drop (goto start-state))) (alphabetic (shift (goto identifier-state))) (numeric (shift (goto number-state))) (#\+ (drop (emit plus-sym))) (#\: (shift (goto assign-sym-state))) (#\% (drop (goto comment-state))) (#\; (drop (emit semicolon))) (#\( (drop (emit lparen))) (#\) (drop (emit rparen))) (#\^ (emit end-marker)) (else (fail start-state)))) (identifier-state (cond (alphabetic (shift (goto identifier-state))) (numeric (shift (goto identifier-state))) (else (emit cook-identifier)))) (number-state (cond (numeric (shift (goto number-state))) (else (emit cook-number)))) (assign-sym-state (cond (#\= (shift (emit assign-sym))) (else (shift (goto identifier-state))))) (comment-state (cond (#\newline (drop (goto start-state))) (#\^ (goto start-state)) (else (drop (goto comment-state))))))) (define **keywords-list** '()) ; > (driver-1 automaton-1 "abc ; def ; % comment ; xyz 13") ; emitting token (token identifier abc) ; emitting token (token identifier def) ; emitting token (token identifier xyz) ; emitting token (token number 13) ; emitting token (token end-marker #f) ; ((token identifier abc) ; (token identifier def) ; (token identifier xyz) ; (token number 13)) ; > ;;; end of scan-ds.s ;;; **************************************************************** ;;; **************************************************************** ;;; **************************************************************** ;;; string->stream and stream->list are defined in scan.s (define token-stream-get stream-get) ;;; **************************************************************** ;;; **************************************************************** ;;; Parser architecture ;;; parser = (list tree) * item * item-stream -> tree * item * item-stream ;;; The item register can either contain an item or '() -- the latter ;;; signifying an empty buffer, to be filled when necessary. (define-record parser-result (tree item stream)) ;;; Grammar of actions: ;;; action :: = ((check/drop class) . action) ;;; ((check/shift class) . action) ;;; ((process/nt non-terminal) . action) ;;; ((reduce prod-name)) ;;; ((emit-list)) ;;; ((goto non-terminal)) ;;; (cond (non-terminal action) ... (else non-terminal)) ;;; ;;; parser ::= ((non-terminal action) ...) (define *trace-apply-parser* #f) ;;; redefine fill-token-register, parser-check to be uncurried: (define fill-token-register (lambda (buf token stream action) (if (null? token) (token-stream-get stream (lambda (token stream) (action buf token stream))) (action buf token stream)))) (define parser-check (lambda (class buf token stream action) (fill-token-register buf token stream (lambda (buf token stream) (if (eq? class (token->class token)) (action buf token stream) (error 'check "looking for ~s, found ~s" class token)))))) (define apply-parser-action (lambda (parser action buf token stream) (if *trace-apply-parser* (printf "apply-parser-action: action = ~s~% buf = ~s token = ~s~%" action buf token)) (if (eq? (car action) 'cond) ;; it's a cond (let ((alternatives (cdr action))) (fill-token-register buf token stream (lambda (buf token stream) (apply-parser-action parser (apply-parser-cond alternatives token) buf token stream)))) ;; otherwise it's an ordinary instruction (let ((instruction (car action)) (action (cdr action)) (whole-action action)) (case (car instruction) ((check/drop) (let ((class (cadr instruction))) (parser-check class buf token stream (lambda (buf token stream) (apply-parser-action parser action buf '() stream))))) ((check/shift) (let ((class (cadr instruction))) (parser-check class buf token stream (lambda (buf token stream) (apply-parser-action parser action (cons (token->data token) buf) '() stream))))) ((reduce) (let ((prod-name (cadr instruction))) '(printf "reducing ~s: buf = ~s~%" prod-name buf) (make-parser-result (apply (make-record-from-name prod-name) (reverse buf)) token stream))) ((emit-list) '(printf "emit-list: emitting ~s~%" (reverse buf)) (make-parser-result (reverse buf) token stream)) ((fail) (let ((state (cadr instruction))) (error 'parser "couldn't match token ~s in state ~s" token state))) ((parser-goto goto) (let ((non-terminal (cadr instruction))) (apply-parser-action parser (cadr (assq non-terminal parser)) buf token stream))) ((process-nt) (let ((non-terminal (cadr instruction))) (let ((next-result (apply-parser-action parser (cadr (assq non-terminal parser)) '() token stream))) (record-case next-result (parser-result (tree token stream) (apply-parser-action parser action (cons tree buf) token stream)) (else (error 'process-nt "bad parser-result ~s~%" next-result)))))) (else (error 'apply-parser-action "unknown action ~s~%" instruction))))))) (define apply-parser-cond (lambda (alternatives token) '(printf "apply-parser-cond: alternatives = ~s token = ~s~%" alternatives token) (if (null? alternatives) (error 'apply-parser-cond "couldn't match token ~s~%" token) (let ((alternative-1 (car alternatives))) '(printf "apply-parser-cond: token = ~s alternative = ~s~%" token alternative-1) (if (or (eq? (car alternative-1) (token->class token)) ;; "else" is always true if it's last alternative (and (eq? (car alternative-1) 'else) (null? (cdr alternatives)))) (cadr alternative-1) (apply-parser-cond (cdr alternatives) token)))))) ;; new parse-top-level ;; (define parse-top-level (lambda (parser token-stream) (let ((result (apply-parser-action parser '((goto start-state)) '() '() token-stream))) '(printf "top-level parse returned.~%") '(pretty-print result) (record-case result (parser-result (tree token stream) (let ((token (if (null? token) (token-stream-get stream (lambda (token stream) token)) token))) (if (eq? (token->class token) 'end-marker) tree (error 'parse-top-level "symbols left over: ~s..." token)))) (else (error 'parse-top-level "top-level-parse not a parser-result")))))) ;; a simpler driver (define simple-parse-top-level (lambda (parser token-stream) (apply-parser-action '((goto start-state)) '() '() token-stream))) ;;; **************************************************************** ;;; constructors for example (define-record compound-command (command-list)) (define-record while-command (exp cmd)) (define-record if-command (exp cmd1 cmd2)) (define-record assignment-command (var exp)) (define-record variable-expression (var)) (define-record sum-expression (exp1 exp2)) (define-record end-marker-command ()) (define **keywords-list** '(begin end exit)) (define parser-2 '((start-state ((goto command))) (command (cond (begin ((check/drop begin) (process-nt compound-command) (reduce compound-command))) (identifier ((check/shift identifier) (check/drop assign-sym) (process-nt expression) (reduce assignment))) (exit ((check/drop exit) (reduce end-marker-command))) (else ((fail command))))) (compound-command ((process-nt command) (parser-goto compound-command-loop))) (compound-command-loop (cond (semicolon ((check/drop semicolon) (process-nt command) (parser-goto compound-command-loop))) (end ((check/drop end) (emit-list))) (else ((fail compound-command-loop))))) (expression (cond (identifier ((check/shift identifier) (reduce var-expression))) (number ((check/shift number) (reduce const-expression))) (lparen ((check/drop lparen) (process-nt expression) (check/drop plus-sym) (process-nt expression) (check/drop rparen) (reduce addition-expression))) (else ((parser-fail expression))))))) (define string->token-stream (lambda (state string) ((automaton->stream-transducer state) (string->stream string)))) (define test1 (lambda (input-string) (parse-top-level parser-2 (string->token-stream automaton-1 input-string)))) ; > (test1 "x := y") ; (assignment x (var-expression y)) ; > (test1 "begin x:=y; x := z end") ; (compound-command ; ((assignment x (var-expression y)) ; (assignment x (var-expression z)))) ; >