;;; scanner using data structure representations ;;; **************************************************************** ;;; streams -- same as in functional version (define stream-get (lambda (stream rcvr) (let ((the-pair (stream))) (rcvr (car the-pair) (cdr the-pair))))) (define make-constant-stream (lambda (c) (lambda () (cons c (make-constant-stream c))))) (define stream->list (lambda (stream end-of-stream?) (stream-get stream (lambda (val newstream) (if (end-of-stream? val) '() (cons val (stream->list newstream end-of-stream?))))))) (define list->stream (lambda (l) (if (null? l) (make-constant-stream '()) (lambda () (cons (car l) (list->stream (cdr l))))))) (define string->stream (lambda (str) (let ((length (string-length str))) (letrec ((chars-from (lambda (i) (lambda () (cons (if (>= i length) #\^ (string-ref str i)) (chars-from (+ i 1))))))) (chars-from 0))))) ;;; **************************************************************** ;;; Data Structures (define-record scanner-result (token char stream)) (define-record token (class data)) ;;; **************************************************************** ;;; Cookers (define cook-identifier (lambda (buffer) (let ((sym (string->symbol (list->string buffer)))) (if (memq sym **keywords-list**) (make-token sym #f) (make-token 'identifier sym))))) (define cook-number (lambda (buffer) (make-token 'number (string->number (list->string buffer))))) (define apply-cooker (lambda (cooker char-list) (case cooker ((cook-identifier) (cook-identifier char-list)) ((cook-number) (cook-number char-list)) (else (if (symbol? cooker) (make-token cooker #f) ; make delimiter token (else (error 'apply-cooker "unknown cooker ~s" cooker))))))) ;;; **************************************************************** ;;; Testers (define apply-tester (lambda (tester ch) (cond ((char? tester) (char=? tester ch)) ((eq? tester 'else) #t) (else (case tester ((whitespace) (char-whitespace? ch)) ((alphabetic) (char-alphabetic? ch)) ((numeric) (char-numeric? ch)) (else (error 'apply-tester "unknown tester ~s" tester))))))) ;;; **************************************************************** ;;; Main loop (define apply-automaton (lambda (automaton state buf c stream) (let ((opcode (car state)) (next-state (cadr state))) '(printf "apply-state: opcode = ~s c = ~s~%" opcode c) (case opcode ((shift) (stream-get stream (lambda (char1 new-stream) (apply-automaton automaton next-state (cons c buf) char1 new-stream)))) ((drop) (stream-get stream (lambda (char1 new-stream) (apply-automaton automaton next-state buf char1 new-stream)))) ((emit) (let ((cooker (cadr state))) (let ((token (apply-cooker cooker (reverse buf)))) (printf "emitting token ~s~n" token) (make-scanner-result token c stream)))) ((cond) (apply-automaton automaton (apply-scanner-cond (cdr state) c) buf c stream)) ((goto) (apply-automaton automaton (cadr (assq next-state automaton)) buf c stream)) ((fail) (let ((msg (cadr state))) (error 'apply-automaton "scanner failed in state ~s on input ~s" msg c))) ((debug-state) (let ((msg (cadr state)) (next-state (caddr state))) (printf "~s ~s ~s~%" msg buf c) (apply-automaton automaton next-state buf c stream))) (else (error 'apply-automaton "bad opcode in state ~s" state)))))) (define apply-scanner-cond (lambda (alternatives c) (letrec ((loop (lambda (alts) (if (null? alts) (error 'apply-state "couldn't match character ~s in state ~%~s" c alternatives) (let ((alt1 (car alts))) ; (printf "apply-scanner-cond: c = ~s alternative = ~s~%" ; c alt1) (if (apply-tester (car alt1) c) (cadr alt1) (loop (cdr alts)))))))) (loop alternatives)))) ;;; **************************************************************** ;;; Scaffolding (define automaton->stream-transducer (lambda (automaton) (letrec ((loop (lambda (char stream) (let ((next-result (apply-automaton automaton (automaton->start-state automaton) '() char stream))) (let ((next-token (scanner-result->token next-result)) (next-char (scanner-result->char next-result)) (next-stream (scanner-result->stream next-result))) (cons next-token (lambda () (loop next-char next-stream)))))))) (lambda (stream) (lambda () (stream-get stream loop)))))) (define automaton->start-state (lambda (automaton) (cadr (car automaton)))) (define driver-1 (lambda (automaton string) (stream->list (let ((transducer (automaton->stream-transducer automaton)) (stream (string->stream string))) (transducer stream)) (lambda (token) (eq? (token->class token) 'end-marker))))) ;;; **************************************************************** ;;; Example automaton (define automaton-1 '((start-state (cond (whitespace (drop (goto start-state))) (alphabetic (shift (goto identifier-state))) (numeric (shift (goto number-state))) (#\+ (drop (emit plus-sym))) (#\: (shift (goto assign-sym-state))) (#\% (drop (goto comment-state))) (#\; (drop (emit semicolon))) (#\( (drop (emit lparen))) (#\) (drop (emit rparen))) (#\^ (emit end-marker)) (else (fail start-state)))) (identifier-state (cond (alphabetic (shift (goto identifier-state))) (numeric (shift (goto identifier-state))) (else (emit cook-identifier)))) (number-state (cond (numeric (shift (goto number-state))) (else (emit cook-number)))) (assign-sym-state (cond (#\= (shift (emit assign-sym))) (else (shift (goto identifier-state))))) (comment-state (cond (#\newline (drop (goto start-state))) (#\^ (goto start-state)) (else (drop (goto comment-state))))))) (define **keywords-list** '()) ; > (driver-1 automaton-1 "abc ; def ; % comment ; xyz 13") ; emitting token (token identifier abc) ; emitting token (token identifier def) ; emitting token (token identifier xyz) ; emitting token (token number 13) ; emitting token (token end-marker #f) ; ((token identifier abc) ; (token identifier def) ; (token identifier xyz) ; (token number 13)) ; >