diff options
Diffstat (limited to 'third_party/lisp/npg/src/define.lisp')
-rw-r--r-- | third_party/lisp/npg/src/define.lisp | 408 |
1 files changed, 408 insertions, 0 deletions
diff --git a/third_party/lisp/npg/src/define.lisp b/third_party/lisp/npg/src/define.lisp new file mode 100644 index 000000000000..f52f0381a2de --- /dev/null +++ b/third_party/lisp/npg/src/define.lisp @@ -0,0 +1,408 @@ +;;; define.lisp --- grammar rules definition + +;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero + +;;; Author: Walter C. Pelissero <walter@pelissero.de> +;;; Project: NPG a Naive Parser Generator + +#+cmu (ext:file-comment "$Module: define.lisp $") + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public License +;;; as published by the Free Software Foundation; either version 2.1 +;;; of the License, or (at your option) any later version. +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free +;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;;; 02111-1307 USA + +(in-package :naive-parser-generator) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defvar *smart-default-reduction* t + "If true the default reductions take only the non-static tokens - +those that are not declared as strings in the grammar.") + +;; These two are filled with DEFRULE. +(defvar *rules* (make-rules-table)) +(defvar *keywords* (make-keywords-table)) + +(defun make-action-arguments (tokens) + "Given a list of tokens making up a production, return three values: +the list of variables for the function reducing this production, those +that are non static and their unambiguous user-friendly names." + (flet ((unique (sym list) + (if (not (assoc sym list)) + sym + (loop + for i of-type fixnum from 2 + for x = (intern (format nil "~:@(~A~)~A" sym i)) + while (assoc x list) + finally (return x))))) + (loop + for tok in tokens + for i of-type fixnum from 1 + for arg = (intern (format nil "$~A" i) (find-package #.*package*)) + collect arg into args + unless (const-terminal-p tok) + collect arg into vars + and when (symbolp tok) + collect (list (unique tok named-vars) arg) into named-vars + when (and (listp tok) + (symbolp (cadr tok))) + collect (list (unique (cadr tok) named-vars) arg) into named-vars + finally + (return (values args vars named-vars))))) + +(defun make-action-function (name tokens action) + "Create a function with name NAME, arguments derived from TOKENS and +body ACTION. Return it's definition." + (let ((function + (multiple-value-bind (args vars named-vars) + (make-action-arguments tokens) + `(lambda ,args + (declare (ignorable ,@args)) + (let (($vars (list ,@vars)) + ($all (list ,@args)) + ,@named-vars + ($alist (list ,@(mapcar #'(lambda (v) + `(cons ',(intern (symbol-name (car v))) + ,(cadr v))) + named-vars)))) + (declare (ignorable $vars $all $alist ,@(mapcar #'car named-vars))) + (flet ((make-object (&optional type args) + (apply #'make-instance (or type ',name) + (append args $alist)))) + ,action)))))) + (when *compile-print* + (if *compile-verbose* + (format t "; Compiling ~S:~% ~S~%" name function) + (format t "; Compiling ~S~%" name))) + (compile name function))) + +(defun define-rule (name productions) + "Accept a rule in EBNF-like syntax, translate it into a sexp and a +call to INSERT-RULE-IN-CURRENT-GRAMMAR." + (flet ((transform (productions) + (loop + for tok in productions + with prod = nil + with action = nil + with phase = nil + with new-prods = nil + while tok + do (cond ((eq tok :=) + (push (list (nreverse prod) action) new-prods) + (setf prod nil + action nil + phase :prod)) + ((eq tok :reduce) + (setf phase :action)) + ((eq tok :tag) + (setf phase :tag)) + ((eq phase :tag) + (setf action `(cons ,tok $vars))) + ((eq phase :action) + (setf action tok)) + ((eq phase :prod) + (push tok prod))) + finally + (return (cdr (nreverse (cons (list (nreverse prod) action) new-prods))))))) + (insert-rule-in-current-grammar name (transform productions)))) + +(defmacro defrule (name &rest productions) + "Wrapper macro for DEFINE-RULE." + `(define-rule ',name ',productions)) + +(defun make-optional-rule (token) + "Make a rule for a possibly missing (non)terminal (? syntax) and +return it." + (insert-rule-in-current-grammar + (gensym (concatenate 'string "OPT-" + (if (rule-p token) + (symbol-name (rule-name token)) + (string-upcase token)))) + `(((,token)) (())))) + +(defun make-alternative-rule (tokens) + "Make a rule for a list of alternatives (\"or\" syntax) and return it." + (insert-rule-in-current-grammar + (gensym "ALT") + (mapcar #'(lambda (alternative) + `((,alternative))) + tokens))) + +(defun make-nonempty-list-rule (token &optional separator) + "Make a rule for a non-empty list (+ syntax) and return it." + (let ((rule-name (gensym (concatenate 'string "NELST-" + (if (rule-p token) + (symbol-name (rule-name token)) + (string-upcase token)))))) + (insert-rule-in-current-grammar + rule-name + (if separator + `(((,token ,separator ,rule-name) + (cons $1 $3)) + ((,token) ,#'list)) + `(((,token ,rule-name) + (cons $1 $2)) + ((,token) ,#'list)))))) + +(defun make-list-rule (token &optional separator) + "Make a rule for a possibly empty list (* syntax) return it." + (make-optional-rule (make-nonempty-list-rule token separator))) + +(defun const-terminal-p (object) + (or (stringp object) + (keywordp object))) + +(defun expand-production-token (tok) + "Translate token of the type NAME? or NAME* or NAME+ into (? NAME) +or (* NAME) or (+ NAME). This is used by the DEFRULE macro." + (if (symbolp tok) + (let* ((name (symbol-name tok)) + (last (char name (1- (length name)))) + ;; this looks silly but we need to make sure that we + ;; return symbols interned in this package, no one else + (op (cadr (assoc last '((#\? ?) (#\+ +) (#\* *)))))) + (if (and (> (length name) 1) op) + (list op + (intern (subseq name 0 (1- (length name))))) + tok)) + tok)) + +(defun EBNF-to-SEBNF (tokens) + "Take a production as a list of TOKENS and expand it. This turns a +EBNF syntax into a sexp-based EBNF syntax or SEBNF." + (loop + for tok in tokens + for token = (expand-production-token tok) + with new-tokens = '() + do (cond ((member token '(* + ?)) + (setf (car new-tokens) + (list token (car new-tokens)))) + (t + (push token new-tokens))) + finally (return (nreverse new-tokens)))) + +(defun SEBNF-to-BNF (tokens) + "Take a production in SEBNF (Symbolic Extended BNF) syntax and turn +it into BNF. The production is simplified but the current grammar is +populated with additional rules." + (flet ((make-complex-token-rule (tok) + (ecase (car tok) + (* (apply #'make-list-rule (cdr tok))) + (+ (apply #'make-nonempty-list-rule (cdr tok))) + (? (make-optional-rule (cadr tok))) + (or (make-alternative-rule (cdr tok)))))) + (loop + for token in tokens + with new-tokens = '() + with keywords = '() + do (cond ((listp token) + (push (make-complex-token-rule token) new-tokens)) + (t + (push token new-tokens) + (when (const-terminal-p token) + (push token keywords)))) + finally (return (values (nreverse new-tokens) keywords))))) + +(defun make-default-action-function (name tokens) + "Create a sexp to be used as default action in case one is not +supplied in the production. This is usually a quite sensible +one. That is, only the non-constant tokens are returned in a +list and in case only a variable token is available that one is +returned (not included in a list). If all the tokens are +constant, then all of them are returned in a list." + (cond ((null tokens) + ;; if the production matched the empty list (no tokens) we + ;; return always nil, that is the function LIST applied to no + ;; arguments + #'list) + ((null (cdr tokens)) + ;; if the production matches just one token we simply return + ;; that + #'identity) + (*smart-default-reduction* + ;; If we are required to be "smart" then create a function + ;; that simply returns the non static tokens of the + ;; production. If the production doesn't have nonterminal, + ;; then return all the tokens. If the production has only + ;; one argument then return that one only. + (make-action-function name tokens '(cond + ((null $vars) $all) + ((null (cdr $vars)) (car $vars)) + (t $vars)))) + (t + ;; in all the other cases we return all the token matching + ;; the production + #'list))) + +(defun make-production-from-descr (name production-description) + "Take a production NAME and its description in the form of a sexp +and return a production structure object together with a list of used +keywords." + (destructuring-bind (tokens &optional action) production-description + (let ((expanded-tokens (EBNF-to-SEBNF tokens))) + (multiple-value-bind (production-tokens keywords) + (sebnf-to-bnf expanded-tokens) + (let ((funct + (cond ((not action) + (make-default-action-function name expanded-tokens)) + ((or (listp action) + ;; the case when the action is simply to + ;; return a token (ie $2) or a constant value + (symbolp action)) + (make-action-function name expanded-tokens action)) + ((functionp action) + action) + (t ; action is a constant + #'(lambda (&rest args) + (declare (ignore args)) + action))))) + (values + ;; Make a promise instead of actually resolving the + ;; nonterminals. This avoids endless recursion. + (make-production :tokens production-tokens + :tokens-length (length production-tokens) + :action funct) + keywords)))))) + +(defun remove-immediate-left-recursivity (rule) + "Turn left recursive rules of the type + A -> A x | y +into + A -> y A2 + A2 -> x A2 | E +where E is the empty production." + (let ((name (rule-name rule)) + (productions (rule-productions rule))) + (loop + for prod in productions + for tokens = (prod-tokens prod) + ;; when immediately left recursive + when (eq (car tokens) rule) + collect prod into left-recursive + else + collect prod into non-left-recursive + finally + ;; found any left recursive production? + (when left-recursive + (warn "rule ~S is left recursive" name) + (let ((new-rule (make-rule :name (gensym "REWRITE")))) + ;; A -> y A2 + (setf (rule-productions rule) + (mapcar #'(lambda (p) + (let ((tokens (prod-tokens p)) + (action (prod-action p))) + (make-production :tokens (append tokens (list new-rule)) + :tokens-length (1+ (prod-tokens-length p)) + :action #'(lambda (&rest args) + (let ((f-A2 (car (last args))) + (head (butlast args))) + (funcall f-A2 (apply action head))))))) + non-left-recursive)) + ;; A2 -> x A2 | E + (setf (rule-productions new-rule) + (append + (mapcar #'(lambda (p) + (let ((tokens (prod-tokens p)) + (action (prod-action p))) + (make-production :tokens (append (cdr tokens) (list new-rule)) + :tokens-length (prod-tokens-length p) + :action #'(lambda (&rest args) + (let ((f-A2 (car (last args))) + (head (butlast args))) + #'(lambda (x) + (funcall f-A2 (apply action x head)))))))) + left-recursive) + (list + (make-production :tokens nil + :tokens-length 0 + :action #'(lambda () #'(lambda (arg) arg))))))))))) + +(defun remove-left-recursivity-from-rules (rules) + (loop + for rule being each hash-value in rules + do + ;; More to be done here. For now only the trivial immediate left + ;; recursivity is removed -wcp18/11/03. + (remove-immediate-left-recursivity rule))) + +(defun resolve-all-nonterminals (rules) + (loop + for rule being each hash-value in rules + do (loop + for production in (rule-productions rule) + do (setf (prod-tokens production) + (resolve-nonterminals (prod-tokens production) rules))))) + +(defun make-rule-productions (rule-name production-descriptions) + "Return a production object that belongs to RULE-NAME made according +to PRODUCTION-DESCRIPTIONS. See also MAKE-PRODUCTION-FROM-DESCR." + (loop + for descr in production-descriptions + for i of-type fixnum from 1 by 1 + for prod-name = (intern (format nil "~:@(~A~)-PROD~A" rule-name i)) + with productions = '() + with keywords = '() + do (progn + (multiple-value-bind (production keyws) + (make-production-from-descr prod-name descr) + (push production productions) + (setf keywords (append keyws keywords)))) + finally (return + (values (nreverse productions) keywords)))) + +(defun create-rule (name production-descriptions) + "Return a new rule object together with a list of keywords making up +the production definitions." + (multiple-value-bind (productions keywords) + (make-rule-productions name production-descriptions) + (values (make-rule :name name :productions productions) + keywords))) + +(defun insert-rule-in-current-grammar (name productions) + "Add rule to the current grammar and its keywords to the keywords +hash table. You don't want to use this directly. See DEFRULE macro +instead." + (when (find-rule name *rules*) + (error "redefining rule ~A" name)) + (multiple-value-bind (rule keywords) + (create-rule name productions) + (add-rule name rule *rules*) + (dolist (term keywords) + (add-keyword term *keywords*)) + rule)) + +(defun resolve-nonterminals (tokens rules) + "Given a list of production tokens, try to expand the nonterminal +ones with their respective rule from the the RULES pool." + (flet ((resolve-symbol (sym) + (or (find-rule sym rules) + sym))) + (mapcar #'(lambda (tok) + (if (symbolp tok) + (resolve-symbol tok) + tok)) + tokens))) + +(defun reset-grammar () + "Empty the current grammar from any existing rule." + (setf *rules* (make-rules-table) + *keywords* (make-keywords-table))) + +(defun generate-grammar (&optional (equal-p #'string-equal)) + "Return a GRAMMAR structure suitable for the PARSE function, using +the current rules. EQUAL-P, if present, is a function to be used to +match the input tokens; it defaults to STRING-EQUAL." + (resolve-all-nonterminals *rules*) + (remove-left-recursivity-from-rules *rules*) + (make-grammar :rules *rules* + :keywords *keywords* + :equal-p equal-p)) |