about summary refs log tree commit diff
path: root/third_party/lisp/npg/src/define.lisp
;;;  define.lisp --- grammar rules definition

;;;  Copyright (C) 2003-2006, 2009 by Walter C. Pelissero

;;;  Author: Walter C. Pelissero <walter@pelissero.de>
;;;  Project: NPG a Naive Parser Generator

#+cmu (ext:file-comment "$Module: define.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

(in-package :naive-parser-generator)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defvar *smart-default-reduction* t
  "If true the default reductions take only the non-static tokens -
those that are not declared as strings in the grammar.")

;; These two are filled with DEFRULE.
(defvar *rules* (make-rules-table))
(defvar *keywords* (make-keywords-table))

(defun make-action-arguments (tokens)
  "Given a list of tokens making up a production, return three values:
the list of variables for the function reducing this production, those
that are non static and their unambiguous user-friendly names."
  (flet ((unique (sym list)
	   (if (not (assoc sym list))
	       sym
	       (loop
		  for i of-type fixnum from 2
		  for x = (intern (format nil "~:@(~A~)~A" sym i))
		  while (assoc x list)
		  finally (return x)))))
    (loop
       for tok in tokens
       for i of-type fixnum from 1
       for arg = (intern (format nil "$~A" i) (find-package #.*package*))
       collect arg into args
       unless (const-terminal-p tok)
         collect arg into vars
         and when (symbolp tok)
           collect (list (unique tok named-vars) arg) into named-vars
       when (and (listp tok)
		 (symbolp (cadr tok)))
	 collect (list (unique (cadr tok) named-vars) arg) into named-vars
       finally
       (return (values args vars named-vars)))))

(defun make-action-function (name tokens action)
  "Create a function with name NAME, arguments derived from TOKENS and
body ACTION.  Return it's definition."
  (let ((function
	 (multiple-value-bind (args vars named-vars)
	     (make-action-arguments tokens)
	   `(lambda ,args
	      (declare (ignorable ,@args))
	      (let (($vars (list ,@vars))
		    ($all (list ,@args))
		    ,@named-vars
		    ($alist (list ,@(mapcar #'(lambda (v)
						`(cons ',(intern (symbol-name (car v)))
						       ,(cadr v)))
					    named-vars))))
		(declare (ignorable $vars $all $alist ,@(mapcar #'car named-vars)))
		(flet ((make-object (&optional type args)
			 (apply #'make-instance (or type ',name)
				(append args $alist))))
		  ,action))))))
    (when *compile-print*
      (if *compile-verbose*
	  (format t "; Compiling ~S:~%  ~S~%" name function)
	  (format t "; Compiling ~S~%" name)))
    (compile name function)))

(defun define-rule (name productions)
  "Accept a rule in EBNF-like syntax, translate it into a sexp and a
call to INSERT-RULE-IN-CURRENT-GRAMMAR."
  (flet ((transform (productions)
	   (loop
	      for tok in productions
	      with prod = nil
	      with action = nil
	      with phase = nil
	      with new-prods = nil
	      while tok
	      do (cond ((eq tok :=)
			(push (list (nreverse prod) action) new-prods)
			(setf prod nil
			      action nil
			      phase :prod))
		       ((eq tok :reduce)
			(setf phase :action))
		       ((eq tok :tag)
			(setf phase :tag))
		       ((eq phase :tag)
			(setf action `(cons ,tok $vars)))
		       ((eq phase :action)
			(setf action tok))
		       ((eq phase :prod)
			(push tok prod)))
	      finally
		(return (cdr (nreverse (cons (list (nreverse prod) action) new-prods)))))))
    (insert-rule-in-current-grammar name (transform productions))))

(defmacro defrule (name &rest productions)
  "Wrapper macro for DEFINE-RULE."
  `(define-rule ',name ',productions))

(defun make-optional-rule (token)
  "Make a rule for a possibly missing (non)terminal (? syntax) and
return it."
  (insert-rule-in-current-grammar
   (gensym (concatenate 'string "OPT-"
			(if (rule-p token)
			    (symbol-name (rule-name token))
			    (string-upcase token))))
   `(((,token)) (()))))

(defun make-alternative-rule (tokens)
  "Make a rule for a list of alternatives (\"or\" syntax) and return it."
  (insert-rule-in-current-grammar
   (gensym "ALT")
   (mapcar #'(lambda (alternative)
	       `((,alternative)))
	   tokens)))

(defun make-nonempty-list-rule (token &optional separator)
  "Make a rule for a non-empty list (+ syntax) and return it."
  (let ((rule-name (gensym (concatenate 'string "NELST-"
					(if (rule-p token)
					    (symbol-name (rule-name token))
					    (string-upcase token))))))
    (insert-rule-in-current-grammar
     rule-name
     (if separator
	 `(((,token ,separator ,rule-name)
	    (cons $1 $3))
	   ((,token) ,#'list))
	 `(((,token ,rule-name)
	    (cons $1 $2))
	   ((,token) ,#'list))))))

(defun make-list-rule (token &optional separator)
  "Make a rule for a possibly empty list (* syntax) return it."
  (make-optional-rule (make-nonempty-list-rule token separator)))

(defun const-terminal-p (object)
  (or (stringp object)
      (keywordp object)))

(defun expand-production-token (tok)
  "Translate token of the type NAME? or NAME* or NAME+ into (? NAME)
or (* NAME) or (+ NAME).  This is used by the DEFRULE macro."
  (if (symbolp tok)
      (let* ((name (symbol-name tok))
	     (last (char name (1- (length name))))
	     ;; this looks silly but we need to make sure that we
	     ;; return symbols interned in this package, no one else
	     (op (cadr (assoc last '((#\? ?) (#\+ +) (#\* *))))))
	(if (and (> (length name) 1) op)
	    (list op
		  (intern (subseq name 0 (1- (length name)))))
	    tok))
      tok))

(defun EBNF-to-SEBNF (tokens)
  "Take a production as a list of TOKENS and expand it.  This turns a
EBNF syntax into a sexp-based EBNF syntax or SEBNF."
  (loop
     for tok in tokens
     for token = (expand-production-token tok)
     with new-tokens = '()
     do (cond ((member token '(* + ?))
	       (setf (car new-tokens)
		     (list token (car new-tokens))))
	      (t
	       (push token new-tokens)))
     finally (return (nreverse new-tokens))))

(defun SEBNF-to-BNF (tokens)
  "Take a production in SEBNF (Symbolic Extended BNF) syntax and turn
it into BNF.  The production is simplified but the current grammar is
populated with additional rules."
  (flet ((make-complex-token-rule (tok)
	   (ecase (car tok)
	     (* (apply #'make-list-rule (cdr tok)))
	     (+ (apply #'make-nonempty-list-rule (cdr tok)))
	     (? (make-optional-rule (cadr tok)))
	     (or (make-alternative-rule (cdr tok))))))
    (loop
       for token in tokens
       with new-tokens = '()
       with keywords = '()
       do (cond ((listp token)
		 (push (make-complex-token-rule token) new-tokens))
		(t
		 (push token new-tokens)
		 (when (const-terminal-p token)
		   (push token keywords))))
       finally (return (values (nreverse new-tokens) keywords)))))

(defun make-default-action-function (name tokens)
  "Create a sexp to be used as default action in case one is not
supplied in the production.  This is usually a quite sensible
one.  That is, only the non-constant tokens are returned in a
list and in case only a variable token is available that one is
returned (not included in a list).  If all the tokens are
constant, then all of them are returned in a list."
  (cond ((null tokens)
	 ;; if the production matched the empty list (no tokens) we
	 ;; return always nil, that is the function LIST applied to no
	 ;; arguments
	 #'list)
	((null (cdr tokens))
	 ;; if the production matches just one token we simply return
	 ;; that
	 #'identity)
	(*smart-default-reduction*
	 ;; If we are required to be "smart" then create a function
	 ;; that simply returns the non static tokens of the
	 ;; production.  If the production doesn't have nonterminal,
	 ;; then return all the tokens.  If the production has only
	 ;; one argument then return that one only.
	 (make-action-function name tokens '(cond
					     ((null $vars) $all)
					     ((null (cdr $vars)) (car $vars))
					     (t $vars))))
	(t
	 ;; in all the other cases we return all the token matching
	 ;; the production
	 #'list)))

(defun make-production-from-descr (name production-description)
  "Take a production NAME and its description in the form of a sexp
and return a production structure object together with a list of used
keywords."
  (destructuring-bind (tokens &optional action) production-description
    (let ((expanded-tokens (EBNF-to-SEBNF tokens)))
      (multiple-value-bind (production-tokens keywords)
	  (sebnf-to-bnf expanded-tokens)
      (let ((funct
	     (cond ((not action)
		    (make-default-action-function name expanded-tokens))
		   ((or (listp action)
			;; the case when the action is simply to
			;; return a token (ie $2) or a constant value
			(symbolp action))
		    (make-action-function name expanded-tokens action))
		   ((functionp action)
		    action)
		   (t			; action is a constant
		    #'(lambda (&rest args)
			(declare (ignore args))
			action)))))
	(values
	 ;; Make a promise instead of actually resolving the
	 ;; nonterminals.  This avoids endless recursion.
	 (make-production :tokens production-tokens
			  :tokens-length (length production-tokens)
			  :action funct)
	 keywords))))))

(defun remove-immediate-left-recursivity (rule)
  "Turn left recursive rules of the type
    A -> A x | y
into
    A -> y A2
    A2 -> x A2 | E
where E is the empty production."
  (let ((name (rule-name rule))
	(productions (rule-productions rule)))
    (loop
       for prod in productions
       for tokens = (prod-tokens prod)
       ;; when immediately left recursive
       when (eq (car tokens) rule)
       collect prod into left-recursive
       else
       collect prod into non-left-recursive
       finally
	 ;; found any left recursive production?
	 (when left-recursive
	   (warn "rule ~S is left recursive" name)
	   (let ((new-rule (make-rule :name (gensym "REWRITE"))))
	     ;; A -> y A2
	     (setf (rule-productions rule)
		   (mapcar #'(lambda (p)
			       (let ((tokens (prod-tokens p))
				     (action (prod-action p)))
				 (make-production :tokens (append tokens (list new-rule))
						  :tokens-length (1+ (prod-tokens-length p))
						  :action #'(lambda (&rest args)
							      (let ((f-A2 (car (last args)))
								    (head (butlast args)))
								(funcall f-A2 (apply action head)))))))
			   non-left-recursive))
	     ;; A2 -> x A2 | E
	     (setf (rule-productions new-rule)
		   (append
		    (mapcar #'(lambda (p)
				(let ((tokens (prod-tokens p))
				      (action (prod-action p)))
				  (make-production :tokens (append (cdr tokens) (list new-rule))
						   :tokens-length (prod-tokens-length p)
						   :action #'(lambda (&rest args)
							       (let ((f-A2 (car (last args)))
								     (head (butlast args)))
								 #'(lambda (x)
								     (funcall f-A2 (apply action x head))))))))
			    left-recursive)
		    (list
		     (make-production :tokens nil
				      :tokens-length 0
				      :action #'(lambda () #'(lambda (arg) arg)))))))))))

(defun remove-left-recursivity-from-rules (rules)
  (loop
     for rule being each hash-value in rules
     do
     ;; More to be done here.  For now only the trivial immediate left
     ;; recursivity is removed -wcp18/11/03.
       (remove-immediate-left-recursivity rule)))

(defun resolve-all-nonterminals (rules)
  (loop
     for rule being each hash-value in rules
     do (loop
	   for production in (rule-productions rule)
	   do (setf (prod-tokens production)
		    (resolve-nonterminals (prod-tokens production) rules)))))

(defun make-rule-productions (rule-name production-descriptions)
  "Return a production object that belongs to RULE-NAME made according
to PRODUCTION-DESCRIPTIONS.  See also MAKE-PRODUCTION-FROM-DESCR."
  (loop
     for descr in production-descriptions
     for i of-type fixnum from 1 by 1
     for prod-name = (intern (format nil "~:@(~A~)-PROD~A" rule-name i))
     with productions = '()
     with keywords = '()
     do (progn
	  (multiple-value-bind (production keyws)
	      (make-production-from-descr prod-name descr)
	    (push production productions)
	    (setf keywords (append keyws keywords))))
     finally (return
	       (values (nreverse productions) keywords))))

(defun create-rule (name production-descriptions)
  "Return a new rule object together with a list of keywords making up
the production definitions."
  (multiple-value-bind (productions keywords)
      (make-rule-productions name production-descriptions)
    (values (make-rule :name name :productions productions)
	    keywords)))

(defun insert-rule-in-current-grammar (name productions)
  "Add rule to the current grammar and its keywords to the keywords
hash table.  You don't want to use this directly.  See DEFRULE macro
instead."
  (when (find-rule name *rules*)
    (error "redefining rule ~A" name))
  (multiple-value-bind (rule keywords)
      (create-rule name productions)
    (add-rule name rule *rules*)
    (dolist (term keywords)
      (add-keyword term *keywords*))
    rule))

(defun resolve-nonterminals (tokens rules)
  "Given a list of production tokens, try to expand the nonterminal
ones with their respective rule from the the RULES pool."
  (flet ((resolve-symbol (sym)
	   (or (find-rule sym rules)
	       sym)))
    (mapcar #'(lambda (tok)
		(if (symbolp tok)
		    (resolve-symbol tok)
		    tok))
	    tokens)))

(defun reset-grammar ()
  "Empty the current grammar from any existing rule."
  (setf *rules* (make-rules-table)
	*keywords* (make-keywords-table)))

(defun generate-grammar (&optional (equal-p #'string-equal))
  "Return a GRAMMAR structure suitable for the PARSE function, using
the current rules.  EQUAL-P, if present, is a function to be used to
match the input tokens; it defaults to STRING-EQUAL."
  (resolve-all-nonterminals *rules*)
  (remove-left-recursivity-from-rules *rules*)
  (make-grammar :rules *rules*
		:keywords *keywords*
		:equal-p equal-p))