;;; define.lisp --- grammar rules definition
;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero
;;; Author: Walter C. Pelissero <walter@pelissero.de>
;;; Project: NPG a Naive Parser Generator
#+cmu (ext:file-comment "$Module: define.lisp $")
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA
(in-package :naive-parser-generator)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defvar *smart-default-reduction* t
"If true the default reductions take only the non-static tokens -
those that are not declared as strings in the grammar.")
;; These two are filled with DEFRULE.
(defvar *rules* (make-rules-table))
(defvar *keywords* (make-keywords-table))
(defun make-action-arguments (tokens)
"Given a list of tokens making up a production, return three values:
the list of variables for the function reducing this production, those
that are non static and their unambiguous user-friendly names."
(flet ((unique (sym list)
(if (not (assoc sym list))
sym
(loop
for i of-type fixnum from 2
for x = (intern (format nil "~:@(~A~)~A" sym i))
while (assoc x list)
finally (return x)))))
(loop
for tok in tokens
for i of-type fixnum from 1
for arg = (intern (format nil "$~A" i) (find-package #.*package*))
collect arg into args
unless (const-terminal-p tok)
collect arg into vars
and when (symbolp tok)
collect (list (unique tok named-vars) arg) into named-vars
when (and (listp tok)
(symbolp (cadr tok)))
collect (list (unique (cadr tok) named-vars) arg) into named-vars
finally
(return (values args vars named-vars)))))
(defun make-action-function (name tokens action)
"Create a function with name NAME, arguments derived from TOKENS and
body ACTION. Return it's definition."
(let ((function
(multiple-value-bind (args vars named-vars)
(make-action-arguments tokens)
`(lambda ,args
(declare (ignorable ,@args))
(let (($vars (list ,@vars))
($all (list ,@args))
,@named-vars
($alist (list ,@(mapcar #'(lambda (v)
`(cons ',(intern (symbol-name (car v)))
,(cadr v)))
named-vars))))
(declare (ignorable $vars $all $alist ,@(mapcar #'car named-vars)))
(flet ((make-object (&optional type args)
(apply #'make-instance (or type ',name)
(append args $alist))))
,action))))))
(when *compile-print*
(if *compile-verbose*
(format t "; Compiling ~S:~% ~S~%" name function)
(format t "; Compiling ~S~%" name)))
(compile name function)))
(defun define-rule (name productions)
"Accept a rule in EBNF-like syntax, translate it into a sexp and a
call to INSERT-RULE-IN-CURRENT-GRAMMAR."
(flet ((transform (productions)
(loop
for tok in productions
with prod = nil
with action = nil
with phase = nil
with new-prods = nil
while tok
do (cond ((eq tok :=)
(push (list (nreverse prod) action) new-prods)
(setf prod nil
action nil
phase :prod))
((eq tok :reduce)
(setf phase :action))
((eq tok :tag)
(setf phase :tag))
((eq phase :tag)
(setf action `(cons ,tok $vars)))
((eq phase :action)
(setf action tok))
((eq phase :prod)
(push tok prod)))
finally
(return (cdr (nreverse (cons (list (nreverse prod) action) new-prods)))))))
(insert-rule-in-current-grammar name (transform productions))))
(defmacro defrule (name &rest productions)
"Wrapper macro for DEFINE-RULE."
`(define-rule ',name ',productions))
(defun make-optional-rule (token)
"Make a rule for a possibly missing (non)terminal (? syntax) and
return it."
(insert-rule-in-current-grammar
(gensym (concatenate 'string "OPT-"
(if (rule-p token)
(symbol-name (rule-name token))
(string-upcase token))))
`(((,token)) (()))))
(defun make-alternative-rule (tokens)
"Make a rule for a list of alternatives (\"or\" syntax) and return it."
(insert-rule-in-current-grammar
(gensym "ALT")
(mapcar #'(lambda (alternative)
`((,alternative)))
tokens)))
(defun make-nonempty-list-rule (token &optional separator)
"Make a rule for a non-empty list (+ syntax) and return it."
(let ((rule-name (gensym (concatenate 'string "NELST-"
(if (rule-p token)
(symbol-name (rule-name token))
(string-upcase token))))))
(insert-rule-in-current-grammar
rule-name
(if separator
`(((,token ,separator ,rule-name)
(cons $1 $3))
((,token) ,#'list))
`(((,token ,rule-name)
(cons $1 $2))
((,token) ,#'list))))))
(defun make-list-rule (token &optional separator)
"Make a rule for a possibly empty list (* syntax) return it."
(make-optional-rule (make-nonempty-list-rule token separator)))
(defun const-terminal-p (object)
(or (stringp object)
(keywordp object)))
(defun expand-production-token (tok)
"Translate token of the type NAME? or NAME* or NAME+ into (? NAME)
or (* NAME) or (+ NAME). This is used by the DEFRULE macro."
(if (symbolp tok)
(let* ((name (symbol-name tok))
(last (char name (1- (length name))))
;; this looks silly but we need to make sure that we
;; return symbols interned in this package, no one else
(op (cadr (assoc last '((#\? ?) (#\+ +) (#\* *))))))
(if (and (> (length name) 1) op)
(list op
(intern (subseq name 0 (1- (length name)))))
tok))
tok))
(defun EBNF-to-SEBNF (tokens)
"Take a production as a list of TOKENS and expand it. This turns a
EBNF syntax into a sexp-based EBNF syntax or SEBNF."
(loop
for tok in tokens
for token = (expand-production-token tok)
with new-tokens = '()
do (cond ((member token '(* + ?))
(setf (car new-tokens)
(list token (car new-tokens))))
(t
(push token new-tokens)))
finally (return (nreverse new-tokens))))
(defun SEBNF-to-BNF (tokens)
"Take a production in SEBNF (Symbolic Extended BNF) syntax and turn
it into BNF. The production is simplified but the current grammar is
populated with additional rules."
(flet ((make-complex-token-rule (tok)
(ecase (car tok)
(* (apply #'make-list-rule (cdr tok)))
(+ (apply #'make-nonempty-list-rule (cdr tok)))
(? (make-optional-rule (cadr tok)))
(or (make-alternative-rule (cdr tok))))))
(loop
for token in tokens
with new-tokens = '()
with keywords = '()
do (cond ((listp token)
(push (make-complex-token-rule token) new-tokens))
(t
(push token new-tokens)
(when (const-terminal-p token)
(push token keywords))))
finally (return (values (nreverse new-tokens) keywords)))))
(defun make-default-action-function (name tokens)
"Create a sexp to be used as default action in case one is not
supplied in the production. This is usually a quite sensible
one. That is, only the non-constant tokens are returned in a
list and in case only a variable token is available that one is
returned (not included in a list). If all the tokens are
constant, then all of them are returned in a list."
(cond ((null tokens)
;; if the production matched the empty list (no tokens) we
;; return always nil, that is the function LIST applied to no
;; arguments
#'list)
((null (cdr tokens))
;; if the production matches just one token we simply return
;; that
#'identity)
(*smart-default-reduction*
;; If we are required to be "smart" then create a function
;; that simply returns the non static tokens of the
;; production. If the production doesn't have nonterminal,
;; then return all the tokens. If the production has only
;; one argument then return that one only.
(make-action-function name tokens '(cond
((null $vars) $all)
((null (cdr $vars)) (car $vars))
(t $vars))))
(t
;; in all the other cases we return all the token matching
;; the production
#'list)))
(defun make-production-from-descr (name production-description)
"Take a production NAME and its description in the form of a sexp
and return a production structure object together with a list of used
keywords."
(destructuring-bind (tokens &optional action) production-description
(let ((expanded-tokens (EBNF-to-SEBNF tokens)))
(multiple-value-bind (production-tokens keywords)
(sebnf-to-bnf expanded-tokens)
(let ((funct
(cond ((not action)
(make-default-action-function name expanded-tokens))
((or (listp action)
;; the case when the action is simply to
;; return a token (ie $2) or a constant value
(symbolp action))
(make-action-function name expanded-tokens action))
((functionp action)
action)
(t ; action is a constant
#'(lambda (&rest args)
(declare (ignore args))
action)))))
(values
;; Make a promise instead of actually resolving the
;; nonterminals. This avoids endless recursion.
(make-production :tokens production-tokens
:tokens-length (length production-tokens)
:action funct)
keywords))))))
(defun remove-immediate-left-recursivity (rule)
"Turn left recursive rules of the type
A -> A x | y
into
A -> y A2
A2 -> x A2 | E
where E is the empty production."
(let ((name (rule-name rule))
(productions (rule-productions rule)))
(loop
for prod in productions
for tokens = (prod-tokens prod)
;; when immediately left recursive
when (eq (car tokens) rule)
collect prod into left-recursive
else
collect prod into non-left-recursive
finally
;; found any left recursive production?
(when left-recursive
(warn "rule ~S is left recursive" name)
(let ((new-rule (make-rule :name (gensym "REWRITE"))))
;; A -> y A2
(setf (rule-productions rule)
(mapcar #'(lambda (p)
(let ((tokens (prod-tokens p))
(action (prod-action p)))
(make-production :tokens (append tokens (list new-rule))
:tokens-length (1+ (prod-tokens-length p))
:action #'(lambda (&rest args)
(let ((f-A2 (car (last args)))
(head (butlast args)))
(funcall f-A2 (apply action head)))))))
non-left-recursive))
;; A2 -> x A2 | E
(setf (rule-productions new-rule)
(append
(mapcar #'(lambda (p)
(let ((tokens (prod-tokens p))
(action (prod-action p)))
(make-production :tokens (append (cdr tokens) (list new-rule))
:tokens-length (prod-tokens-length p)
:action #'(lambda (&rest args)
(let ((f-A2 (car (last args)))
(head (butlast args)))
#'(lambda (x)
(funcall f-A2 (apply action x head))))))))
left-recursive)
(list
(make-production :tokens nil
:tokens-length 0
:action #'(lambda () #'(lambda (arg) arg)))))))))))
(defun remove-left-recursivity-from-rules (rules)
(loop
for rule being each hash-value in rules
do
;; More to be done here. For now only the trivial immediate left
;; recursivity is removed -wcp18/11/03.
(remove-immediate-left-recursivity rule)))
(defun resolve-all-nonterminals (rules)
(loop
for rule being each hash-value in rules
do (loop
for production in (rule-productions rule)
do (setf (prod-tokens production)
(resolve-nonterminals (prod-tokens production) rules)))))
(defun make-rule-productions (rule-name production-descriptions)
"Return a production object that belongs to RULE-NAME made according
to PRODUCTION-DESCRIPTIONS. See also MAKE-PRODUCTION-FROM-DESCR."
(loop
for descr in production-descriptions
for i of-type fixnum from 1 by 1
for prod-name = (intern (format nil "~:@(~A~)-PROD~A" rule-name i))
with productions = '()
with keywords = '()
do (progn
(multiple-value-bind (production keyws)
(make-production-from-descr prod-name descr)
(push production productions)
(setf keywords (append keyws keywords))))
finally (return
(values (nreverse productions) keywords))))
(defun create-rule (name production-descriptions)
"Return a new rule object together with a list of keywords making up
the production definitions."
(multiple-value-bind (productions keywords)
(make-rule-productions name production-descriptions)
(values (make-rule :name name :productions productions)
keywords)))
(defun insert-rule-in-current-grammar (name productions)
"Add rule to the current grammar and its keywords to the keywords
hash table. You don't want to use this directly. See DEFRULE macro
instead."
(when (find-rule name *rules*)
(error "redefining rule ~A" name))
(multiple-value-bind (rule keywords)
(create-rule name productions)
(add-rule name rule *rules*)
(dolist (term keywords)
(add-keyword term *keywords*))
rule))
(defun resolve-nonterminals (tokens rules)
"Given a list of production tokens, try to expand the nonterminal
ones with their respective rule from the the RULES pool."
(flet ((resolve-symbol (sym)
(or (find-rule sym rules)
sym)))
(mapcar #'(lambda (tok)
(if (symbolp tok)
(resolve-symbol tok)
tok))
tokens)))
(defun reset-grammar ()
"Empty the current grammar from any existing rule."
(setf *rules* (make-rules-table)
*keywords* (make-keywords-table)))
(defun generate-grammar (&optional (equal-p #'string-equal))
"Return a GRAMMAR structure suitable for the PARSE function, using
the current rules. EQUAL-P, if present, is a function to be used to
match the input tokens; it defaults to STRING-EQUAL."
(resolve-all-nonterminals *rules*)
(remove-left-recursivity-from-rules *rules*)
(make-grammar :rules *rules*
:keywords *keywords*
:equal-p equal-p))