From 8e45aace13e00b91146d47385625449d14576fe5 Mon Sep 17 00:00:00 2001 From: sterni Date: Sat, 21 Aug 2021 14:44:37 +0200 Subject: chore(3p/lisp): import npg source tarball Used http://wcp.sdf-eu.org/software/npg-20150517T144652.tbz (sha256 42e88f6067128fbdb3a3d578371c9b0ee2a34f1d36daf80be8a520094132d828). There's no upstream repository nor a release since 2015, so importing seems to make a lot of sense. Since we can't subtree making any depot-related changes in a separate CL -- this is only the source import. Change-Id: I64c984ca0a84b9e48c6f496577ffccce1d7bdceb Reviewed-on: https://cl.tvl.fyi/c/depot/+/3377 Tested-by: BuildkiteCI Reviewed-by: grfn --- third_party/lisp/npg/src/common.lisp | 79 +++++++ third_party/lisp/npg/src/define.lisp | 408 ++++++++++++++++++++++++++++++++++ third_party/lisp/npg/src/package.lisp | 50 +++++ third_party/lisp/npg/src/parser.lisp | 234 +++++++++++++++++++ 4 files changed, 771 insertions(+) create mode 100644 third_party/lisp/npg/src/common.lisp create mode 100644 third_party/lisp/npg/src/define.lisp create mode 100644 third_party/lisp/npg/src/package.lisp create mode 100644 third_party/lisp/npg/src/parser.lisp (limited to 'third_party/lisp/npg/src') diff --git a/third_party/lisp/npg/src/common.lisp b/third_party/lisp/npg/src/common.lisp new file mode 100644 index 000000000000..8b64f5cc0a7b --- /dev/null +++ b/third_party/lisp/npg/src/common.lisp @@ -0,0 +1,79 @@ +;;; common.lisp --- common stuff + +;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero + +;;; Author: Walter C. Pelissero +;;; Project: NPG a Naive Parser Generator + +#+cmu (ext:file-comment "$Module: common.lisp $") + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public License +;;; as published by the Free Software Foundation; either version 2.1 +;;; of the License, or (at your option) any later version. +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free +;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;;; 02111-1307 USA + +(in-package :naive-parser-generator) + +(eval-when (:compile-toplevel :load-toplevel) + (defstruct grammar + rules + keywords + equal-p) + + (defstruct rule + name + productions) + + (defstruct (production (:conc-name prod-)) + tokens + (tokens-length 0 :type fixnum) + action) + + (defstruct token + type ; type of token (identifier, number, ...) + value ; its actual value + position) ; line/column in the input stream + ) ; eval-when + +(defmethod print-object ((obj rule) stream) + (format stream "#R(~A)" (rule-name obj))) + +(defmethod print-object ((obj production) stream) + (format stream "#P(action: ~S)" (prod-action obj))) + +(defmethod print-object ((obj token) stream) + (format stream "#T:~A=~S" (token-type obj) (token-value obj))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(declaim (inline make-rules-table find-rule add-rule)) + +(defun make-rules-table () + (make-hash-table)) + +(defun find-rule (rule-name rules) + (gethash rule-name rules)) + +(defun add-rule (rule-name rule rules) + (setf (gethash rule-name rules) rule)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(declaim (inline make-keywords-table find-keyword add-keyword)) + +(defun make-keywords-table () + (make-hash-table :test 'equal)) + +(defun find-keyword (keyword-name keywords) + (gethash keyword-name keywords)) + +(defun add-keyword (keyword keywords) + (setf (gethash keyword keywords) t)) diff --git a/third_party/lisp/npg/src/define.lisp b/third_party/lisp/npg/src/define.lisp new file mode 100644 index 000000000000..f52f0381a2de --- /dev/null +++ b/third_party/lisp/npg/src/define.lisp @@ -0,0 +1,408 @@ +;;; define.lisp --- grammar rules definition + +;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero + +;;; Author: Walter C. Pelissero +;;; Project: NPG a Naive Parser Generator + +#+cmu (ext:file-comment "$Module: define.lisp $") + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public License +;;; as published by the Free Software Foundation; either version 2.1 +;;; of the License, or (at your option) any later version. +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free +;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;;; 02111-1307 USA + +(in-package :naive-parser-generator) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defvar *smart-default-reduction* t + "If true the default reductions take only the non-static tokens - +those that are not declared as strings in the grammar.") + +;; These two are filled with DEFRULE. +(defvar *rules* (make-rules-table)) +(defvar *keywords* (make-keywords-table)) + +(defun make-action-arguments (tokens) + "Given a list of tokens making up a production, return three values: +the list of variables for the function reducing this production, those +that are non static and their unambiguous user-friendly names." + (flet ((unique (sym list) + (if (not (assoc sym list)) + sym + (loop + for i of-type fixnum from 2 + for x = (intern (format nil "~:@(~A~)~A" sym i)) + while (assoc x list) + finally (return x))))) + (loop + for tok in tokens + for i of-type fixnum from 1 + for arg = (intern (format nil "$~A" i) (find-package #.*package*)) + collect arg into args + unless (const-terminal-p tok) + collect arg into vars + and when (symbolp tok) + collect (list (unique tok named-vars) arg) into named-vars + when (and (listp tok) + (symbolp (cadr tok))) + collect (list (unique (cadr tok) named-vars) arg) into named-vars + finally + (return (values args vars named-vars))))) + +(defun make-action-function (name tokens action) + "Create a function with name NAME, arguments derived from TOKENS and +body ACTION. Return it's definition." + (let ((function + (multiple-value-bind (args vars named-vars) + (make-action-arguments tokens) + `(lambda ,args + (declare (ignorable ,@args)) + (let (($vars (list ,@vars)) + ($all (list ,@args)) + ,@named-vars + ($alist (list ,@(mapcar #'(lambda (v) + `(cons ',(intern (symbol-name (car v))) + ,(cadr v))) + named-vars)))) + (declare (ignorable $vars $all $alist ,@(mapcar #'car named-vars))) + (flet ((make-object (&optional type args) + (apply #'make-instance (or type ',name) + (append args $alist)))) + ,action)))))) + (when *compile-print* + (if *compile-verbose* + (format t "; Compiling ~S:~% ~S~%" name function) + (format t "; Compiling ~S~%" name))) + (compile name function))) + +(defun define-rule (name productions) + "Accept a rule in EBNF-like syntax, translate it into a sexp and a +call to INSERT-RULE-IN-CURRENT-GRAMMAR." + (flet ((transform (productions) + (loop + for tok in productions + with prod = nil + with action = nil + with phase = nil + with new-prods = nil + while tok + do (cond ((eq tok :=) + (push (list (nreverse prod) action) new-prods) + (setf prod nil + action nil + phase :prod)) + ((eq tok :reduce) + (setf phase :action)) + ((eq tok :tag) + (setf phase :tag)) + ((eq phase :tag) + (setf action `(cons ,tok $vars))) + ((eq phase :action) + (setf action tok)) + ((eq phase :prod) + (push tok prod))) + finally + (return (cdr (nreverse (cons (list (nreverse prod) action) new-prods))))))) + (insert-rule-in-current-grammar name (transform productions)))) + +(defmacro defrule (name &rest productions) + "Wrapper macro for DEFINE-RULE." + `(define-rule ',name ',productions)) + +(defun make-optional-rule (token) + "Make a rule for a possibly missing (non)terminal (? syntax) and +return it." + (insert-rule-in-current-grammar + (gensym (concatenate 'string "OPT-" + (if (rule-p token) + (symbol-name (rule-name token)) + (string-upcase token)))) + `(((,token)) (())))) + +(defun make-alternative-rule (tokens) + "Make a rule for a list of alternatives (\"or\" syntax) and return it." + (insert-rule-in-current-grammar + (gensym "ALT") + (mapcar #'(lambda (alternative) + `((,alternative))) + tokens))) + +(defun make-nonempty-list-rule (token &optional separator) + "Make a rule for a non-empty list (+ syntax) and return it." + (let ((rule-name (gensym (concatenate 'string "NELST-" + (if (rule-p token) + (symbol-name (rule-name token)) + (string-upcase token)))))) + (insert-rule-in-current-grammar + rule-name + (if separator + `(((,token ,separator ,rule-name) + (cons $1 $3)) + ((,token) ,#'list)) + `(((,token ,rule-name) + (cons $1 $2)) + ((,token) ,#'list)))))) + +(defun make-list-rule (token &optional separator) + "Make a rule for a possibly empty list (* syntax) return it." + (make-optional-rule (make-nonempty-list-rule token separator))) + +(defun const-terminal-p (object) + (or (stringp object) + (keywordp object))) + +(defun expand-production-token (tok) + "Translate token of the type NAME? or NAME* or NAME+ into (? NAME) +or (* NAME) or (+ NAME). This is used by the DEFRULE macro." + (if (symbolp tok) + (let* ((name (symbol-name tok)) + (last (char name (1- (length name)))) + ;; this looks silly but we need to make sure that we + ;; return symbols interned in this package, no one else + (op (cadr (assoc last '((#\? ?) (#\+ +) (#\* *)))))) + (if (and (> (length name) 1) op) + (list op + (intern (subseq name 0 (1- (length name))))) + tok)) + tok)) + +(defun EBNF-to-SEBNF (tokens) + "Take a production as a list of TOKENS and expand it. This turns a +EBNF syntax into a sexp-based EBNF syntax or SEBNF." + (loop + for tok in tokens + for token = (expand-production-token tok) + with new-tokens = '() + do (cond ((member token '(* + ?)) + (setf (car new-tokens) + (list token (car new-tokens)))) + (t + (push token new-tokens))) + finally (return (nreverse new-tokens)))) + +(defun SEBNF-to-BNF (tokens) + "Take a production in SEBNF (Symbolic Extended BNF) syntax and turn +it into BNF. The production is simplified but the current grammar is +populated with additional rules." + (flet ((make-complex-token-rule (tok) + (ecase (car tok) + (* (apply #'make-list-rule (cdr tok))) + (+ (apply #'make-nonempty-list-rule (cdr tok))) + (? (make-optional-rule (cadr tok))) + (or (make-alternative-rule (cdr tok)))))) + (loop + for token in tokens + with new-tokens = '() + with keywords = '() + do (cond ((listp token) + (push (make-complex-token-rule token) new-tokens)) + (t + (push token new-tokens) + (when (const-terminal-p token) + (push token keywords)))) + finally (return (values (nreverse new-tokens) keywords))))) + +(defun make-default-action-function (name tokens) + "Create a sexp to be used as default action in case one is not +supplied in the production. This is usually a quite sensible +one. That is, only the non-constant tokens are returned in a +list and in case only a variable token is available that one is +returned (not included in a list). If all the tokens are +constant, then all of them are returned in a list." + (cond ((null tokens) + ;; if the production matched the empty list (no tokens) we + ;; return always nil, that is the function LIST applied to no + ;; arguments + #'list) + ((null (cdr tokens)) + ;; if the production matches just one token we simply return + ;; that + #'identity) + (*smart-default-reduction* + ;; If we are required to be "smart" then create a function + ;; that simply returns the non static tokens of the + ;; production. If the production doesn't have nonterminal, + ;; then return all the tokens. If the production has only + ;; one argument then return that one only. + (make-action-function name tokens '(cond + ((null $vars) $all) + ((null (cdr $vars)) (car $vars)) + (t $vars)))) + (t + ;; in all the other cases we return all the token matching + ;; the production + #'list))) + +(defun make-production-from-descr (name production-description) + "Take a production NAME and its description in the form of a sexp +and return a production structure object together with a list of used +keywords." + (destructuring-bind (tokens &optional action) production-description + (let ((expanded-tokens (EBNF-to-SEBNF tokens))) + (multiple-value-bind (production-tokens keywords) + (sebnf-to-bnf expanded-tokens) + (let ((funct + (cond ((not action) + (make-default-action-function name expanded-tokens)) + ((or (listp action) + ;; the case when the action is simply to + ;; return a token (ie $2) or a constant value + (symbolp action)) + (make-action-function name expanded-tokens action)) + ((functionp action) + action) + (t ; action is a constant + #'(lambda (&rest args) + (declare (ignore args)) + action))))) + (values + ;; Make a promise instead of actually resolving the + ;; nonterminals. This avoids endless recursion. + (make-production :tokens production-tokens + :tokens-length (length production-tokens) + :action funct) + keywords)))))) + +(defun remove-immediate-left-recursivity (rule) + "Turn left recursive rules of the type + A -> A x | y +into + A -> y A2 + A2 -> x A2 | E +where E is the empty production." + (let ((name (rule-name rule)) + (productions (rule-productions rule))) + (loop + for prod in productions + for tokens = (prod-tokens prod) + ;; when immediately left recursive + when (eq (car tokens) rule) + collect prod into left-recursive + else + collect prod into non-left-recursive + finally + ;; found any left recursive production? + (when left-recursive + (warn "rule ~S is left recursive" name) + (let ((new-rule (make-rule :name (gensym "REWRITE")))) + ;; A -> y A2 + (setf (rule-productions rule) + (mapcar #'(lambda (p) + (let ((tokens (prod-tokens p)) + (action (prod-action p))) + (make-production :tokens (append tokens (list new-rule)) + :tokens-length (1+ (prod-tokens-length p)) + :action #'(lambda (&rest args) + (let ((f-A2 (car (last args))) + (head (butlast args))) + (funcall f-A2 (apply action head))))))) + non-left-recursive)) + ;; A2 -> x A2 | E + (setf (rule-productions new-rule) + (append + (mapcar #'(lambda (p) + (let ((tokens (prod-tokens p)) + (action (prod-action p))) + (make-production :tokens (append (cdr tokens) (list new-rule)) + :tokens-length (prod-tokens-length p) + :action #'(lambda (&rest args) + (let ((f-A2 (car (last args))) + (head (butlast args))) + #'(lambda (x) + (funcall f-A2 (apply action x head)))))))) + left-recursive) + (list + (make-production :tokens nil + :tokens-length 0 + :action #'(lambda () #'(lambda (arg) arg))))))))))) + +(defun remove-left-recursivity-from-rules (rules) + (loop + for rule being each hash-value in rules + do + ;; More to be done here. For now only the trivial immediate left + ;; recursivity is removed -wcp18/11/03. + (remove-immediate-left-recursivity rule))) + +(defun resolve-all-nonterminals (rules) + (loop + for rule being each hash-value in rules + do (loop + for production in (rule-productions rule) + do (setf (prod-tokens production) + (resolve-nonterminals (prod-tokens production) rules))))) + +(defun make-rule-productions (rule-name production-descriptions) + "Return a production object that belongs to RULE-NAME made according +to PRODUCTION-DESCRIPTIONS. See also MAKE-PRODUCTION-FROM-DESCR." + (loop + for descr in production-descriptions + for i of-type fixnum from 1 by 1 + for prod-name = (intern (format nil "~:@(~A~)-PROD~A" rule-name i)) + with productions = '() + with keywords = '() + do (progn + (multiple-value-bind (production keyws) + (make-production-from-descr prod-name descr) + (push production productions) + (setf keywords (append keyws keywords)))) + finally (return + (values (nreverse productions) keywords)))) + +(defun create-rule (name production-descriptions) + "Return a new rule object together with a list of keywords making up +the production definitions." + (multiple-value-bind (productions keywords) + (make-rule-productions name production-descriptions) + (values (make-rule :name name :productions productions) + keywords))) + +(defun insert-rule-in-current-grammar (name productions) + "Add rule to the current grammar and its keywords to the keywords +hash table. You don't want to use this directly. See DEFRULE macro +instead." + (when (find-rule name *rules*) + (error "redefining rule ~A" name)) + (multiple-value-bind (rule keywords) + (create-rule name productions) + (add-rule name rule *rules*) + (dolist (term keywords) + (add-keyword term *keywords*)) + rule)) + +(defun resolve-nonterminals (tokens rules) + "Given a list of production tokens, try to expand the nonterminal +ones with their respective rule from the the RULES pool." + (flet ((resolve-symbol (sym) + (or (find-rule sym rules) + sym))) + (mapcar #'(lambda (tok) + (if (symbolp tok) + (resolve-symbol tok) + tok)) + tokens))) + +(defun reset-grammar () + "Empty the current grammar from any existing rule." + (setf *rules* (make-rules-table) + *keywords* (make-keywords-table))) + +(defun generate-grammar (&optional (equal-p #'string-equal)) + "Return a GRAMMAR structure suitable for the PARSE function, using +the current rules. EQUAL-P, if present, is a function to be used to +match the input tokens; it defaults to STRING-EQUAL." + (resolve-all-nonterminals *rules*) + (remove-left-recursivity-from-rules *rules*) + (make-grammar :rules *rules* + :keywords *keywords* + :equal-p equal-p)) diff --git a/third_party/lisp/npg/src/package.lisp b/third_party/lisp/npg/src/package.lisp new file mode 100644 index 000000000000..b405f7b5f19e --- /dev/null +++ b/third_party/lisp/npg/src/package.lisp @@ -0,0 +1,50 @@ +;;; package.lisp --- backtracking parser package definition + +;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero + +;;; Author: Walter C. Pelissero +;;; Project: NPG a Naive Parser Generator + +#+cmu (ext:file-comment "$Module: package.lisp $") + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public License +;;; as published by the Free Software Foundation; either version 2.1 +;;; of the License, or (at your option) any later version. +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free +;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;;; 02111-1307 USA + +(in-package :cl-user) + +(defpackage :naive-parser-generator + (:nicknames :npg) + (:use :common-lisp) + (:export + #:parse ; The Parser + #:reset-grammar + #:generate-grammar + #:print-grammar-figures + #:grammar-keyword-p + #:keyword + #:grammar + #:make-token + #:token-value + #:token-type + #:token-position + #:later-position + #:defrule ; to define grammars + #:deftoken ; to define a lexer + #:input-cursor-mixin + #:copy-input-cursor-slots + #:dup-input-cursor + #:read-next-tokens + #:end-of-input + #:? #:+ #:* #:or + #:$vars #:$all #:$alist + #:$1 #:$2 #:$3 #:$4 #:$5 #:$6 #:$7 #:$8 #:$9 #:$10)) diff --git a/third_party/lisp/npg/src/parser.lisp b/third_party/lisp/npg/src/parser.lisp new file mode 100644 index 000000000000..328be1dcf30f --- /dev/null +++ b/third_party/lisp/npg/src/parser.lisp @@ -0,0 +1,234 @@ +;;; parser.lisp --- runtime parser + +;;; Copyright (C) 2003-2006, 2009 by Walter C. Pelissero + +;;; Author: Walter C. Pelissero +;;; Project: NPG a Naive Parser Generator + +#+cmu (ext:file-comment "$Module: parser.lisp $") + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public License +;;; as published by the Free Software Foundation; either version 2.1 +;;; of the License, or (at your option) any later version. +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free +;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;;; 02111-1307 USA + +;;; Commentary: +;;; +;;; This is the runtime part of the parser. The code that is +;;; responsible to execute the parser defined with the primitives +;;; found in define.lisp. + +(in-package :naive-parser-generator) + +(defvar *debug* nil + "Either nil or a stream where to write the debug informations.") +#+debug (declaim (fixnum *maximum-recursion-depth*)) +#+debug (defvar *maximum-recursion-depth* 1000 + "Maximum depth the parser is allowed to recursively call itself. +This is the only way for the parser to detect a loop in the grammar. +Tune this if your grammar is unusually complex.") + +(declaim (inline reduce-production)) +(defun reduce-production (production arguments) + "Apply PRODUCTION's action on ARGUMENTS. This has the effect of + \"reducing\" the production." + (when *debug* + (format *debug* "reducing ~S on ~S~%" production arguments)) + (flet ((safe-token-value (token) + (if (token-p token) + (token-value token) + token))) + (apply (prod-action production) (mapcar #'safe-token-value arguments)))) + +(defgeneric later-position (pos1 pos2) + (:documentation + "Compare two file postions and return true if POS1 is later than +POS2 in the input stream.")) + +;; This is meant to be overloaded in the lexer +(defmethod later-position ((pos1 integer) (pos2 integer)) + (> pos1 pos2)) + +;; this looks silly but turns out to be useful (see below) +(defmethod later-position (pos1 pos2) + (and (eq pos1 :eof) (not (eq pos2 :eof)))) + +(defgeneric read-next-tokens (tokens-source) + (:documentation "Read next token from a lexical analysed stream. The nature of +TOKENS-SOURCE is implementation dependent and any lexical analyzer is +supposed to specialise this method.")) + +;; This is the actual parser. the algorithm is pretty +;; straightforward, the execution of the reductions a bit less. Error +;; recovery is rather clumsy. + +(defun parse (grammar start tokenizer) + "Match a GRAMMAR against the list of input tokens coming from TOKENIZER. +Return the reduced values according to the nonterminal actions. Raise +an error on failure." + (declare (type grammar grammar) + (type symbol start)) + (labels + ((match-token (expected token) + (when *debug* + (format *debug* "match-token ~S ~S -> " expected token)) + (let ((res (cond ((symbolp expected) + ;; non-costant terminal (like identifiers) + (eq expected (token-type token))) + ((and (stringp expected) + (stringp (token-value token))) + ;; string costant terminal + (funcall (the function (grammar-equal-p grammar)) expected (token-value token))) + ((functionp expected) + ;; custom equality predicate (must be able + ;; to deal with token objects) + (funcall expected token)) + ;; all the rest + (t (equal expected (token-value token)))))) + (when *debug* + (format *debug* "~Amatched~%" (if res "" "not "))) + res)) + (match (expected matched #+debug depth) + (declare (list expected matched) + #+debug (fixnum depth)) + (let ((first-expected (car expected))) + (cond #+debug ((> depth *maximum-recursion-depth*) + (error "endless recursion on ~A ~A at ~A expecting ~S" + (token-type (car matched)) (token-value (car matched)) + (token-position (car matched)) expected)) + ((eq first-expected :any) + (match (cdr expected) (cdr matched) #+debug depth)) + ;; This is a trick to obtain partial parses. When we + ;; reach this expected token we assume we succeeded + ;; the parsing and return the remaining tokens as + ;; part of the match. + ((eq first-expected :rest) + ;; we could be at the end of input so we check this + (unless (cdr matched) + (setf (cdr matched) (list :rest))) + (list nil nil)) + ((rule-p first-expected) + ;; If it's a rule, then we try to match all its + ;; productions. We return the first that succeeds. + (loop + for production in (rule-productions first-expected) + for production-tokens of-type list = (prod-tokens production) + with last-error-position = nil + with last-error = nil + for (error-position error-descr) = + (progn + (when *debug* + (format *debug* "trying to match ~A: ~S~%" + (rule-name first-expected) production-tokens)) + (match (append production-tokens (cdr expected)) matched #+debug (1+ depth))) + do (cond ((not error-position) + (return (let ((args-count (prod-tokens-length production))) + (setf (cdr matched) + (cons (reduce-production + production + (subseq (the list (cdr matched)) 0 args-count)) + (nthcdr (1+ args-count) matched))) + (list nil nil)))) + ((or (not last-error) + (later-position error-position last-error-position)) + (setf last-error-position error-position + last-error error-descr))) + ;; if everything fails return the "best" error + finally (return (list last-error-position + (if *debug* + #'(lambda () + (format nil "~A, trying to match ~A" + (funcall (the function last-error)) + (rule-name first-expected))) + last-error))))) + (t + ;; if necessary load the next tokens + (when (null (cdr matched)) + (setf (cdr matched) (read-next-tokens tokenizer))) + (cond ((and (or (null expected) (eq first-expected :eof)) + (null (cdr matched))) + ;; This point is reached only once for each complete + ;; parsing. The expected tokens and the input + ;; tokens have been exhausted at the same time. + ;; Hence we succeeded the parsing. + (setf (cdr matched) (list :eof)) + (list nil nil)) + ((null expected) + ;; Garbage at end of parsing. This may mean that we + ;; have considered a production completed too soon. + (list (token-position (car matched)) + #'(lambda () + "garbage at end of parsing"))) + ((null (cdr matched)) + ;; EOF error + (list :eof + #'(lambda () + (format nil "end of input expecting ~S" expected)))) + (t ;; normal token + (let ((first-token (cadr matched))) + (if (match-token first-expected first-token) + (match (cdr expected) (cdr matched) #+debug depth) + ;; failed: we return the error + (list (token-position first-token) + #'(lambda () + (format nil "expected ~S but got ~S ~S" + first-expected (token-type first-token) + (token-value first-token))))))))))))) + (declare (inline match-token)) + (let ((result (list :head))) + (destructuring-bind (error-position error) + (match (list (find-rule start (grammar-rules grammar))) result #+debug 0) + (when error-position + (error "~A at ~A~%" (funcall (the function error)) error-position)) + (cadr result))))) + +(defgeneric terminals-in-grammar (grammar-or-hashtable) + (:documentation + "Find non constant terminal symbols in GRAMMAR.")) + +(defmethod terminals-in-grammar ((grammar hash-table)) + (loop + for rule being each hash-value of grammar + with terminals = '() + do (loop + for prod in (rule-productions rule) + do (loop + for tok in (prod-tokens prod) + when (symbolp tok) + do (pushnew tok terminals))) + finally (return terminals))) + +(defmethod terminals-in-grammar ((grammar grammar)) + (terminals-in-grammar (grammar-rules grammar))) + +(defun print-grammar-figures (grammar &optional (stream *standard-output*)) + (format stream "rules: ~A~%constant terminals: ~A~%variable terminals: ~S~%" + (hash-table-count (grammar-rules grammar)) + (hash-table-count (grammar-keywords grammar)) + (terminals-in-grammar (grammar-rules grammar)))) + + +(defun grammar-keyword-p (keyword grammar) + "Check if KEYWORD is part of this grammar." + (find-keyword keyword (grammar-keywords grammar))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defvar *grammars* (make-hash-table)) + +(defun find-grammar (name) + (gethash name *grammars*)) + +(defun delete-grammar (name) + (remhash name *grammars*)) + +(defun add-grammar (name grammar) + (setf (gethash name *grammars*) grammar)) -- cgit 1.4.1