about summary refs log tree commit diff
path: root/third_party/lisp/npg/src/parser.lisp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/lisp/npg/src/parser.lisp')
-rw-r--r--third_party/lisp/npg/src/parser.lisp234
1 files changed, 234 insertions, 0 deletions
diff --git a/third_party/lisp/npg/src/parser.lisp b/third_party/lisp/npg/src/parser.lisp
new file mode 100644
index 0000000000..328be1dcf3
--- /dev/null
+++ b/third_party/lisp/npg/src/parser.lisp
@@ -0,0 +1,234 @@
+;;;  parser.lisp --- runtime parser
+
+;;;  Copyright (C) 2003-2006, 2009 by Walter C. Pelissero
+
+;;;  Author: Walter C. Pelissero <walter@pelissero.de>
+;;;  Project: NPG a Naive Parser Generator
+
+#+cmu (ext:file-comment "$Module: parser.lisp $")
+
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 2.1
+;;; of the License, or (at your option) any later version.
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Lesser General Public License for more details.
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this library; if not, write to the Free
+;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+;;; 02111-1307 USA
+
+;;;  Commentary:
+;;;
+;;; This is the runtime part of the parser.  The code that is
+;;; responsible to execute the parser defined with the primitives
+;;; found in define.lisp.
+
+(in-package :naive-parser-generator)
+
+(defvar *debug* nil
+  "Either nil or a stream where to write the debug informations.")
+#+debug (declaim (fixnum *maximum-recursion-depth*))
+#+debug (defvar *maximum-recursion-depth* 1000
+  "Maximum depth the parser is allowed to recursively call itself.
+This is the only way for the parser to detect a loop in the grammar.
+Tune this if your grammar is unusually complex.")
+
+(declaim (inline reduce-production))
+(defun reduce-production (production arguments)
+  "Apply PRODUCTION's action on ARGUMENTS.  This has the effect of
+  \"reducing\" the production."
+  (when *debug*
+    (format *debug* "reducing ~S on ~S~%" production arguments))
+  (flet ((safe-token-value (token)
+	   (if (token-p token)
+	       (token-value token)
+	       token)))
+    (apply (prod-action production) (mapcar #'safe-token-value arguments))))
+
+(defgeneric later-position (pos1 pos2)
+  (:documentation
+   "Compare two file postions and return true if POS1 is later than
+POS2 in the input stream."))
+
+;; This is meant to be overloaded in the lexer
+(defmethod later-position ((pos1 integer) (pos2 integer))
+  (> pos1 pos2))
+
+;; this looks silly but turns out to be useful (see below)
+(defmethod later-position (pos1 pos2)
+  (and (eq pos1 :eof) (not (eq pos2 :eof))))
+
+(defgeneric read-next-tokens (tokens-source)
+  (:documentation "Read next token from a lexical analysed stream.  The nature of
+TOKENS-SOURCE is implementation dependent and any lexical analyzer is
+supposed to specialise this method."))
+
+;; This is the actual parser.  the algorithm is pretty
+;; straightforward, the execution of the reductions a bit less.  Error
+;; recovery is rather clumsy.
+
+(defun parse (grammar start tokenizer)
+  "Match a GRAMMAR against the list of input tokens coming from TOKENIZER.
+Return the reduced values according to the nonterminal actions.  Raise
+an error on failure."
+  (declare (type grammar grammar)
+	   (type symbol start))
+  (labels
+      ((match-token (expected token)
+	 (when *debug*
+	   (format *debug* "match-token ~S ~S -> " expected token))
+	 (let ((res (cond ((symbolp expected)
+			   ;; non-costant terminal (like identifiers)
+			   (eq expected (token-type token)))
+			  ((and (stringp expected)
+				(stringp (token-value token)))
+			   ;; string costant terminal
+			   (funcall (the function (grammar-equal-p grammar)) expected (token-value token)))
+			  ((functionp expected)
+			   ;; custom equality predicate (must be able
+			   ;; to deal with token objects)
+			   (funcall expected token))
+			  ;; all the rest
+			  (t (equal expected (token-value token))))))
+	   (when *debug*
+	     (format *debug* "~Amatched~%" (if res "" "not ")))
+	   res))
+       (match (expected matched #+debug depth)
+	 (declare (list expected matched)
+		  #+debug (fixnum depth))
+	 (let ((first-expected (car expected)))
+	   (cond #+debug ((> depth *maximum-recursion-depth*)
+		  (error "endless recursion on ~A ~A at ~A expecting ~S"
+			 (token-type (car matched)) (token-value (car matched))
+			 (token-position (car matched)) expected))
+		 ((eq first-expected :any)
+		  (match (cdr expected) (cdr matched) #+debug depth))
+		 ;; This is a trick to obtain partial parses.  When we
+		 ;; reach this expected token we assume we succeeded
+		 ;; the parsing and return the remaining tokens as
+		 ;; part of the match.
+		 ((eq first-expected :rest)
+		  ;; we could be at the end of input so we check this
+		  (unless (cdr matched)
+		    (setf (cdr matched) (list :rest)))
+		  (list nil nil))
+		 ((rule-p first-expected)
+		  ;; If it's a rule, then we try to match all its
+		  ;; productions.  We return the first that succeeds.
+		  (loop
+		     for production in (rule-productions first-expected)
+		     for production-tokens of-type list = (prod-tokens production)
+		     with last-error-position = nil
+		     with last-error = nil
+		     for (error-position error-descr) =
+		       (progn
+			 (when *debug*
+			   (format *debug* "trying to match ~A: ~S~%"
+				   (rule-name first-expected) production-tokens))
+			 (match (append production-tokens (cdr expected)) matched #+debug (1+ depth)))
+		     do (cond ((not error-position)
+			       (return (let ((args-count (prod-tokens-length production)))
+					 (setf (cdr matched)
+					       (cons (reduce-production
+						      production
+						      (subseq (the list (cdr matched)) 0 args-count))
+						     (nthcdr (1+ args-count) matched)))
+					 (list nil nil))))
+			      ((or (not last-error)
+				   (later-position error-position last-error-position))
+			       (setf last-error-position error-position
+				     last-error error-descr)))
+		     ;; if everything fails return the "best" error
+		     finally (return (list last-error-position
+					   (if *debug*
+					       #'(lambda ()
+						   (format nil "~A, trying to match ~A"
+							   (funcall (the function last-error))
+							   (rule-name first-expected)))
+					       last-error)))))
+		 (t
+		  ;; if necessary load the next tokens
+		  (when (null (cdr matched))
+		    (setf (cdr matched) (read-next-tokens tokenizer)))
+		  (cond ((and (or (null expected) (eq first-expected :eof))
+			      (null (cdr matched)))
+			 ;; This point is reached only once for each complete
+			 ;; parsing.  The expected tokens and the input
+			 ;; tokens have been exhausted at the same time.
+			 ;; Hence we succeeded the parsing.
+			 (setf (cdr matched) (list :eof))
+			 (list nil nil))
+			((null expected)
+			 ;; Garbage at end of parsing.  This may mean that we
+			 ;; have considered a production completed too soon.
+			 (list (token-position (car matched))
+			       #'(lambda ()
+				   "garbage at end of parsing")))
+			((null (cdr matched))
+			 ;; EOF error
+			 (list :eof
+			       #'(lambda ()
+				   (format nil "end of input expecting ~S" expected))))
+			(t ;; normal token
+			 (let ((first-token (cadr matched)))
+			   (if (match-token first-expected first-token)
+			       (match (cdr expected) (cdr matched) #+debug depth)
+			       ;; failed: we return the error
+			       (list (token-position first-token)
+				     #'(lambda ()
+					 (format nil "expected ~S but got ~S ~S"
+						 first-expected (token-type first-token)
+						 (token-value first-token)))))))))))))
+    (declare (inline match-token))
+    (let ((result (list :head)))
+      (destructuring-bind (error-position error)
+	  (match (list (find-rule start (grammar-rules grammar))) result #+debug 0)
+	(when error-position
+	  (error "~A at ~A~%" (funcall (the function error)) error-position))
+	(cadr result)))))
+
+(defgeneric terminals-in-grammar (grammar-or-hashtable)
+  (:documentation
+   "Find non constant terminal symbols in GRAMMAR."))
+
+(defmethod terminals-in-grammar ((grammar hash-table))
+  (loop
+     for rule being each hash-value of grammar
+     with terminals = '()
+     do (loop
+	   for prod in (rule-productions rule)
+	   do (loop
+		 for tok in (prod-tokens prod)
+		 when (symbolp tok)
+		 do (pushnew tok terminals)))
+     finally (return terminals)))
+
+(defmethod terminals-in-grammar ((grammar grammar))
+  (terminals-in-grammar (grammar-rules grammar)))
+
+(defun print-grammar-figures (grammar &optional (stream *standard-output*))
+  (format stream "rules: ~A~%constant terminals: ~A~%variable terminals: ~S~%"
+	  (hash-table-count (grammar-rules grammar))
+	  (hash-table-count (grammar-keywords grammar))
+	  (terminals-in-grammar (grammar-rules grammar))))
+
+
+(defun grammar-keyword-p (keyword grammar)
+  "Check if KEYWORD is part of this grammar."
+  (find-keyword keyword (grammar-keywords grammar)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defvar *grammars* (make-hash-table))
+
+(defun find-grammar (name)
+  (gethash name *grammars*))
+
+(defun delete-grammar (name)
+  (remhash name *grammars*))
+
+(defun add-grammar (name grammar)
+  (setf (gethash name *grammars*) grammar))