about summary refs log tree commit diff
path: root/third_party/lisp/npg/src/parser.lisp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/lisp/npg/src/parser.lisp')
-rw-r--r--third_party/lisp/npg/src/parser.lisp234
1 files changed, 234 insertions, 0 deletions
diff --git a/third_party/lisp/npg/src/parser.lisp b/third_party/lisp/npg/src/parser.lisp
new file mode 100644
index 0000000000..c15d26fe39
--- /dev/null
+++ b/third_party/lisp/npg/src/parser.lisp
@@ -0,0 +1,234 @@
+;;;  parser.lisp --- runtime parser
+
+;;;  Copyright (C) 2003-2006, 2009 by Walter C. Pelissero
+
+;;;  Author: Walter C. Pelissero <walter@pelissero.de>
+;;;  Project: NPG a Naive Parser Generator
+
+#+cmu (ext:file-comment "$Module: parser.lisp $")
+
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 2.1
+;;; of the License, or (at your option) any later version.
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Lesser General Public License for more details.
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this library; if not, write to the Free
+;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+;;; 02111-1307 USA
+
+;;;  Commentary:
+;;;
+;;; This is the runtime part of the parser.  The code that is
+;;; responsible to execute the parser defined with the primitives
+;;; found in define.lisp.
+
+(in-package :naive-parser-generator)
+
+(defvar *debug* nil
+  "Either nil or a stream where to write the debug informations.")
+#+debug (declaim (fixnum *maximum-recursion-depth*))
+#+debug (defvar *maximum-recursion-depth* 1000
+  "Maximum depth the parser is allowed to recursively call itself.
+This is the only way for the parser to detect a loop in the grammar.
+Tune this if your grammar is unusually complex.")
+
+(declaim (inline reduce-production))
+(defun reduce-production (production arguments)
+  "Apply PRODUCTION's action on ARGUMENTS.  This has the effect of
+  \"reducing\" the production."
+  (when *debug*
+    (format *debug* "reducing ~S on ~S~%" production arguments))
+  (flet ((safe-token-value (token)
+           (if (token-p token)
+               (token-value token)
+               token)))
+    (apply (prod-action production) (mapcar #'safe-token-value arguments))))
+
+(defgeneric later-position (pos1 pos2)
+  (:documentation
+   "Compare two file postions and return true if POS1 is later than
+POS2 in the input stream."))
+
+;; This is meant to be overloaded in the lexer
+(defmethod later-position ((pos1 integer) (pos2 integer))
+  (> pos1 pos2))
+
+;; this looks silly but turns out to be useful (see below)
+(defmethod later-position (pos1 pos2)
+  (and (eq pos1 :eof) (not (eq pos2 :eof))))
+
+(defgeneric read-next-tokens (tokens-source)
+  (:documentation "Read next token from a lexical analysed stream.  The nature of
+TOKENS-SOURCE is implementation dependent and any lexical analyzer is
+supposed to specialise this method."))
+
+;; This is the actual parser.  the algorithm is pretty
+;; straightforward, the execution of the reductions a bit less.  Error
+;; recovery is rather clumsy.
+
+(defun parse (grammar start tokenizer)
+  "Match a GRAMMAR against the list of input tokens coming from TOKENIZER.
+Return the reduced values according to the nonterminal actions.  Raise
+an error on failure."
+  (declare (type grammar grammar)
+           (type symbol start))
+  (labels
+      ((match-token (expected token)
+         (when *debug*
+           (format *debug* "match-token ~S ~S -> " expected token))
+         (let ((res (cond ((symbolp expected)
+                           ;; non-costant terminal (like identifiers)
+                           (eq expected (token-type token)))
+                          ((and (stringp expected)
+                                (stringp (token-value token)))
+                           ;; string costant terminal
+                           (funcall (the function (grammar-equal-p grammar)) expected (token-value token)))
+                          ((functionp expected)
+                           ;; custom equality predicate (must be able
+                           ;; to deal with token objects)
+                           (funcall expected token))
+                          ;; all the rest
+                          (t (equal expected (token-value token))))))
+           (when *debug*
+             (format *debug* "~Amatched~%" (if res "" "not ")))
+           res))
+       (match (expected matched #+debug depth)
+         (declare (list expected matched)
+                  #+debug (fixnum depth))
+         (let ((first-expected (car expected)))
+           (cond #+debug ((> depth *maximum-recursion-depth*)
+                  (error "endless recursion on ~A ~A at ~A expecting ~S"
+                         (token-type (car matched)) (token-value (car matched))
+                         (token-position (car matched)) expected))
+                 ((eq first-expected :any)
+                  (match (cdr expected) (cdr matched) #+debug depth))
+                 ;; This is a trick to obtain partial parses.  When we
+                 ;; reach this expected token we assume we succeeded
+                 ;; the parsing and return the remaining tokens as
+                 ;; part of the match.
+                 ((eq first-expected :rest)
+                  ;; we could be at the end of input so we check this
+                  (unless (cdr matched)
+                    (setf (cdr matched) (list :rest)))
+                  (list nil nil))
+                 ((rule-p first-expected)
+                  ;; If it's a rule, then we try to match all its
+                  ;; productions.  We return the first that succeeds.
+                  (loop
+                     for production in (rule-productions first-expected)
+                     for production-tokens of-type list = (prod-tokens production)
+                     with last-error-position = nil
+                     with last-error = nil
+                     for (error-position error-descr) =
+                       (progn
+                         (when *debug*
+                           (format *debug* "trying to match ~A: ~S~%"
+                                   (rule-name first-expected) production-tokens))
+                         (match (append production-tokens (cdr expected)) matched #+debug (1+ depth)))
+                     do (cond ((not error-position)
+                               (return (let ((args-count (prod-tokens-length production)))
+                                         (setf (cdr matched)
+                                               (cons (reduce-production
+                                                      production
+                                                      (subseq (the list (cdr matched)) 0 args-count))
+                                                     (nthcdr (1+ args-count) matched)))
+                                         (list nil nil))))
+                              ((or (not last-error)
+                                   (later-position error-position last-error-position))
+                               (setf last-error-position error-position
+                                     last-error error-descr)))
+                     ;; if everything fails return the "best" error
+                     finally (return (list last-error-position
+                                           (if *debug*
+                                               #'(lambda ()
+                                                   (format nil "~A, trying to match ~A"
+                                                           (funcall (the function last-error))
+                                                           (rule-name first-expected)))
+                                               last-error)))))
+                 (t
+                  ;; if necessary load the next tokens
+                  (when (null (cdr matched))
+                    (setf (cdr matched) (read-next-tokens tokenizer)))
+                  (cond ((and (or (null expected) (eq first-expected :eof))
+                              (null (cdr matched)))
+                         ;; This point is reached only once for each complete
+                         ;; parsing.  The expected tokens and the input
+                         ;; tokens have been exhausted at the same time.
+                         ;; Hence we succeeded the parsing.
+                         (setf (cdr matched) (list :eof))
+                         (list nil nil))
+                        ((null expected)
+                         ;; Garbage at end of parsing.  This may mean that we
+                         ;; have considered a production completed too soon.
+                         (list (token-position (car matched))
+                               #'(lambda ()
+                                   "garbage at end of parsing")))
+                        ((null (cdr matched))
+                         ;; EOF error
+                         (list :eof
+                               #'(lambda ()
+                                   (format nil "end of input expecting ~S" expected))))
+                        (t ;; normal token
+                         (let ((first-token (cadr matched)))
+                           (if (match-token first-expected first-token)
+                               (match (cdr expected) (cdr matched) #+debug depth)
+                               ;; failed: we return the error
+                               (list (token-position first-token)
+                                     #'(lambda ()
+                                         (format nil "expected ~S but got ~S ~S"
+                                                 first-expected (token-type first-token)
+                                                 (token-value first-token)))))))))))))
+    (declare (inline match-token))
+    (let ((result (list :head)))
+      (destructuring-bind (error-position error)
+          (match (list (find-rule start (grammar-rules grammar))) result #+debug 0)
+        (when error-position
+          (error "~A at ~A~%" (funcall (the function error)) error-position))
+        (cadr result)))))
+
+(defgeneric terminals-in-grammar (grammar-or-hashtable)
+  (:documentation
+   "Find non constant terminal symbols in GRAMMAR."))
+
+(defmethod terminals-in-grammar ((grammar hash-table))
+  (loop
+     for rule being each hash-value of grammar
+     with terminals = '()
+     do (loop
+           for prod in (rule-productions rule)
+           do (loop
+                 for tok in (prod-tokens prod)
+                 when (symbolp tok)
+                 do (pushnew tok terminals)))
+     finally (return terminals)))
+
+(defmethod terminals-in-grammar ((grammar grammar))
+  (terminals-in-grammar (grammar-rules grammar)))
+
+(defun print-grammar-figures (grammar &optional (stream *standard-output*))
+  (format stream "rules: ~A~%constant terminals: ~A~%variable terminals: ~S~%"
+          (hash-table-count (grammar-rules grammar))
+          (hash-table-count (grammar-keywords grammar))
+          (terminals-in-grammar (grammar-rules grammar))))
+
+
+(defun grammar-keyword-p (keyword grammar)
+  "Check if KEYWORD is part of this grammar."
+  (find-keyword keyword (grammar-keywords grammar)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defvar *grammars* (make-hash-table))
+
+(defun find-grammar (name)
+  (gethash name *grammars*))
+
+(defun delete-grammar (name)
+  (remhash name *grammars*))
+
+(defun add-grammar (name grammar)
+  (setf (gethash name *grammars*) grammar))