diff options
author | Vincent Ambo <mail@tazj.in> | 2021-12-15T21·51+0300 |
---|---|---|
committer | tazjin <mail@tazj.in> | 2021-12-15T22·09+0000 |
commit | db742c2035f9cfcf496b4a9b1640d1cf623a2c0f (patch) | |
tree | a9bcaf6632d910ae68185319cc5bcd408667a8d9 /third_party/lisp/s-xml/src | |
parent | 75ca24c60a57ab894da4d404755b8b4094284ad8 (diff) |
chore(3p/lisp): use nixpkgs sources for s-xml r/3258
this one was a little more difficult because it needs a patch, there's something wonky with the definition order fwiw, the upstream cvs repository ... server errors. Change-Id: I2d99359edec36b578389f1be1fcf077743c29c4e Reviewed-on: https://cl.tvl.fyi/c/depot/+/4342 Tested-by: BuildkiteCI Reviewed-by: grfn <grfn@gws.fyi>
Diffstat (limited to 'third_party/lisp/s-xml/src')
-rw-r--r-- | third_party/lisp/s-xml/src/dom.lisp | 75 | ||||
-rw-r--r-- | third_party/lisp/s-xml/src/lxml-dom.lisp | 83 | ||||
-rw-r--r-- | third_party/lisp/s-xml/src/package.lisp | 46 | ||||
-rw-r--r-- | third_party/lisp/s-xml/src/sxml-dom.lisp | 76 | ||||
-rw-r--r-- | third_party/lisp/s-xml/src/xml-struct-dom.lisp | 125 | ||||
-rw-r--r-- | third_party/lisp/s-xml/src/xml.lisp | 702 |
6 files changed, 0 insertions, 1107 deletions
diff --git a/third_party/lisp/s-xml/src/dom.lisp b/third_party/lisp/s-xml/src/dom.lisp deleted file mode 100644 index 74d1c371db22..000000000000 --- a/third_party/lisp/s-xml/src/dom.lisp +++ /dev/null @@ -1,75 +0,0 @@ -;;;; -*- mode: lisp -*- -;;;; -;;;; $Id: dom.lisp,v 1.1.1.1 2004/06/07 18:49:56 scaekenberghe Exp $ -;;;; -;;;; This is the generic simple DOM parser and printer interface. -;;;; -;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA. -;;;; -;;;; You are granted the rights to distribute and use this software -;;;; as governed by the terms of the Lisp Lesser General Public License -;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. - -(in-package :s-xml) - -;;; top level DOM parser interface - -(defgeneric parse-xml-dom (stream output-type) - (:documentation "Parse a character stream as XML and generate a DOM of output-type")) - -(defun parse-xml (stream &key (output-type :lxml)) - "Parse a character stream as XML and generate a DOM of output-type, defaulting to :lxml" - (parse-xml-dom stream output-type)) - -(defun parse-xml-string (string &key (output-type :lxml)) - "Parse a string as XML and generate a DOM of output-type, defaulting to :lxml" - (with-input-from-string (stream string) - (parse-xml-dom stream output-type))) - -(defun parse-xml-file (filename &key (output-type :lxml)) - "Parse a character file as XML and generate a DOM of output-type, defaulting to :lxml" - (with-open-file (in filename :direction :input) - (parse-xml-dom in output-type))) - -;;; top level DOM printer interface - -(defgeneric print-xml-dom (dom input-type stream pretty level) - (:documentation "Generate XML output on a character stream from a DOM of input-type, optionally pretty printing using level")) - -(defun print-xml (dom &key (stream t) (pretty nil) (input-type :lxml) (header)) - "Generate XML output on a character stream (t by default) from a DOM of input-type (:lxml by default), optionally pretty printing (off by default), or adding a header (none by default)" - (when header (format stream header)) - (when pretty (terpri stream)) - (print-xml-dom dom input-type stream pretty 1)) - -(defun print-xml-string (dom &key (pretty nil) (input-type :lxml)) - "Generate XML output to a string from a DOM of input-type (:lxml by default), optionally pretty printing (off by default)" - (with-output-to-string (stream) - (print-xml dom :stream stream :pretty pretty :input-type input-type))) - -;;; shared/common support functions - -(defun print-spaces (n stream &optional (preceding-newline t)) - (when preceding-newline - (terpri stream)) - (loop :repeat n - :do (write-char #\Space stream))) - -(defun print-solitary-tag (tag stream) - (write-char #\< stream) - (print-identifier tag stream) - (write-string "/>" stream)) - -(defun print-closing-tag (tag stream) - (write-string "</" stream) - (print-identifier tag stream) - (write-char #\> stream)) - -(defun print-attribute (name value stream) - (write-char #\space stream) - (print-identifier name stream t) - (write-string "=\"" stream) - (print-string-xml value stream) - (write-char #\" stream)) - -;;;; eof diff --git a/third_party/lisp/s-xml/src/lxml-dom.lisp b/third_party/lisp/s-xml/src/lxml-dom.lisp deleted file mode 100644 index d43df6cf8171..000000000000 --- a/third_party/lisp/s-xml/src/lxml-dom.lisp +++ /dev/null @@ -1,83 +0,0 @@ -;;;; -*- mode: lisp -*- -;;;; -;;;; $Id: lxml-dom.lisp,v 1.5 2005/09/20 09:57:44 scaekenberghe Exp $ -;;;; -;;;; LXML implementation of the generic DOM parser and printer. -;;;; -;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA. -;;;; -;;;; You are granted the rights to distribute and use this software -;;;; as governed by the terms of the Lisp Lesser General Public License -;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. - -(in-package :s-xml) - -;;; the lxml hooks to generate lxml - -(defun lxml-new-element-hook (name attributes seed) - (declare (ignore name attributes seed)) - '()) - -(defun lxml-finish-element-hook (name attributes parent-seed seed) - (let ((xml-element - (cond ((and (null seed) (null attributes)) - name) - (attributes - `((,name ,@(let (list) - (dolist (attribute attributes list) - (push (cdr attribute) list) - (push (car attribute) list)))) - ,@(nreverse seed))) - (t - `(,name ,@(nreverse seed)))))) - (cons xml-element parent-seed))) - -(defun lxml-text-hook (string seed) - (cons string seed)) - -;;; standard DOM interfaces - -(defmethod parse-xml-dom (stream (output-type (eql :lxml))) - (car (start-parse-xml stream - (make-instance 'xml-parser-state - :new-element-hook #'lxml-new-element-hook - :finish-element-hook #'lxml-finish-element-hook - :text-hook #'lxml-text-hook)))) - -(defun plist->alist (plist) - (when plist - (cons (cons (first plist) (second plist)) - (plist->alist (rest (rest plist)))))) - -(defmethod print-xml-dom (dom (input-type (eql :lxml)) stream pretty level) - (declare (special *namespaces*)) - (cond ((symbolp dom) (print-solitary-tag dom stream)) - ((stringp dom) (print-string-xml dom stream)) - ((consp dom) - (let (tag attributes) - (cond ((symbolp (first dom)) (setf tag (first dom))) - ((consp (first dom)) (setf tag (first (first dom)) - attributes (plist->alist (rest (first dom))))) - (t (error "Input not recognized as LXML ~s" dom))) - (let ((*namespaces* (extend-namespaces attributes *namespaces*))) - (write-char #\< stream) - (print-identifier tag stream) - (loop :for (name . value) :in attributes - :do (print-attribute name value stream)) - (if (rest dom) - (let ((children (rest dom))) - (write-char #\> stream) - (if (and (= (length children) 1) (stringp (first children))) - (print-string-xml (first children) stream) - (progn - (dolist (child children) - (when pretty (print-spaces (* 2 level) stream)) - (if (stringp child) - (print-string-xml child stream) - (print-xml-dom child input-type stream pretty (1+ level)))) - (when pretty (print-spaces (* 2 (1- level)) stream)))) - (print-closing-tag tag stream)) - (write-string "/>" stream))))) - (t (error "Input not recognized as LXML ~s" dom)))) - -;;;; eof \ No newline at end of file diff --git a/third_party/lisp/s-xml/src/package.lisp b/third_party/lisp/s-xml/src/package.lisp deleted file mode 100644 index f90f0f49a166..000000000000 --- a/third_party/lisp/s-xml/src/package.lisp +++ /dev/null @@ -1,46 +0,0 @@ -;;;; -*- mode: lisp -*- -;;;; -;;;; $Id: package.lisp,v 1.7 2006/01/19 20:00:06 scaekenberghe Exp $ -;;;; -;;;; This is a Common Lisp implementation of a very basic XML parser. -;;;; The parser is non-validating. -;;;; The API into the parser is pure functional parser hook model that comes from SSAX, -;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net -;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one. -;;;; -;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA. -;;;; -;;;; You are granted the rights to distribute and use this software -;;;; as governed by the terms of the Lisp Lesser General Public License -;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. - -(defpackage s-xml - (:use common-lisp) - (:export - ;; main parser interface - #:start-parse-xml - #:print-string-xml - #:xml-parser-error #:xml-parser-error-message #:xml-parser-error-args #:xml-parser-error-stream - #:xml-parser-state #:get-entities #:get-seed - #:get-new-element-hook #:get-finish-element-hook #:get-text-hook - ;; callbacks - #:*attribute-name-parser* - #:*attribute-value-parser* - #:parse-attribute-name - #:parse-attribute-value - ;; dom parser and printer - #:parse-xml-dom #:parse-xml #:parse-xml-string #:parse-xml-file - #:print-xml-dom #:print-xml #:print-xml-string - ;; xml-element structure - #:make-xml-element #:xml-element-children #:xml-element-name - #:xml-element-attribute #:xml-element-attributes - #:xml-element-p #:new-xml-element #:first-xml-element-child - ;; namespaces - #:*ignore-namespaces* #:*local-namespace* #:*namespaces* - #:*require-existing-symbols* #:*auto-export-symbols* #:*auto-create-namespace-packages* - #:find-namespace #:register-namespace #:get-prefix #:get-uri #:get-package - #:resolve-identifier #:extend-namespaces #:print-identifier #:split-identifier) - (:documentation - "A simple XML parser with an efficient, purely functional, event-based interface as well as a DOM interface")) - -;;;; eof diff --git a/third_party/lisp/s-xml/src/sxml-dom.lisp b/third_party/lisp/s-xml/src/sxml-dom.lisp deleted file mode 100644 index c9e0f9e0db4e..000000000000 --- a/third_party/lisp/s-xml/src/sxml-dom.lisp +++ /dev/null @@ -1,76 +0,0 @@ -;;;; -*- mode: lisp -*- -;;;; -;;;; $Id: sxml-dom.lisp,v 1.4 2005/09/20 09:57:48 scaekenberghe Exp $ -;;;; -;;;; LXML implementation of the generic DOM parser and printer. -;;;; -;;;; Copyright (C) 2003, 2004 Sven Van Caekenberghe, Beta Nine BVBA. -;;;; -;;;; You are granted the rights to distribute and use this software -;;;; as governed by the terms of the Lisp Lesser General Public License -;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. - -(in-package :s-xml) - -;;; the sxml hooks to generate sxml - -(defun sxml-new-element-hook (name attributes seed) - (declare (ignore name attributes seed)) - '()) - -(defun sxml-finish-element-hook (name attributes parent-seed seed) - (let ((xml-element (append (list name) - (when attributes - (list (let (list) - (dolist (attribute attributes (cons :@ list)) - (push (list (car attribute) (cdr attribute)) list))))) - (nreverse seed)))) - (cons xml-element parent-seed))) - -(defun sxml-text-hook (string seed) - (cons string seed)) - -;;; the standard DOM interfaces - -(defmethod parse-xml-dom (stream (output-type (eql :sxml))) - (car (start-parse-xml stream - (make-instance 'xml-parser-state - :new-element-hook #'sxml-new-element-hook - :finish-element-hook #'sxml-finish-element-hook - :text-hook #'sxml-text-hook)))) - -(defmethod print-xml-dom (dom (input-type (eql :sxml)) stream pretty level) - (declare (special *namespaces*)) - (cond ((stringp dom) (print-string-xml dom stream)) - ((consp dom) - (let ((tag (first dom)) - attributes - children) - (if (and (consp (second dom)) (eq (first (second dom)) :@)) - (setf attributes (rest (second dom)) - children (rest (rest dom))) - (setf children (rest dom))) - (let ((*namespaces* (extend-namespaces (loop :for (name value) :in attributes - :collect (cons name value)) - *namespaces*))) - (write-char #\< stream) - (print-identifier tag stream) - (loop :for (name value) :in attributes - :do (print-attribute name value stream)) - (if children - (progn - (write-char #\> stream) - (if (and (= (length children) 1) (stringp (first children))) - (print-string-xml (first children) stream) - (progn - (dolist (child children) - (when pretty (print-spaces (* 2 level) stream)) - (if (stringp child) - (print-string-xml child stream) - (print-xml-dom child input-type stream pretty (1+ level)))) - (when pretty (print-spaces (* 2 (1- level)) stream)))) - (print-closing-tag tag stream)) - (write-string "/>" stream))))) - (t (error "Input not recognized as SXML ~s" dom)))) - -;;;; eof diff --git a/third_party/lisp/s-xml/src/xml-struct-dom.lisp b/third_party/lisp/s-xml/src/xml-struct-dom.lisp deleted file mode 100644 index 70373889152f..000000000000 --- a/third_party/lisp/s-xml/src/xml-struct-dom.lisp +++ /dev/null @@ -1,125 +0,0 @@ -;;;; -*- mode: lisp -*- -;;;; -;;;; $Id: xml-struct-dom.lisp,v 1.2 2005/08/29 15:01:47 scaekenberghe Exp $ -;;;; -;;;; XML-STRUCT implementation of the generic DOM parser and printer. -;;;; -;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA. -;;;; -;;;; You are granted the rights to distribute and use this software -;;;; as governed by the terms of the Lisp Lesser General Public License -;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. - -(in-package :s-xml) - -;;; xml-element struct datastructure and API - -(defstruct xml-element - name ; :tag-name - attributes ; a assoc list of (:attribute-name . "attribute-value") - children ; a list of children/content either text strings or xml-elements - ) - -(setf (documentation 'xml-element-p 'function) - "Return T when the argument is an xml-element struct" - (documentation 'xml-element-attributes 'function) - "Return the alist of attribute names and values dotted pairs from an xml-element struct" - (documentation 'xml-element-children 'function) - "Return the list of children from an xml-element struct" - (documentation 'xml-element-name 'function) - "Return the name from an xml-element struct" - (documentation 'make-xml-element 'function) - "Make and return a new xml-element struct") - -(defun xml-element-attribute (xml-element key) - "Return the string value of the attribute with name the keyword :key - of xml-element if any, return null if not found" - (let ((pair (assoc key (xml-element-attributes xml-element) :test #'eq))) - (when pair (cdr pair)))) - -(defun (setf xml-element-attribute) (value xml-element key) - "Set the string value of the attribute with name the keyword :key of - xml-element, creating a new attribute if necessary or overwriting an - existing one, returning the value" - (let ((attributes (xml-element-attributes xml-element))) - (if (null attributes) - (push (cons key value) (xml-element-attributes xml-element)) - (let ((pair (assoc key attributes :test #'eq))) - (if pair - (setf (cdr pair) value) - (push (cons key value) (xml-element-attributes xml-element))))) - value)) - -(defun new-xml-element (name &rest children) - "Make a new xml-element with name and children" - (make-xml-element :name name :children children)) - -(defun first-xml-element-child (xml-element) - "Get the first child of an xml-element" - (first (xml-element-children xml-element))) - -(defun xml-equal (xml-1 xml-2) - (and (xml-element-p xml-1) - (xml-element-p xml-2) - (eq (xml-element-name xml-1) - (xml-element-name xml-2)) - (equal (xml-element-attributes xml-1) - (xml-element-attributes xml-2)) - (reduce #'(lambda (&optional (x t) (y t)) (and x y)) - (mapcar #'(lambda (x y) - (or (and (stringp x) (stringp y) (string= x y)) - (xml-equal x y))) - (xml-element-children xml-1) - (xml-element-children xml-2))))) - -;;; printing xml structures - -(defmethod print-xml-dom (xml-element (input-type (eql :xml-struct)) stream pretty level) - (declare (special *namespaces*)) - (let ((*namespaces* (extend-namespaces (xml-element-attributes xml-element) - *namespaces*))) - (write-char #\< stream) - (print-identifier (xml-element-name xml-element) stream) - (loop :for (name . value) :in (xml-element-attributes xml-element) - :do (print-attribute name value stream)) - (let ((children (xml-element-children xml-element))) - (if children - (progn - (write-char #\> stream) - (if (and (= (length children) 1) (stringp (first children))) - (print-string-xml (first children) stream) - (progn - (dolist (child children) - (when pretty (print-spaces (* 2 level) stream)) - (if (stringp child) - (print-string-xml child stream) - (print-xml-dom child input-type stream pretty (1+ level)))) - (when pretty (print-spaces (* 2 (1- level)) stream)))) - (print-closing-tag (xml-element-name xml-element) stream)) - (write-string "/>" stream))))) - -;;; the standard hooks to generate xml-element structs - -(defun standard-new-element-hook (name attributes seed) - (declare (ignore name attributes seed)) - '()) - -(defun standard-finish-element-hook (name attributes parent-seed seed) - (let ((xml-element (make-xml-element :name name - :attributes attributes - :children (nreverse seed)))) - (cons xml-element parent-seed))) - -(defun standard-text-hook (string seed) - (cons string seed)) - -;;; top level standard parser interfaces - -(defmethod parse-xml-dom (stream (output-type (eql :xml-struct))) - (car (start-parse-xml stream - (make-instance 'xml-parser-state - :new-element-hook #'standard-new-element-hook - :finish-element-hook #'standard-finish-element-hook - :text-hook #'standard-text-hook)))) - -;;;; eof diff --git a/third_party/lisp/s-xml/src/xml.lisp b/third_party/lisp/s-xml/src/xml.lisp deleted file mode 100644 index 8a2076985a49..000000000000 --- a/third_party/lisp/s-xml/src/xml.lisp +++ /dev/null @@ -1,702 +0,0 @@ -;;;; -*- mode: lisp -*- -;;;; -;;;; $Id: xml.lisp,v 1.15 2006/01/19 20:00:06 scaekenberghe Exp $ -;;;; -;;;; This is a Common Lisp implementation of a basic but usable XML parser. -;;;; The parser is non-validating and not complete (no PI). -;;;; Namespace and entities are handled. -;;;; The API into the parser is a pure functional parser hook model that comes from SSAX, -;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net -;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one. -;;;; -;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA. -;;;; -;;;; You are granted the rights to distribute and use this software -;;;; as governed by the terms of the Lisp Lesser General Public License -;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. - -(in-package :s-xml) - -;;; (tazjin): moved up here because something was wonky with the -;;; definition order -(defvar *ignore-namespaces* nil - "When t, namespaces are ignored like in the old version of S-XML") - -;;; error reporting - -(define-condition xml-parser-error (error) - ((message :initarg :message :reader xml-parser-error-message) - (args :initarg :args :reader xml-parser-error-args) - (stream :initarg :stream :reader xml-parser-error-stream :initform nil)) - (:report (lambda (condition stream) - (format stream - "XML parser ~?~@[ near stream position ~d~]." - (xml-parser-error-message condition) - (xml-parser-error-args condition) - (and (xml-parser-error-stream condition) - (file-position (xml-parser-error-stream condition)))))) - (:documentation "Thrown by the XML parser to indicate errorneous input")) - -(setf (documentation 'xml-parser-error-message 'function) - "Get the message from an XML parser error" - (documentation 'xml-parser-error-args 'function) - "Get the error arguments from an XML parser error" - (documentation 'xml-parser-error-stream 'function) - "Get the stream from an XML parser error") - -(defun parser-error (message &optional args stream) - (make-condition 'xml-parser-error - :message message - :args args - :stream stream)) - -;; attribute parsing hooks -;; this is a bit complicated, refer to the mailing lists for a more detailed explanation - -(defun parse-attribute-name (string) - "Default parser for the attribute name" - (declare (special *namespaces*)) - (resolve-identifier string *namespaces* t)) - -(defun parse-attribute-value (name string) - "Default parser for the attribute value" - (declare (ignore name) - (special *ignore-namespace*)) - (if *ignore-namespaces* - (copy-seq string) - string)) - -(defparameter *attribute-name-parser* #'parse-attribute-name - "Called to compute interned attribute name from a buffer that will be reused") - -(defparameter *attribute-value-parser* #'parse-attribute-value - "Called to compute an element of an attribute list from a buffer that will be reused") - -;;; utilities - -(defun whitespace-char-p (char) - "Is char an XML whitespace character ?" - (declare (type character char)) - (or (char= char #\space) - (char= char #\tab) - (char= char #\return) - (char= char #\linefeed))) - -(defun identifier-char-p (char) - "Is char an XML identifier character ?" - (declare (type character char)) - (or (and (char<= #\A char) (char<= char #\Z)) - (and (char<= #\a char) (char<= char #\z)) - (and (char<= #\0 char) (char<= char #\9)) - (char= char #\-) - (char= char #\_) - (char= char #\.) - (char= char #\:))) - -(defun skip-whitespace (stream) - "Skip over XML whitespace in stream, return first non-whitespace - character which was peeked but not read, return nil on eof" - (loop - (let ((char (peek-char nil stream nil #\Null))) - (declare (type character char)) - (if (whitespace-char-p char) - (read-char stream) - (return char))))) - -(defun make-extendable-string (&optional (size 10)) - "Make an extendable string which is a one-dimensional character - array which is adjustable and has a fill pointer" - (make-array size - :element-type 'character - :adjustable t - :fill-pointer 0)) - -(defun print-string-xml (string stream &key (start 0) end) - "Write the characters of string to stream using basic XML conventions" - (loop for offset upfrom start below (or end (length string)) - for char = (char string offset) - do (case char - (#\& (write-string "&" stream)) - (#\< (write-string "<" stream)) - (#\> (write-string ">" stream)) - (#\" (write-string """ stream)) - ((#\newline #\return #\tab) (write-char char stream)) - (t (if (and (<= 32 (char-code char)) - (<= (char-code char) 126)) - (write-char char stream) - (progn - (write-string "&#x" stream) - (write (char-code char) :stream stream :base 16) - (write-char #\; stream))))))) - -(defun make-standard-entities () - "A hashtable mapping XML entity names to their replacement strings, - filled with the standard set" - (let ((entities (make-hash-table :test #'equal))) - (setf (gethash "amp" entities) (string #\&) - (gethash "quot" entities) (string #\") - (gethash "apos" entities) (string #\') - (gethash "lt" entities) (string #\<) - (gethash "gt" entities) (string #\>) - (gethash "nbsp" entities) (string #\space)) - entities)) - -(defun resolve-entity (stream extendable-string entities entity) - "Read and resolve an XML entity from stream, positioned after the '&' entity marker, - accepting &name; &#DEC; and &#xHEX; formats, - destructively modifying string, which is also returned, - destructively modifying entity, incorrect entity formats result in errors" - (declare (type (vector character) entity)) - (loop - (let ((char (read-char stream nil #\Null))) - (declare (type character char)) - (cond ((char= char #\Null) (error (parser-error "encountered eof before end of entity"))) - ((char= #\; char) (return)) - (t (vector-push-extend char entity))))) - (if (char= (char entity 0) #\#) - (let ((code (if (char= (char entity 1) #\x) - (parse-integer entity :start 2 :radix 16 :junk-allowed t) - (parse-integer entity :start 1 :radix 10 :junk-allowed t)))) - (when (null code) - (error (parser-error "encountered incorrect entity &~s;" (list entity) stream))) - (vector-push-extend (code-char code) extendable-string)) - (let ((value (gethash entity entities))) - (if value - (loop :for char :across value - :do (vector-push-extend char extendable-string)) - (error (parser-error "encountered unknown entity &~s;" (list entity) stream))))) - extendable-string) - -;;; namespace support - -(defclass xml-namespace () - ((uri :documentation "The URI used to identify this namespace" - :accessor get-uri - :initarg :uri) - (prefix :documentation "The preferred prefix assigned to this namespace" - :accessor get-prefix - :initarg :prefix - :initform nil) - (package :documentation "The Common Lisp package where this namespace's symbols are interned" - :accessor get-package - :initarg :package - :initform nil)) - (:documentation "Describes an XML namespace and how it is handled")) - -(setf (documentation 'get-uri 'function) - "The URI used to identify this namespace" - (documentation 'get-prefix 'function) - "The preferred prefix assigned to this namespace" - (documentation 'get-package 'function) - "The Common Lisp package where this namespace's symbols are interned") - -(defmethod print-object ((object xml-namespace) stream) - (print-unreadable-object (object stream :type t :identity t) - (format stream "~A - ~A" (get-prefix object) (get-uri object)))) - -(defvar *local-namespace* (make-instance 'xml-namespace - :uri "local" - :prefix "" - :package (find-package :keyword)) - "The local (global default) XML namespace") - -(defvar *xml-namespace* (make-instance 'xml-namespace - :uri "http://www.w3.org/XML/1998/namespace" - :prefix "xml" - :package (or (find-package :xml) - (make-package :xml :nicknames '("XML")))) - "REC-xml-names-19990114 says the prefix xml is bound to the namespace http://www.w3.org/XML/1998/namespace.") - -(defvar *known-namespaces* (list *local-namespace* *xml-namespace*) - "The list of known/defined namespaces") - -(defvar *namespaces* `(("xml" . ,*xml-namespace*) ("" . ,*local-namespace*)) - "Ordered list of (prefix . XML-namespace) bindings currently in effect - special variable") - -(defun find-namespace (uri) - "Find a registered XML namespace identified by uri" - (find uri *known-namespaces* :key #'get-uri :test #'string-equal)) - -(defun register-namespace (uri prefix package) - "Register a new or redefine an existing XML namespace defined by uri with prefix and package" - (let ((namespace (find-namespace uri))) - (if namespace - (setf (get-prefix namespace) prefix - (get-package namespace) (find-package package)) - (push (setf namespace (make-instance 'xml-namespace - :uri uri - :prefix prefix - :package (find-package package))) - *known-namespaces*)) - namespace)) - -(defun find-namespace-binding (prefix namespaces) - "Find the XML namespace currently bound to prefix in the namespaces bindings" - (cdr (assoc prefix namespaces :test #'string-equal))) - -(defun split-identifier (identifier) - "Split an identifier 'prefix:name' and return (values prefix name)" - (when (symbolp identifier) - (setf identifier (symbol-name identifier))) - (let ((colon-position (position #\: identifier :test #'char=))) - (if colon-position - (values (subseq identifier 0 colon-position) - (subseq identifier (1+ colon-position))) - (values nil identifier)))) - -(defvar *require-existing-symbols* nil - "If t, each XML identifier must exist as symbol already") - -(defvar *auto-export-symbols* t - "If t, export newly interned symbols form their packages") - -(defun resolve-identifier (identifier namespaces &optional as-attribute) - "Resolve the string identifier in the list of namespace bindings" - (if *ignore-namespaces* - (intern identifier :keyword) - (flet ((intern-symbol (string package) ; intern string as a symbol in package - (if *require-existing-symbols* - (let ((symbol (find-symbol string package))) - (or symbol - (error "Symbol ~s does not exist in ~s" string package))) - (let ((symbol (intern string package))) - (when (and *auto-export-symbols* - (not (eql package (find-package :keyword)))) - (export symbol package)) - symbol)))) - (multiple-value-bind (prefix name) - (split-identifier identifier) - (if (or (null prefix) (string= prefix "xmlns")) - (if as-attribute - (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*)) - (let ((default-namespace (find-namespace-binding "" namespaces))) - (intern-symbol name (get-package default-namespace)))) - (let ((namespace (find-namespace-binding prefix namespaces))) - (if namespace - (intern-symbol name (get-package namespace)) - (error "namespace not found for prefix ~s" prefix)))))))) - -(defvar *auto-create-namespace-packages* t - "If t, new packages will be created for namespaces, if needed, named by the prefix") - -(defun new-namespace (uri &optional prefix) - "Register a new namespace for uri and prefix, creating a package if necessary" - (if prefix - (register-namespace uri - prefix - (or (find-package prefix) - (if *auto-create-namespace-packages* - (make-package prefix :nicknames `(,(string-upcase prefix))) - (error "Cannot find or create package ~s" prefix)))) - (let ((unique-name (loop :for i :upfrom 0 - :do (let ((name (format nil "ns-~d" i))) - (when (not (find-package name)) - (return name)))))) - (register-namespace uri - unique-name - (if *auto-create-namespace-packages* - (make-package (string-upcase unique-name) :nicknames `(,unique-name)) - (error "Cannot create package ~s" unique-name)))))) - -(defun extend-namespaces (attributes namespaces) - "Given possible 'xmlns[:prefix]' attributes, extend the namespaces bindings" - (unless *ignore-namespaces* - (let (default-namespace-uri) - (loop :for (key . value) :in attributes - :do (if (string= key "xmlns") - (setf default-namespace-uri value) - (multiple-value-bind (prefix name) - (split-identifier key) - (when (string= prefix "xmlns") - (let* ((uri value) - (prefix name) - (namespace (find-namespace uri))) - (unless namespace - (setf namespace (new-namespace uri prefix))) - (push `(,prefix . ,namespace) namespaces)))))) - (when default-namespace-uri - (let ((namespace (find-namespace default-namespace-uri))) - (unless namespace - (setf namespace (new-namespace default-namespace-uri))) - (push `("" . ,namespace) namespaces))))) - namespaces) - -(defun print-identifier (identifier stream &optional as-attribute) - "Print identifier on stream using namespace conventions" - (declare (ignore as-attribute) (special *namespaces*)) - (if *ignore-namespaces* - (princ identifier stream) - (if (symbolp identifier) - (let ((package (symbol-package identifier)) - (name (symbol-name identifier))) - (let* ((namespace (find package *known-namespaces* :key #'get-package)) - (prefix (or (car (find namespace *namespaces* :key #'cdr)) - (get-prefix namespace)))) - (if (string= prefix "") - (princ name stream) - (format stream "~a:~a" prefix name)))) - (princ identifier stream)))) - -;;; the parser state - -(defclass xml-parser-state () - ((entities :documentation "A hashtable mapping XML entity names to their replacement stings" - :accessor get-entities - :initarg :entities - :initform (make-standard-entities)) - (seed :documentation "The user seed object" - :accessor get-seed - :initarg :seed - :initform nil) - (buffer :documentation "The main reusable character buffer" - :accessor get-buffer - :initform (make-extendable-string)) - (mini-buffer :documentation "The secondary, smaller reusable character buffer" - :accessor get-mini-buffer - :initform (make-extendable-string)) - (new-element-hook :documentation "Called when new element starts" - ;; Handle the start of a new xml element with name and attributes, - ;; receiving seed from previous element (sibling or parent) - ;; return seed to be used for first child (content) - ;; or directly to finish-element-hook - :accessor get-new-element-hook - :initarg :new-element-hook - :initform #'(lambda (name attributes seed) - (declare (ignore name attributes)) - seed)) - (finish-element-hook :documentation "Called when element ends" - ;; Handle the end of an xml element with name and attributes, - ;; receiving parent-seed, the seed passed to us when this element started, - ;; i.e. passed to our corresponding new-element-hook - ;; and receiving seed from last child (content) - ;; or directly from new-element-hook - ;; return final seed for this element to next element (sibling or parent) - :accessor get-finish-element-hook - :initarg :finish-element-hook - :initform #'(lambda (name attributes parent-seed seed) - (declare (ignore name attributes parent-seed)) - seed)) - (text-hook :documentation "Called when text is found" - ;; Handle text in string, found as contents, - ;; receiving seed from previous element (sibling or parent), - ;; return final seed for this element to next element (sibling or parent) - :accessor get-text-hook - :initarg :text-hook - :initform #'(lambda (string seed) - (declare (ignore string)) - seed))) - (:documentation "The XML parser state passed along all code making up the parser")) - -(setf (documentation 'get-seed 'function) - "Get the initial user seed of an XML parser state" - (documentation 'get-entities 'function) - "Get the entities hashtable of an XML parser state" - (documentation 'get-new-element-hook 'function) - "Get the new element hook of an XML parser state" - (documentation 'get-finish-element-hook 'function) - "Get the finish element hook of an XML parser state" - (documentation 'get-text-hook 'function) - "Get the text hook of an XML parser state") - -#-allegro -(setf (documentation '(setf get-seed) 'function) - "Set the initial user seed of an XML parser state" - (documentation '(setf get-entities) 'function) - "Set the entities hashtable of an XML parser state" - (documentation '(setf get-new-element-hook) 'function) - "Set the new element hook of an XML parser state" - (documentation '(setf get-finish-element-hook) 'function) - "Set the finish element hook of an XML parser state" - (documentation '(setf get-text-hook) 'function) - "Set the text hook of an XML parser state") - -(defmethod get-mini-buffer :after ((state xml-parser-state)) - "Reset and return the reusable mini buffer" - (with-slots (mini-buffer) state - (setf (fill-pointer mini-buffer) 0))) - -(defmethod get-buffer :after ((state xml-parser-state)) - "Reset and return the main reusable buffer" - (with-slots (buffer) state - (setf (fill-pointer buffer) 0))) - -;;; parser support - -(defun parse-whitespace (stream extendable-string) - "Read and collect XML whitespace from stream in string which is - destructively modified, return first non-whitespace character which - was peeked but not read, return #\Null on eof" - (declare (type (vector character) extendable-string)) - (loop - (let ((char (peek-char nil stream nil #\Null))) - (declare (type character char)) - (if (whitespace-char-p char) - (vector-push-extend (read-char stream) extendable-string) - (return char))))) - -(defun parse-string (stream state string) - "Read and return an XML string from stream, delimited by either - single or double quotes, the stream is expected to be on the opening - delimiter, at the end the closing delimiter is also read, entities - are resolved, eof before end of string is an error" - (declare (type (vector character) string)) - (let ((delimiter (read-char stream nil #\Null)) - (char #\Null)) - (declare (type character delimiter char)) - (unless (or (char= delimiter #\') (char= delimiter #\")) - (error (parser-error "expected string delimiter" nil stream))) - (loop - (setf char (read-char stream nil #\Null)) - (cond ((char= char #\Null) (error (parser-error "encountered eof before end of string"))) - ((char= char delimiter) (return)) - ((char= char #\&) (resolve-entity stream string (get-entities state) (get-mini-buffer state))) - (t (vector-push-extend char string)))) - string)) - -(defun parse-text (stream state extendable-string) - "Read and collect XML text from stream in string which is - destructively modified, the text ends with a '<', which is peeked and - returned, entities are resolved, eof is considered an error" - (declare (type (vector character) extendable-string)) - (let ((char #\Null)) - (declare (type character char)) - (loop - (setf char (peek-char nil stream nil #\Null)) - (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text"))) - (when (char= char #\<) (return)) - (read-char stream) - (if (char= char #\&) - (resolve-entity stream extendable-string (get-entities state) (get-mini-buffer state)) - (vector-push-extend char extendable-string))) - char)) - -(defun parse-identifier (stream identifier) - "Read and returns an XML identifier from stream, positioned at the - start of the identifier, ending with the first non-identifier - character, which is peeked, the identifier is written destructively - into identifier which is also returned" - (declare (type (vector character) identifier)) - (loop - (let ((char (read-char stream nil #\Null))) - (declare (type character char)) - (cond ((identifier-char-p char) - (vector-push-extend char identifier)) - (t - (when (char/= char #\Null) (unread-char char stream)) - (return identifier)))))) - -(defun skip-comment (stream) - "Skip an XML comment in stream, positioned after the opening '<!--', - consumes the closing '-->' sequence, unexpected eof or a malformed - closing sequence result in a error" - (let ((dashes-to-read 2)) - (loop - (if (zerop dashes-to-read) (return)) - (let ((char (read-char stream nil #\Null))) - (declare (type character char)) - (if (char= char #\Null) - (error (parser-error "encountered unexpected eof for comment"))) - (if (char= char #\-) - (decf dashes-to-read) - (setf dashes-to-read 2))))) - (if (char/= (read-char stream nil #\Null) #\>) - (error (parser-error "expected > ending comment" nil stream)))) - -(defun read-cdata (stream state string) - "Reads in the CDATA and calls the callback for CDATA if it exists" - ;; we already read the <![CDATA[ stuff - ;; continue to read until we hit ]]> - (let ((char #\space) - (last-3-characters (list #\[ #\A #\T)) - (pattern (list #\> #\] #\]))) - (declare (type character char)) - (loop - (setf char (read-char stream nil #\Null)) - (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text"))) - (push char last-3-characters) - (setf (cdddr last-3-characters) nil) - (cond - ((equal last-3-characters - pattern) - (setf (fill-pointer string) - (- (fill-pointer string) 2)) - (setf (get-seed state) - (funcall (get-text-hook state) - (copy-seq string) - (get-seed state))) - (return-from read-cdata)) - (t - (vector-push-extend char string)))))) - -(defun skip-special-tag (stream state) - "Skip an XML special tag (comments and processing instructions) in - stream, positioned after the opening '<', unexpected eof is an error" - ;; opening < has been read, consume ? or ! - (read-char stream) - (let ((char (read-char stream nil #\Null))) - (declare (type character char)) - ;; see if we are dealing with a comment - (when (char= char #\-) - (setf char (read-char stream nil #\Null)) - (when (char= char #\-) - (skip-comment stream) - (return-from skip-special-tag))) - ;; maybe we are dealing with CDATA? - (when (and (char= char #\[) - (loop :for pattern :across "CDATA[" - :for char = (read-char stream nil #\Null) - :when (char= char #\Null) :do - (error (parser-error "encountered unexpected eof in cdata")) - :always (char= char pattern))) - (read-cdata stream state (get-buffer state)) - (return-from skip-special-tag)) - ;; loop over chars, dealing with strings (skipping their content) - ;; and counting opening and closing < and > chars - (let ((taglevel 1) - (string-delimiter #\Null)) - (declare (type character string-delimiter)) - (loop - (when (zerop taglevel) (return)) - (setf char (read-char stream nil #\Null)) - (when (char= char #\Null) - (error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream))) - (if (char/= string-delimiter #\Null) - ;; inside a string we only look for a closing string delimiter - (when (char= char string-delimiter) - (setf string-delimiter #\Null)) - ;; outside a string we count < and > and watch out for strings - (cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char)) - ((char= char #\<) (incf taglevel)) - ((char= char #\>) (decf taglevel)))))))) - -;;; the XML parser proper - -(defun parse-xml-element-attributes (stream state) - "Parse XML element attributes from stream positioned after the tag - identifier, returning the attributes as an assoc list, ending at - either a '>' or a '/' which is peeked and also returned" - (declare (special *namespaces*)) - (let ((char #\Null) attributes) - (declare (type character char)) - (loop - ;; skip whitespace separating items - (setf char (skip-whitespace stream)) - ;; start tag attributes ends with > or /> - (when (or (char= char #\>) (char= char #\/)) (return)) - ;; read the attribute key - (let ((key (let ((string (parse-identifier stream (get-mini-buffer state)))) - (if *ignore-namespaces* - (funcall *attribute-name-parser* string) - (copy-seq string))))) - ;; skip separating whitespace - (setf char (skip-whitespace stream)) - ;; require = sign (and consume it if present) - (if (char= char #\=) - (read-char stream) - (error (parser-error "expected =" nil stream))) - ;; skip separating whitespace - (skip-whitespace stream) - ;; read the attribute value as a string - (push (cons key (let ((string (parse-string stream state (get-buffer state)))) - (if *ignore-namespaces* - (funcall *attribute-value-parser* key string) - (copy-seq string)))) - attributes))) - ;; return attributes peek char ending loop - (values attributes char))) - -(defun parse-xml-element (stream state) - "Parse and return an XML element from stream, positioned after the opening '<'" - (declare (special *namespaces*)) - ;; opening < has been read - (when (char= (peek-char nil stream nil #\Null) #\!) - (skip-special-tag stream state) - (return-from parse-xml-element)) - (let ((char #\Null) buffer open-tag parent-seed has-children) - (declare (type character char)) - (setf parent-seed (get-seed state)) - ;; read tag name (no whitespace between < and name ?) - (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state)))) - ;; tag has been read, read attributes if any - (multiple-value-bind (attributes peeked-char) - (parse-xml-element-attributes stream state) - (let ((*namespaces* (extend-namespaces attributes *namespaces*))) - (setf open-tag (resolve-identifier open-tag *namespaces*)) - (unless *ignore-namespaces* - (dolist (attribute attributes) - (setf (car attribute) (funcall *attribute-name-parser* (car attribute)) - (cdr attribute) (funcall *attribute-value-parser* (car attribute) (cdr attribute))))) - (setf (get-seed state) (funcall (get-new-element-hook state) - open-tag attributes (get-seed state))) - (setf char peeked-char) - (when (char= char #\/) - ;; handle solitary tag of the form <tag .. /> - (read-char stream) - (setf char (read-char stream nil #\Null)) - (if (char= #\> char) - (progn - (setf (get-seed state) (funcall (get-finish-element-hook state) - open-tag attributes parent-seed (get-seed state))) - (return-from parse-xml-element)) - (error (parser-error "expected >" nil stream)))) - ;; consume > - (read-char stream) - (loop - (setf buffer (get-buffer state)) - ;; read whitespace into buffer - (setf char (parse-whitespace stream buffer)) - ;; see what ended the whitespace scan - (cond ((char= char #\Null) (error (parser-error "encountered unexpected eof handling ~a" - (list open-tag)))) - ((char= char #\<) - ;; consume the < - (read-char stream) - (if (char= (peek-char nil stream nil #\Null) #\/) - (progn - ;; handle the matching closing tag </tag> and done - ;; if we read whitespace as this (leaf) element's contents, it is significant - (when (and (not has-children) (plusp (length buffer))) - (setf (get-seed state) (funcall (get-text-hook state) - (copy-seq buffer) (get-seed state)))) - (read-char stream) - (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state)) - *namespaces*))) - (unless (eq open-tag close-tag) - (error (parser-error "found <~a> not matched by </~a> but by <~a>" - (list open-tag open-tag close-tag) stream))) - (unless (char= (read-char stream nil #\Null) #\>) - (error (parser-error "expected >" nil stream))) - (setf (get-seed state) (funcall (get-finish-element-hook state) - open-tag attributes parent-seed (get-seed state)))) - (return)) - ;; handle child tag and loop, no hooks to call here - ;; whitespace between child elements is skipped - (progn - (setf has-children t) - (parse-xml-element stream state)))) - (t - ;; no child tag, concatenate text to whitespace in buffer - ;; handle text content and loop - (setf char (parse-text stream state buffer)) - (setf (get-seed state) (funcall (get-text-hook state) - (copy-seq buffer) (get-seed state)))))))))) - -(defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state))) - "Parse and return a toplevel XML element from stream, using parser state" - (loop - (let ((char (skip-whitespace stream))) - (when (char= char #\Null) (return-from start-parse-xml)) - ;; skip whitespace until start tag - (unless (char= char #\<) - (error (parser-error "expected <" nil stream))) - (read-char stream) ; consume peeked char - (setf char (peek-char nil stream nil #\Null)) - (if (or (char= char #\!) (char= char #\?)) - ;; deal with special tags - (skip-special-tag stream state) - (progn - ;; read the main element - (parse-xml-element stream state) - (return-from start-parse-xml (get-seed state))))))) - -;;;; eof |