diff options
author | Vincent Ambo <tazjin@google.com> | 2020-01-22T00·49+0000 |
---|---|---|
committer | Vincent Ambo <tazjin@google.com> | 2020-01-22T00·49+0000 |
commit | 437efa7686f62c88f6b377a255715fa0c28ed2c7 (patch) | |
tree | caba3ba35513c9fc794ff8d78c82b67b274bcd52 /third_party/lisp/s-xml/src/xml.lisp | |
parent | fe3ea06cbc32c9b727549a6505e69234f3072f6f (diff) |
feat(third_party/lisp/s-xml): Check in sources & derivation r/438
Checked in the sources for this because it is tracked upstream in CVS and I can't be bothered to deal with that right now.
Diffstat (limited to 'third_party/lisp/s-xml/src/xml.lisp')
-rw-r--r-- | third_party/lisp/s-xml/src/xml.lisp | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/third_party/lisp/s-xml/src/xml.lisp b/third_party/lisp/s-xml/src/xml.lisp new file mode 100644 index 000000000000..8a2076985a49 --- /dev/null +++ b/third_party/lisp/s-xml/src/xml.lisp @@ -0,0 +1,702 @@ +;;;; -*- mode: lisp -*- +;;;; +;;;; $Id: xml.lisp,v 1.15 2006/01/19 20:00:06 scaekenberghe Exp $ +;;;; +;;;; This is a Common Lisp implementation of a basic but usable XML parser. +;;;; The parser is non-validating and not complete (no PI). +;;;; Namespace and entities are handled. +;;;; The API into the parser is a pure functional parser hook model that comes from SSAX, +;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net +;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one. +;;;; +;;;; Copyright (C) 2002, 2003, 2004, 2005, 2006 Sven Van Caekenberghe, Beta Nine BVBA. +;;;; +;;;; You are granted the rights to distribute and use this software +;;;; as governed by the terms of the Lisp Lesser General Public License +;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL. + +(in-package :s-xml) + +;;; (tazjin): moved up here because something was wonky with the +;;; definition order +(defvar *ignore-namespaces* nil + "When t, namespaces are ignored like in the old version of S-XML") + +;;; error reporting + +(define-condition xml-parser-error (error) + ((message :initarg :message :reader xml-parser-error-message) + (args :initarg :args :reader xml-parser-error-args) + (stream :initarg :stream :reader xml-parser-error-stream :initform nil)) + (:report (lambda (condition stream) + (format stream + "XML parser ~?~@[ near stream position ~d~]." + (xml-parser-error-message condition) + (xml-parser-error-args condition) + (and (xml-parser-error-stream condition) + (file-position (xml-parser-error-stream condition)))))) + (:documentation "Thrown by the XML parser to indicate errorneous input")) + +(setf (documentation 'xml-parser-error-message 'function) + "Get the message from an XML parser error" + (documentation 'xml-parser-error-args 'function) + "Get the error arguments from an XML parser error" + (documentation 'xml-parser-error-stream 'function) + "Get the stream from an XML parser error") + +(defun parser-error (message &optional args stream) + (make-condition 'xml-parser-error + :message message + :args args + :stream stream)) + +;; attribute parsing hooks +;; this is a bit complicated, refer to the mailing lists for a more detailed explanation + +(defun parse-attribute-name (string) + "Default parser for the attribute name" + (declare (special *namespaces*)) + (resolve-identifier string *namespaces* t)) + +(defun parse-attribute-value (name string) + "Default parser for the attribute value" + (declare (ignore name) + (special *ignore-namespace*)) + (if *ignore-namespaces* + (copy-seq string) + string)) + +(defparameter *attribute-name-parser* #'parse-attribute-name + "Called to compute interned attribute name from a buffer that will be reused") + +(defparameter *attribute-value-parser* #'parse-attribute-value + "Called to compute an element of an attribute list from a buffer that will be reused") + +;;; utilities + +(defun whitespace-char-p (char) + "Is char an XML whitespace character ?" + (declare (type character char)) + (or (char= char #\space) + (char= char #\tab) + (char= char #\return) + (char= char #\linefeed))) + +(defun identifier-char-p (char) + "Is char an XML identifier character ?" + (declare (type character char)) + (or (and (char<= #\A char) (char<= char #\Z)) + (and (char<= #\a char) (char<= char #\z)) + (and (char<= #\0 char) (char<= char #\9)) + (char= char #\-) + (char= char #\_) + (char= char #\.) + (char= char #\:))) + +(defun skip-whitespace (stream) + "Skip over XML whitespace in stream, return first non-whitespace + character which was peeked but not read, return nil on eof" + (loop + (let ((char (peek-char nil stream nil #\Null))) + (declare (type character char)) + (if (whitespace-char-p char) + (read-char stream) + (return char))))) + +(defun make-extendable-string (&optional (size 10)) + "Make an extendable string which is a one-dimensional character + array which is adjustable and has a fill pointer" + (make-array size + :element-type 'character + :adjustable t + :fill-pointer 0)) + +(defun print-string-xml (string stream &key (start 0) end) + "Write the characters of string to stream using basic XML conventions" + (loop for offset upfrom start below (or end (length string)) + for char = (char string offset) + do (case char + (#\& (write-string "&" stream)) + (#\< (write-string "<" stream)) + (#\> (write-string ">" stream)) + (#\" (write-string """ stream)) + ((#\newline #\return #\tab) (write-char char stream)) + (t (if (and (<= 32 (char-code char)) + (<= (char-code char) 126)) + (write-char char stream) + (progn + (write-string "&#x" stream) + (write (char-code char) :stream stream :base 16) + (write-char #\; stream))))))) + +(defun make-standard-entities () + "A hashtable mapping XML entity names to their replacement strings, + filled with the standard set" + (let ((entities (make-hash-table :test #'equal))) + (setf (gethash "amp" entities) (string #\&) + (gethash "quot" entities) (string #\") + (gethash "apos" entities) (string #\') + (gethash "lt" entities) (string #\<) + (gethash "gt" entities) (string #\>) + (gethash "nbsp" entities) (string #\space)) + entities)) + +(defun resolve-entity (stream extendable-string entities entity) + "Read and resolve an XML entity from stream, positioned after the '&' entity marker, + accepting &name; &#DEC; and &#xHEX; formats, + destructively modifying string, which is also returned, + destructively modifying entity, incorrect entity formats result in errors" + (declare (type (vector character) entity)) + (loop + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + (cond ((char= char #\Null) (error (parser-error "encountered eof before end of entity"))) + ((char= #\; char) (return)) + (t (vector-push-extend char entity))))) + (if (char= (char entity 0) #\#) + (let ((code (if (char= (char entity 1) #\x) + (parse-integer entity :start 2 :radix 16 :junk-allowed t) + (parse-integer entity :start 1 :radix 10 :junk-allowed t)))) + (when (null code) + (error (parser-error "encountered incorrect entity &~s;" (list entity) stream))) + (vector-push-extend (code-char code) extendable-string)) + (let ((value (gethash entity entities))) + (if value + (loop :for char :across value + :do (vector-push-extend char extendable-string)) + (error (parser-error "encountered unknown entity &~s;" (list entity) stream))))) + extendable-string) + +;;; namespace support + +(defclass xml-namespace () + ((uri :documentation "The URI used to identify this namespace" + :accessor get-uri + :initarg :uri) + (prefix :documentation "The preferred prefix assigned to this namespace" + :accessor get-prefix + :initarg :prefix + :initform nil) + (package :documentation "The Common Lisp package where this namespace's symbols are interned" + :accessor get-package + :initarg :package + :initform nil)) + (:documentation "Describes an XML namespace and how it is handled")) + +(setf (documentation 'get-uri 'function) + "The URI used to identify this namespace" + (documentation 'get-prefix 'function) + "The preferred prefix assigned to this namespace" + (documentation 'get-package 'function) + "The Common Lisp package where this namespace's symbols are interned") + +(defmethod print-object ((object xml-namespace) stream) + (print-unreadable-object (object stream :type t :identity t) + (format stream "~A - ~A" (get-prefix object) (get-uri object)))) + +(defvar *local-namespace* (make-instance 'xml-namespace + :uri "local" + :prefix "" + :package (find-package :keyword)) + "The local (global default) XML namespace") + +(defvar *xml-namespace* (make-instance 'xml-namespace + :uri "http://www.w3.org/XML/1998/namespace" + :prefix "xml" + :package (or (find-package :xml) + (make-package :xml :nicknames '("XML")))) + "REC-xml-names-19990114 says the prefix xml is bound to the namespace http://www.w3.org/XML/1998/namespace.") + +(defvar *known-namespaces* (list *local-namespace* *xml-namespace*) + "The list of known/defined namespaces") + +(defvar *namespaces* `(("xml" . ,*xml-namespace*) ("" . ,*local-namespace*)) + "Ordered list of (prefix . XML-namespace) bindings currently in effect - special variable") + +(defun find-namespace (uri) + "Find a registered XML namespace identified by uri" + (find uri *known-namespaces* :key #'get-uri :test #'string-equal)) + +(defun register-namespace (uri prefix package) + "Register a new or redefine an existing XML namespace defined by uri with prefix and package" + (let ((namespace (find-namespace uri))) + (if namespace + (setf (get-prefix namespace) prefix + (get-package namespace) (find-package package)) + (push (setf namespace (make-instance 'xml-namespace + :uri uri + :prefix prefix + :package (find-package package))) + *known-namespaces*)) + namespace)) + +(defun find-namespace-binding (prefix namespaces) + "Find the XML namespace currently bound to prefix in the namespaces bindings" + (cdr (assoc prefix namespaces :test #'string-equal))) + +(defun split-identifier (identifier) + "Split an identifier 'prefix:name' and return (values prefix name)" + (when (symbolp identifier) + (setf identifier (symbol-name identifier))) + (let ((colon-position (position #\: identifier :test #'char=))) + (if colon-position + (values (subseq identifier 0 colon-position) + (subseq identifier (1+ colon-position))) + (values nil identifier)))) + +(defvar *require-existing-symbols* nil + "If t, each XML identifier must exist as symbol already") + +(defvar *auto-export-symbols* t + "If t, export newly interned symbols form their packages") + +(defun resolve-identifier (identifier namespaces &optional as-attribute) + "Resolve the string identifier in the list of namespace bindings" + (if *ignore-namespaces* + (intern identifier :keyword) + (flet ((intern-symbol (string package) ; intern string as a symbol in package + (if *require-existing-symbols* + (let ((symbol (find-symbol string package))) + (or symbol + (error "Symbol ~s does not exist in ~s" string package))) + (let ((symbol (intern string package))) + (when (and *auto-export-symbols* + (not (eql package (find-package :keyword)))) + (export symbol package)) + symbol)))) + (multiple-value-bind (prefix name) + (split-identifier identifier) + (if (or (null prefix) (string= prefix "xmlns")) + (if as-attribute + (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*)) + (let ((default-namespace (find-namespace-binding "" namespaces))) + (intern-symbol name (get-package default-namespace)))) + (let ((namespace (find-namespace-binding prefix namespaces))) + (if namespace + (intern-symbol name (get-package namespace)) + (error "namespace not found for prefix ~s" prefix)))))))) + +(defvar *auto-create-namespace-packages* t + "If t, new packages will be created for namespaces, if needed, named by the prefix") + +(defun new-namespace (uri &optional prefix) + "Register a new namespace for uri and prefix, creating a package if necessary" + (if prefix + (register-namespace uri + prefix + (or (find-package prefix) + (if *auto-create-namespace-packages* + (make-package prefix :nicknames `(,(string-upcase prefix))) + (error "Cannot find or create package ~s" prefix)))) + (let ((unique-name (loop :for i :upfrom 0 + :do (let ((name (format nil "ns-~d" i))) + (when (not (find-package name)) + (return name)))))) + (register-namespace uri + unique-name + (if *auto-create-namespace-packages* + (make-package (string-upcase unique-name) :nicknames `(,unique-name)) + (error "Cannot create package ~s" unique-name)))))) + +(defun extend-namespaces (attributes namespaces) + "Given possible 'xmlns[:prefix]' attributes, extend the namespaces bindings" + (unless *ignore-namespaces* + (let (default-namespace-uri) + (loop :for (key . value) :in attributes + :do (if (string= key "xmlns") + (setf default-namespace-uri value) + (multiple-value-bind (prefix name) + (split-identifier key) + (when (string= prefix "xmlns") + (let* ((uri value) + (prefix name) + (namespace (find-namespace uri))) + (unless namespace + (setf namespace (new-namespace uri prefix))) + (push `(,prefix . ,namespace) namespaces)))))) + (when default-namespace-uri + (let ((namespace (find-namespace default-namespace-uri))) + (unless namespace + (setf namespace (new-namespace default-namespace-uri))) + (push `("" . ,namespace) namespaces))))) + namespaces) + +(defun print-identifier (identifier stream &optional as-attribute) + "Print identifier on stream using namespace conventions" + (declare (ignore as-attribute) (special *namespaces*)) + (if *ignore-namespaces* + (princ identifier stream) + (if (symbolp identifier) + (let ((package (symbol-package identifier)) + (name (symbol-name identifier))) + (let* ((namespace (find package *known-namespaces* :key #'get-package)) + (prefix (or (car (find namespace *namespaces* :key #'cdr)) + (get-prefix namespace)))) + (if (string= prefix "") + (princ name stream) + (format stream "~a:~a" prefix name)))) + (princ identifier stream)))) + +;;; the parser state + +(defclass xml-parser-state () + ((entities :documentation "A hashtable mapping XML entity names to their replacement stings" + :accessor get-entities + :initarg :entities + :initform (make-standard-entities)) + (seed :documentation "The user seed object" + :accessor get-seed + :initarg :seed + :initform nil) + (buffer :documentation "The main reusable character buffer" + :accessor get-buffer + :initform (make-extendable-string)) + (mini-buffer :documentation "The secondary, smaller reusable character buffer" + :accessor get-mini-buffer + :initform (make-extendable-string)) + (new-element-hook :documentation "Called when new element starts" + ;; Handle the start of a new xml element with name and attributes, + ;; receiving seed from previous element (sibling or parent) + ;; return seed to be used for first child (content) + ;; or directly to finish-element-hook + :accessor get-new-element-hook + :initarg :new-element-hook + :initform #'(lambda (name attributes seed) + (declare (ignore name attributes)) + seed)) + (finish-element-hook :documentation "Called when element ends" + ;; Handle the end of an xml element with name and attributes, + ;; receiving parent-seed, the seed passed to us when this element started, + ;; i.e. passed to our corresponding new-element-hook + ;; and receiving seed from last child (content) + ;; or directly from new-element-hook + ;; return final seed for this element to next element (sibling or parent) + :accessor get-finish-element-hook + :initarg :finish-element-hook + :initform #'(lambda (name attributes parent-seed seed) + (declare (ignore name attributes parent-seed)) + seed)) + (text-hook :documentation "Called when text is found" + ;; Handle text in string, found as contents, + ;; receiving seed from previous element (sibling or parent), + ;; return final seed for this element to next element (sibling or parent) + :accessor get-text-hook + :initarg :text-hook + :initform #'(lambda (string seed) + (declare (ignore string)) + seed))) + (:documentation "The XML parser state passed along all code making up the parser")) + +(setf (documentation 'get-seed 'function) + "Get the initial user seed of an XML parser state" + (documentation 'get-entities 'function) + "Get the entities hashtable of an XML parser state" + (documentation 'get-new-element-hook 'function) + "Get the new element hook of an XML parser state" + (documentation 'get-finish-element-hook 'function) + "Get the finish element hook of an XML parser state" + (documentation 'get-text-hook 'function) + "Get the text hook of an XML parser state") + +#-allegro +(setf (documentation '(setf get-seed) 'function) + "Set the initial user seed of an XML parser state" + (documentation '(setf get-entities) 'function) + "Set the entities hashtable of an XML parser state" + (documentation '(setf get-new-element-hook) 'function) + "Set the new element hook of an XML parser state" + (documentation '(setf get-finish-element-hook) 'function) + "Set the finish element hook of an XML parser state" + (documentation '(setf get-text-hook) 'function) + "Set the text hook of an XML parser state") + +(defmethod get-mini-buffer :after ((state xml-parser-state)) + "Reset and return the reusable mini buffer" + (with-slots (mini-buffer) state + (setf (fill-pointer mini-buffer) 0))) + +(defmethod get-buffer :after ((state xml-parser-state)) + "Reset and return the main reusable buffer" + (with-slots (buffer) state + (setf (fill-pointer buffer) 0))) + +;;; parser support + +(defun parse-whitespace (stream extendable-string) + "Read and collect XML whitespace from stream in string which is + destructively modified, return first non-whitespace character which + was peeked but not read, return #\Null on eof" + (declare (type (vector character) extendable-string)) + (loop + (let ((char (peek-char nil stream nil #\Null))) + (declare (type character char)) + (if (whitespace-char-p char) + (vector-push-extend (read-char stream) extendable-string) + (return char))))) + +(defun parse-string (stream state string) + "Read and return an XML string from stream, delimited by either + single or double quotes, the stream is expected to be on the opening + delimiter, at the end the closing delimiter is also read, entities + are resolved, eof before end of string is an error" + (declare (type (vector character) string)) + (let ((delimiter (read-char stream nil #\Null)) + (char #\Null)) + (declare (type character delimiter char)) + (unless (or (char= delimiter #\') (char= delimiter #\")) + (error (parser-error "expected string delimiter" nil stream))) + (loop + (setf char (read-char stream nil #\Null)) + (cond ((char= char #\Null) (error (parser-error "encountered eof before end of string"))) + ((char= char delimiter) (return)) + ((char= char #\&) (resolve-entity stream string (get-entities state) (get-mini-buffer state))) + (t (vector-push-extend char string)))) + string)) + +(defun parse-text (stream state extendable-string) + "Read and collect XML text from stream in string which is + destructively modified, the text ends with a '<', which is peeked and + returned, entities are resolved, eof is considered an error" + (declare (type (vector character) extendable-string)) + (let ((char #\Null)) + (declare (type character char)) + (loop + (setf char (peek-char nil stream nil #\Null)) + (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text"))) + (when (char= char #\<) (return)) + (read-char stream) + (if (char= char #\&) + (resolve-entity stream extendable-string (get-entities state) (get-mini-buffer state)) + (vector-push-extend char extendable-string))) + char)) + +(defun parse-identifier (stream identifier) + "Read and returns an XML identifier from stream, positioned at the + start of the identifier, ending with the first non-identifier + character, which is peeked, the identifier is written destructively + into identifier which is also returned" + (declare (type (vector character) identifier)) + (loop + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + (cond ((identifier-char-p char) + (vector-push-extend char identifier)) + (t + (when (char/= char #\Null) (unread-char char stream)) + (return identifier)))))) + +(defun skip-comment (stream) + "Skip an XML comment in stream, positioned after the opening '<!--', + consumes the closing '-->' sequence, unexpected eof or a malformed + closing sequence result in a error" + (let ((dashes-to-read 2)) + (loop + (if (zerop dashes-to-read) (return)) + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + (if (char= char #\Null) + (error (parser-error "encountered unexpected eof for comment"))) + (if (char= char #\-) + (decf dashes-to-read) + (setf dashes-to-read 2))))) + (if (char/= (read-char stream nil #\Null) #\>) + (error (parser-error "expected > ending comment" nil stream)))) + +(defun read-cdata (stream state string) + "Reads in the CDATA and calls the callback for CDATA if it exists" + ;; we already read the <![CDATA[ stuff + ;; continue to read until we hit ]]> + (let ((char #\space) + (last-3-characters (list #\[ #\A #\T)) + (pattern (list #\> #\] #\]))) + (declare (type character char)) + (loop + (setf char (read-char stream nil #\Null)) + (when (char= char #\Null) (error (parser-error "encountered unexpected eof in text"))) + (push char last-3-characters) + (setf (cdddr last-3-characters) nil) + (cond + ((equal last-3-characters + pattern) + (setf (fill-pointer string) + (- (fill-pointer string) 2)) + (setf (get-seed state) + (funcall (get-text-hook state) + (copy-seq string) + (get-seed state))) + (return-from read-cdata)) + (t + (vector-push-extend char string)))))) + +(defun skip-special-tag (stream state) + "Skip an XML special tag (comments and processing instructions) in + stream, positioned after the opening '<', unexpected eof is an error" + ;; opening < has been read, consume ? or ! + (read-char stream) + (let ((char (read-char stream nil #\Null))) + (declare (type character char)) + ;; see if we are dealing with a comment + (when (char= char #\-) + (setf char (read-char stream nil #\Null)) + (when (char= char #\-) + (skip-comment stream) + (return-from skip-special-tag))) + ;; maybe we are dealing with CDATA? + (when (and (char= char #\[) + (loop :for pattern :across "CDATA[" + :for char = (read-char stream nil #\Null) + :when (char= char #\Null) :do + (error (parser-error "encountered unexpected eof in cdata")) + :always (char= char pattern))) + (read-cdata stream state (get-buffer state)) + (return-from skip-special-tag)) + ;; loop over chars, dealing with strings (skipping their content) + ;; and counting opening and closing < and > chars + (let ((taglevel 1) + (string-delimiter #\Null)) + (declare (type character string-delimiter)) + (loop + (when (zerop taglevel) (return)) + (setf char (read-char stream nil #\Null)) + (when (char= char #\Null) + (error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream))) + (if (char/= string-delimiter #\Null) + ;; inside a string we only look for a closing string delimiter + (when (char= char string-delimiter) + (setf string-delimiter #\Null)) + ;; outside a string we count < and > and watch out for strings + (cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char)) + ((char= char #\<) (incf taglevel)) + ((char= char #\>) (decf taglevel)))))))) + +;;; the XML parser proper + +(defun parse-xml-element-attributes (stream state) + "Parse XML element attributes from stream positioned after the tag + identifier, returning the attributes as an assoc list, ending at + either a '>' or a '/' which is peeked and also returned" + (declare (special *namespaces*)) + (let ((char #\Null) attributes) + (declare (type character char)) + (loop + ;; skip whitespace separating items + (setf char (skip-whitespace stream)) + ;; start tag attributes ends with > or /> + (when (or (char= char #\>) (char= char #\/)) (return)) + ;; read the attribute key + (let ((key (let ((string (parse-identifier stream (get-mini-buffer state)))) + (if *ignore-namespaces* + (funcall *attribute-name-parser* string) + (copy-seq string))))) + ;; skip separating whitespace + (setf char (skip-whitespace stream)) + ;; require = sign (and consume it if present) + (if (char= char #\=) + (read-char stream) + (error (parser-error "expected =" nil stream))) + ;; skip separating whitespace + (skip-whitespace stream) + ;; read the attribute value as a string + (push (cons key (let ((string (parse-string stream state (get-buffer state)))) + (if *ignore-namespaces* + (funcall *attribute-value-parser* key string) + (copy-seq string)))) + attributes))) + ;; return attributes peek char ending loop + (values attributes char))) + +(defun parse-xml-element (stream state) + "Parse and return an XML element from stream, positioned after the opening '<'" + (declare (special *namespaces*)) + ;; opening < has been read + (when (char= (peek-char nil stream nil #\Null) #\!) + (skip-special-tag stream state) + (return-from parse-xml-element)) + (let ((char #\Null) buffer open-tag parent-seed has-children) + (declare (type character char)) + (setf parent-seed (get-seed state)) + ;; read tag name (no whitespace between < and name ?) + (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state)))) + ;; tag has been read, read attributes if any + (multiple-value-bind (attributes peeked-char) + (parse-xml-element-attributes stream state) + (let ((*namespaces* (extend-namespaces attributes *namespaces*))) + (setf open-tag (resolve-identifier open-tag *namespaces*)) + (unless *ignore-namespaces* + (dolist (attribute attributes) + (setf (car attribute) (funcall *attribute-name-parser* (car attribute)) + (cdr attribute) (funcall *attribute-value-parser* (car attribute) (cdr attribute))))) + (setf (get-seed state) (funcall (get-new-element-hook state) + open-tag attributes (get-seed state))) + (setf char peeked-char) + (when (char= char #\/) + ;; handle solitary tag of the form <tag .. /> + (read-char stream) + (setf char (read-char stream nil #\Null)) + (if (char= #\> char) + (progn + (setf (get-seed state) (funcall (get-finish-element-hook state) + open-tag attributes parent-seed (get-seed state))) + (return-from parse-xml-element)) + (error (parser-error "expected >" nil stream)))) + ;; consume > + (read-char stream) + (loop + (setf buffer (get-buffer state)) + ;; read whitespace into buffer + (setf char (parse-whitespace stream buffer)) + ;; see what ended the whitespace scan + (cond ((char= char #\Null) (error (parser-error "encountered unexpected eof handling ~a" + (list open-tag)))) + ((char= char #\<) + ;; consume the < + (read-char stream) + (if (char= (peek-char nil stream nil #\Null) #\/) + (progn + ;; handle the matching closing tag </tag> and done + ;; if we read whitespace as this (leaf) element's contents, it is significant + (when (and (not has-children) (plusp (length buffer))) + (setf (get-seed state) (funcall (get-text-hook state) + (copy-seq buffer) (get-seed state)))) + (read-char stream) + (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state)) + *namespaces*))) + (unless (eq open-tag close-tag) + (error (parser-error "found <~a> not matched by </~a> but by <~a>" + (list open-tag open-tag close-tag) stream))) + (unless (char= (read-char stream nil #\Null) #\>) + (error (parser-error "expected >" nil stream))) + (setf (get-seed state) (funcall (get-finish-element-hook state) + open-tag attributes parent-seed (get-seed state)))) + (return)) + ;; handle child tag and loop, no hooks to call here + ;; whitespace between child elements is skipped + (progn + (setf has-children t) + (parse-xml-element stream state)))) + (t + ;; no child tag, concatenate text to whitespace in buffer + ;; handle text content and loop + (setf char (parse-text stream state buffer)) + (setf (get-seed state) (funcall (get-text-hook state) + (copy-seq buffer) (get-seed state)))))))))) + +(defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state))) + "Parse and return a toplevel XML element from stream, using parser state" + (loop + (let ((char (skip-whitespace stream))) + (when (char= char #\Null) (return-from start-parse-xml)) + ;; skip whitespace until start tag + (unless (char= char #\<) + (error (parser-error "expected <" nil stream))) + (read-char stream) ; consume peeked char + (setf char (peek-char nil stream nil #\Null)) + (if (or (char= char #\!) (char= char #\?)) + ;; deal with special tags + (skip-special-tag stream state) + (progn + ;; read the main element + (parse-xml-element stream state) + (return-from start-parse-xml (get-seed state))))))) + +;;;; eof |