about summary refs log tree commit diff
path: root/third_party/lisp/mime4cl/address.lisp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/lisp/mime4cl/address.lisp')
-rw-r--r--third_party/lisp/mime4cl/address.lisp301
1 files changed, 301 insertions, 0 deletions
diff --git a/third_party/lisp/mime4cl/address.lisp b/third_party/lisp/mime4cl/address.lisp
new file mode 100644
index 0000000000..9a3bec9b20
--- /dev/null
+++ b/third_party/lisp/mime4cl/address.lisp
@@ -0,0 +1,301 @@
+;;;  address.lisp --- e-mail address parser
+
+;;;  Copyright (C) 2007, 2008, 2009 by Walter C. Pelissero
+
+;;;  Author: Walter C. Pelissero <walter@pelissero.de>
+;;;  Project: mime4cl
+
+#+cmu (ext:file-comment "$Module: address.lisp $")
+
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 2.1
+;;; of the License, or (at your option) any later version.
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Lesser General Public License for more details.
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this library; if not, write to the Free
+;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+;;; 02111-1307 USA
+
+;;;  Although not MIME specific, this parser is often useful together
+;;;  with the MIME primitives.  It should be able to parse the address
+;;;  syntax described in RFC2822 excluding the obsolete syntax (see
+;;;  RFC822).  Have a look at the test suite to get an idea of what
+;;;  kind of addresses it can parse.
+
+(in-package :mime4cl)
+
+(defstruct (mailbox (:conc-name mbx-))
+  description
+  user
+  host
+  domain)
+
+(defstruct (mailbox-group (:conc-name mbxg-))
+  name
+  mailboxes)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defun write-mailbox-domain-name (addr &optional (stream *standard-output*))
+  (when (eq :internet (mbx-domain addr))
+    (write-char #\[ stream))
+  (write-string (mbx-host addr) stream)
+  (when (eq :internet (mbx-domain addr))
+    (write-char #\] stream))
+  (when (stringp (mbx-domain addr))
+    (write-char #\. stream)
+    (write-string (mbx-domain addr) stream)))
+
+(defun write-mailbox-address (addr &optional (stream *standard-output*))
+  (write-string (mbx-user addr) stream)
+  (when (mbx-host addr)
+    (write-char #\@ stream)
+    (write-mailbox-domain-name addr stream)))
+
+(defmethod mbx-domain-name ((MBX mailbox))
+  "Return the complete domain name string of MBX, in the form
+\"host.domain\"."
+  (with-output-to-string (out)
+    (write-mailbox-domain-name mbx out)))
+
+(defmethod mbx-address ((mbx mailbox))
+  "Return the e-mail address string of MBX, in the form
+\"user@host.domain\"."
+  (with-output-to-string (out)
+    (write-mailbox-address mbx out)))
+
+(defun write-mailbox (addr &optional (stream *standard-output*))
+  (awhen (mbx-description addr)
+    (write it :stream stream :readably t)
+    (write-string " <" stream))
+  (write-mailbox-address addr stream)
+  (awhen (mbx-description addr)
+    (write-char #\> stream)))
+
+(defun write-mailbox-group (grp &optional (stream *standard-output*))
+  (write-string (mbxg-name grp) stream)
+  (write-string ": " stream)
+  (loop
+     for mailboxes on (mbxg-mailboxes grp)
+     for mailbox = (car mailboxes)
+     do (write-mailbox mailbox stream)
+     unless (endp (cdr mailboxes))
+     do (write-string ", " stream))
+  (write-char #\; stream))
+
+(defmethod print-object ((mbx mailbox) stream)
+  (if (or *print-readably* *print-escape*)
+      (call-next-method)
+      (write-mailbox mbx stream)))
+
+(defmethod print-object ((grp mailbox-group) stream)
+  (if (or *print-readably* *print-escape*)
+      (call-next-method)
+      (write-mailbox-group grp stream)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defun parser-make-mailbox (description address-list)
+  (make-mailbox :description description
+		:user (car address-list)
+		:host (cadr address-list)
+		:domain (when (cddr address-list)
+			  (string-concat (cddr address-list) "."))))
+
+
+(defun populate-grammar ()
+  (defrule address-list
+      := (+ address ","))
+
+  (defrule address
+      := mailbox
+      := group)
+
+  (defrule mailbox
+      := display-name? angle-addr comment?
+      :reduce (parser-make-mailbox (or display-name comment) angle-addr)
+      := addr-spec comment?
+      :reduce (parser-make-mailbox comment addr-spec))
+
+  (defrule angle-addr
+      := "<" addr-spec ">")
+
+  (defrule group
+      := display-name ":" mailbox-list ";"
+      :reduce (make-mailbox-group :name display-name :mailboxes mailbox-list))
+
+  (defrule display-name
+      := phrase
+      :reduce (string-concat phrase " "))
+
+  (defrule phrase
+      := word+)
+
+  (defrule word
+      := atext
+      := string)
+
+  (defrule mailbox-list
+      := (+ mailbox ","))
+
+  (defrule addr-spec
+      := local-part "@" domain :reduce (cons local-part domain))
+
+  (defrule local-part
+      := dot-atom :reduce (string-concat dot-atom ".")
+      := string)
+
+  (defrule domain
+      := dot-atom
+      := domain-literal :reduce (list domain-literal :internet))
+
+  ;; actually, according to the RFC, dot-atoms don't allow spaces in
+  ;; between but these rules do
+  (defrule dot-atom
+      := (+ atom "."))
+
+  (defrule atom
+      := atext+
+      :reduce (apply #'concatenate 'string atext)))
+
+(deflazy define-grammar
+  (let ((*package* #.*package*)
+	(*compile-print* (when npg::*debug* t)))
+    (reset-grammar)
+    (format t "~&creating e-mail address grammar...~%")
+    (populate-grammar)
+    (let ((grammar (npg:generate-grammar #'string=)))
+      (reset-grammar)
+      (npg:print-grammar-figures grammar)
+      grammar)))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; The lexical analyser
+
+(defstruct cursor
+  stream
+  (position 0))
+
+(defun read-delimited-string (stream end-char &key nesting-start-char (escape-char #\\))
+  (labels ((collect ()
+	     (with-output-to-string (out)
+	       (loop
+		  for c = (read-char stream nil)
+		  while (and c (not (char= c end-char)))
+		  do (cond ((char= c escape-char)
+			    (awhen (read-char stream nil)
+			      (write-char it out)))
+			   ((and nesting-start-char
+				 (char= c nesting-start-char))
+			    (write-char nesting-start-char out)
+			    (write-string (collect) out)
+			    (write-char end-char out))
+			   (t (write-char c out)))))))
+    (collect)))
+
+
+(defun read-string (cursor)
+  (make-token :type 'string
+	      :value (read-delimited-string (cursor-stream cursor) #\")
+	      :position (incf (cursor-position cursor))))
+
+(defun read-domain-literal (cursor)
+  (make-token :type 'domain-literal
+	      :value (read-delimited-string (cursor-stream cursor) #\])
+	      :position (incf (cursor-position cursor))))
+
+(defun read-comment (cursor)
+  (make-token :type 'comment
+	      :value (read-delimited-string (cursor-stream cursor) #\) :nesting-start-char #\()
+	      :position (incf (cursor-position cursor))))
+
+(declaim (inline atom-component-p))
+(defun atom-component-p (c)
+  (declare (type character c))
+  (not (find c " ()\"[]@.<>:;,")))
+
+(defun read-atext (first-character cursor)
+  (be string (with-output-to-string (out)
+	       (write-char first-character out)
+	       (loop
+		  for c = (read-char (cursor-stream cursor) nil)
+		  while (and c (atom-component-p c))
+		  do (write-char c out)
+		  finally (when c
+			    (unread-char c (cursor-stream cursor)))))
+    (make-token :type 'atext
+		:value string
+		:position (incf (cursor-position cursor)))))
+
+(defmethod read-next-tokens ((cursor cursor))
+  (flet ((make-keyword (c)
+	   (make-token :type 'keyword
+		       :value (string c)
+		       :position (incf (cursor-position cursor)))))
+    (be in (cursor-stream cursor)
+      (loop
+	 for c = (read-char in nil)
+	 while c
+	 unless (whitespace-p c)
+	 return (list
+		 (cond ((char= #\( c)
+			(read-comment cursor))
+		       ((char= #\" c)
+			(read-string cursor))
+		       ((char= #\[ c)
+			(read-domain-literal cursor))
+		       ((find c "@.<>:;,")
+			(make-keyword c))
+		       (t
+			;; anything else is considered a text atom even
+			;; though it's just a single character
+			(read-atext c cursor))))))))
+
+(defun analyse-string (string)
+  "Return the list of tokens produced by a lexical analysis of
+STRING.  These are the tokens that would be seen by the parser."
+  (with-input-from-string (stream string)
+    (be cursor (make-cursor :stream stream)
+      (loop
+	 for tokens = (read-next-tokens cursor)
+	 until (endp tokens)
+	 append tokens))))
+
+(defun mailboxes-only (list-of-mailboxes-and-groups)
+  "Return a flat list of MAILBOX-ADDRESSes from
+LIST-OF-MAILBOXES-AND-GROUPS, which is the kind of list returned
+by PARSE-ADDRESSES.  This turns out to be useful when your
+program is not interested in mailbox groups and expects the user
+addresses only."
+  (mapcan #'(lambda (mbx)
+	      (if (typep mbx 'mailbox-group)
+		  (mbxg-mailboxes mbx)
+		  (list mbx)))
+	  list-of-mailboxes-and-groups))
+
+(defun parse-addresses (string &key no-groups)
+  "Parse STRING and return a list of MAILBOX-ADDRESSes or
+MAILBOX-GROUPs.  If STRING is unparsable return NIL.  If
+NO-GROUPS is true, return a flat list of mailboxes throwing away
+the group containers, if any."
+  (be grammar (force define-grammar)
+    (with-input-from-string (stream string)
+      (be* cursor (make-cursor :stream stream)
+	   mailboxes (ignore-errors	; ignore parsing errors
+		       (parse grammar 'address-list cursor))
+	(if no-groups
+	    (mailboxes-only mailboxes)
+	    mailboxes)))))
+
+(defun debug-addresses (string)
+  "More or less like PARSE-ADDRESSES, but don't ignore parsing errors."
+  (be grammar (force define-grammar)
+    (with-input-from-string (stream string)
+      (be cursor (make-cursor :stream stream)
+	(parse grammar 'address-list cursor)))))
+