about summary refs log tree commit diff
path: root/third_party/lisp/mime4cl/address.lisp
;;;  address.lisp --- e-mail address parser

;;;  Copyright (C) 2007, 2008, 2009 by Walter C. Pelissero

;;;  Author: Walter C. Pelissero <walter@pelissero.de>
;;;  Project: mime4cl

#+cmu (ext:file-comment "$Module: address.lisp $")

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA

;;;  Although not MIME specific, this parser is often useful together
;;;  with the MIME primitives.  It should be able to parse the address
;;;  syntax described in RFC2822 excluding the obsolete syntax (see
;;;  RFC822).  Have a look at the test suite to get an idea of what
;;;  kind of addresses it can parse.

(in-package :mime4cl)

(defstruct (mailbox (:conc-name mbx-))
  description
  user
  host
  domain)

(defstruct (mailbox-group (:conc-name mbxg-))
  name
  mailboxes)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun write-mailbox-domain-name (addr &optional (stream *standard-output*))
  (when (eq :internet (mbx-domain addr))
    (write-char #\[ stream))
  (write-string (mbx-host addr) stream)
  (when (eq :internet (mbx-domain addr))
    (write-char #\] stream))
  (when (stringp (mbx-domain addr))
    (write-char #\. stream)
    (write-string (mbx-domain addr) stream)))

(defun write-mailbox-address (addr &optional (stream *standard-output*))
  (write-string (mbx-user addr) stream)
  (when (mbx-host addr)
    (write-char #\@ stream)
    (write-mailbox-domain-name addr stream)))

(defmethod mbx-domain-name ((MBX mailbox))
  "Return the complete domain name string of MBX, in the form
\"host.domain\"."
  (with-output-to-string (out)
    (write-mailbox-domain-name mbx out)))

(defmethod mbx-address ((mbx mailbox))
  "Return the e-mail address string of MBX, in the form
\"user@host.domain\"."
  (with-output-to-string (out)
    (write-mailbox-address mbx out)))

(defun write-mailbox (addr &optional (stream *standard-output*))
  (awhen (mbx-description addr)
    (write it :stream stream :readably t)
    (write-string " <" stream))
  (write-mailbox-address addr stream)
  (awhen (mbx-description addr)
    (write-char #\> stream)))

(defun write-mailbox-group (grp &optional (stream *standard-output*))
  (write-string (mbxg-name grp) stream)
  (write-string ": " stream)
  (loop
     for mailboxes on (mbxg-mailboxes grp)
     for mailbox = (car mailboxes)
     do (write-mailbox mailbox stream)
     unless (endp (cdr mailboxes))
     do (write-string ", " stream))
  (write-char #\; stream))

(defmethod print-object ((mbx mailbox) stream)
  (if (or *print-readably* *print-escape*)
      (call-next-method)
      (write-mailbox mbx stream)))

(defmethod print-object ((grp mailbox-group) stream)
  (if (or *print-readably* *print-escape*)
      (call-next-method)
      (write-mailbox-group grp stream)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun parser-make-mailbox (description address-list)
  (make-mailbox :description description
		:user (car address-list)
		:host (cadr address-list)
		:domain (when (cddr address-list)
			  (string-concat (cddr address-list) "."))))


(defun populate-grammar ()
  (defrule address-list
      := (+ address ","))

  (defrule address
      := mailbox
      := group)

  (defrule mailbox
      := display-name? angle-addr comment?
      :reduce (parser-make-mailbox (or display-name comment) angle-addr)
      := addr-spec comment?
      :reduce (parser-make-mailbox comment addr-spec))

  (defrule angle-addr
      := "<" addr-spec ">")

  (defrule group
      := display-name ":" mailbox-list ";"
      :reduce (make-mailbox-group :name display-name :mailboxes mailbox-list))

  (defrule display-name
      := phrase
      :reduce (string-concat phrase " "))

  (defrule phrase
      := word+)

  (defrule word
      := atext
      := string)

  (defrule mailbox-list
      := (+ mailbox ","))

  (defrule addr-spec
      := local-part "@" domain :reduce (cons local-part domain))

  (defrule local-part
      := dot-atom :reduce (string-concat dot-atom ".")
      := string)

  (defrule domain
      := dot-atom
      := domain-literal :reduce (list domain-literal :internet))

  ;; actually, according to the RFC, dot-atoms don't allow spaces in
  ;; between but these rules do
  (defrule dot-atom
      := (+ atom "."))

  (defrule atom
      := atext+
      :reduce (apply #'concatenate 'string atext)))

(deflazy define-grammar
  (let ((*package* #.*package*)
	(*compile-print* (when npg::*debug* t)))
    (reset-grammar)
    (format t "~&creating e-mail address grammar...~%")
    (populate-grammar)
    (let ((grammar (npg:generate-grammar #'string=)))
      (reset-grammar)
      (npg:print-grammar-figures grammar)
      grammar)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The lexical analyser

(defstruct cursor
  stream
  (position 0))

(defun read-delimited-string (stream end-char &key nesting-start-char (escape-char #\\))
  (labels ((collect ()
	     (with-output-to-string (out)
	       (loop
		  for c = (read-char stream nil)
		  while (and c (not (char= c end-char)))
		  do (cond ((char= c escape-char)
			    (awhen (read-char stream nil)
			      (write-char it out)))
			   ((and nesting-start-char
				 (char= c nesting-start-char))
			    (write-char nesting-start-char out)
			    (write-string (collect) out)
			    (write-char end-char out))
			   (t (write-char c out)))))))
    (collect)))


(defun read-string (cursor)
  (make-token :type 'string
	      :value (read-delimited-string (cursor-stream cursor) #\")
	      :position (incf (cursor-position cursor))))

(defun read-domain-literal (cursor)
  (make-token :type 'domain-literal
	      :value (read-delimited-string (cursor-stream cursor) #\])
	      :position (incf (cursor-position cursor))))

(defun read-comment (cursor)
  (make-token :type 'comment
	      :value (read-delimited-string (cursor-stream cursor) #\) :nesting-start-char #\()
	      :position (incf (cursor-position cursor))))

(declaim (inline atom-component-p))
(defun atom-component-p (c)
  (declare (type character c))
  (not (find c " ()\"[]@.<>:;,")))

(defun read-atext (first-character cursor)
  (be string (with-output-to-string (out)
	       (write-char first-character out)
	       (loop
		  for c = (read-char (cursor-stream cursor) nil)
		  while (and c (atom-component-p c))
		  do (write-char c out)
		  finally (when c
			    (unread-char c (cursor-stream cursor)))))
    (make-token :type 'atext
		:value string
		:position (incf (cursor-position cursor)))))

(defmethod read-next-tokens ((cursor cursor))
  (flet ((make-keyword (c)
	   (make-token :type 'keyword
		       :value (string c)
		       :position (incf (cursor-position cursor)))))
    (be in (cursor-stream cursor)
      (loop
	 for c = (read-char in nil)
	 while c
	 unless (whitespace-p c)
	 return (list
		 (cond ((char= #\( c)
			(read-comment cursor))
		       ((char= #\" c)
			(read-string cursor))
		       ((char= #\[ c)
			(read-domain-literal cursor))
		       ((find c "@.<>:;,")
			(make-keyword c))
		       (t
			;; anything else is considered a text atom even
			;; though it's just a single character
			(read-atext c cursor))))))))

(defun analyse-string (string)
  "Return the list of tokens produced by a lexical analysis of
STRING.  These are the tokens that would be seen by the parser."
  (with-input-from-string (stream string)
    (be cursor (make-cursor :stream stream)
      (loop
	 for tokens = (read-next-tokens cursor)
	 until (endp tokens)
	 append tokens))))

(defun mailboxes-only (list-of-mailboxes-and-groups)
  "Return a flat list of MAILBOX-ADDRESSes from
LIST-OF-MAILBOXES-AND-GROUPS, which is the kind of list returned
by PARSE-ADDRESSES.  This turns out to be useful when your
program is not interested in mailbox groups and expects the user
addresses only."
  (mapcan #'(lambda (mbx)
	      (if (typep mbx 'mailbox-group)
		  (mbxg-mailboxes mbx)
		  (list mbx)))
	  list-of-mailboxes-and-groups))

(defun parse-addresses (string &key no-groups)
  "Parse STRING and return a list of MAILBOX-ADDRESSes or
MAILBOX-GROUPs.  If STRING is unparsable return NIL.  If
NO-GROUPS is true, return a flat list of mailboxes throwing away
the group containers, if any."
  (be grammar (force define-grammar)
    (with-input-from-string (stream string)
      (be* cursor (make-cursor :stream stream)
	   mailboxes (ignore-errors	; ignore parsing errors
		       (parse grammar 'address-list cursor))
	(if no-groups
	    (mailboxes-only mailboxes)
	    mailboxes)))))

(defun debug-addresses (string)
  "More or less like PARSE-ADDRESSES, but don't ignore parsing errors."
  (be grammar (force define-grammar)
    (with-input-from-string (stream string)
      (be cursor (make-cursor :stream stream)
	(parse grammar 'address-list cursor)))))