diff options
Diffstat (limited to 'third_party/lisp/mime4cl/address.lisp')
-rw-r--r-- | third_party/lisp/mime4cl/address.lisp | 300 |
1 files changed, 300 insertions, 0 deletions
diff --git a/third_party/lisp/mime4cl/address.lisp b/third_party/lisp/mime4cl/address.lisp new file mode 100644 index 000000000000..944156916c0f --- /dev/null +++ b/third_party/lisp/mime4cl/address.lisp @@ -0,0 +1,300 @@ +;;; address.lisp --- e-mail address parser + +;;; Copyright (C) 2007, 2008, 2009 by Walter C. Pelissero +;;; Copyright (C) 2022 The TVL Authors + +;;; Author: Walter C. Pelissero <walter@pelissero.de> +;;; Project: mime4cl + +;;; This library is free software; you can redistribute it and/or +;;; modify it under the terms of the GNU Lesser General Public License +;;; as published by the Free Software Foundation; either version 2.1 +;;; of the License, or (at your option) any later version. +;;; This library is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; Lesser General Public License for more details. +;;; You should have received a copy of the GNU Lesser General Public +;;; License along with this library; if not, write to the Free +;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +;;; 02111-1307 USA + +;;; Although not MIME specific, this parser is often useful together +;;; with the MIME primitives. It should be able to parse the address +;;; syntax described in RFC2822 excluding the obsolete syntax (see +;;; RFC822). Have a look at the test suite to get an idea of what +;;; kind of addresses it can parse. + +(in-package :mime4cl) + +(defstruct (mailbox (:conc-name mbx-)) + description + user + host + domain) + +(defstruct (mailbox-group (:conc-name mbxg-)) + name + mailboxes) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defun write-mailbox-domain-name (addr &optional (stream *standard-output*)) + (when (eq :internet (mbx-domain addr)) + (write-char #\[ stream)) + (write-string (mbx-host addr) stream) + (when (eq :internet (mbx-domain addr)) + (write-char #\] stream)) + (when (stringp (mbx-domain addr)) + (write-char #\. stream) + (write-string (mbx-domain addr) stream))) + +(defun write-mailbox-address (addr &optional (stream *standard-output*)) + (write-string (mbx-user addr) stream) + (when (mbx-host addr) + (write-char #\@ stream) + (write-mailbox-domain-name addr stream))) + +(defmethod mbx-domain-name ((MBX mailbox)) + "Return the complete domain name string of MBX, in the form +\"host.domain\"." + (with-output-to-string (out) + (write-mailbox-domain-name mbx out))) + +(defmethod mbx-address ((mbx mailbox)) + "Return the e-mail address string of MBX, in the form +\"user@host.domain\"." + (with-output-to-string (out) + (write-mailbox-address mbx out))) + +(defun write-mailbox (addr &optional (stream *standard-output*)) + (awhen (mbx-description addr) + (write it :stream stream :readably t) + (write-string " <" stream)) + (write-mailbox-address addr stream) + (awhen (mbx-description addr) + (write-char #\> stream))) + +(defun write-mailbox-group (grp &optional (stream *standard-output*)) + (write-string (mbxg-name grp) stream) + (write-string ": " stream) + (loop + for mailboxes on (mbxg-mailboxes grp) + for mailbox = (car mailboxes) + do (write-mailbox mailbox stream) + unless (endp (cdr mailboxes)) + do (write-string ", " stream)) + (write-char #\; stream)) + +(defmethod print-object ((mbx mailbox) stream) + (if (or *print-readably* *print-escape*) + (call-next-method) + (write-mailbox mbx stream))) + +(defmethod print-object ((grp mailbox-group) stream) + (if (or *print-readably* *print-escape*) + (call-next-method) + (write-mailbox-group grp stream))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defun parser-make-mailbox (description address-list) + (make-mailbox :description description + :user (car address-list) + :host (cadr address-list) + :domain (when (cddr address-list) + (string-concat (cddr address-list) ".")))) + + +(defun populate-grammar () + (defrule address-list + := (+ address ",")) + + (defrule address + := mailbox + := group) + + (defrule mailbox + := display-name? angle-addr comment? + :reduce (parser-make-mailbox (or display-name comment) angle-addr) + := addr-spec comment? + :reduce (parser-make-mailbox comment addr-spec)) + + (defrule angle-addr + := "<" addr-spec ">") + + (defrule group + := display-name ":" mailbox-list ";" + :reduce (make-mailbox-group :name display-name :mailboxes mailbox-list)) + + (defrule display-name + := phrase + :reduce (string-concat phrase " ")) + + (defrule phrase + := word+) + + (defrule word + := atext + := string) + + (defrule mailbox-list + := (+ mailbox ",")) + + (defrule addr-spec + := local-part "@" domain :reduce (cons local-part domain)) + + (defrule local-part + := dot-atom :reduce (string-concat dot-atom ".") + := string) + + (defrule domain + := dot-atom + := domain-literal :reduce (list domain-literal :internet)) + + ;; actually, according to the RFC, dot-atoms don't allow spaces in + ;; between but these rules do + (defrule dot-atom + := (+ atom ".")) + + (defrule atom + := atext+ + :reduce (apply #'concatenate 'string atext))) + +(deflazy define-grammar + (let ((*package* #.*package*) + (*compile-print* (when npg::*debug* t))) + (reset-grammar) + (format t "~&creating e-mail address grammar...~%") + (populate-grammar) + (let ((grammar (npg:generate-grammar #'string=))) + (reset-grammar) + (npg:print-grammar-figures grammar) + grammar))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The lexical analyser + +(defstruct cursor + stream + (position 0)) + +(defun read-delimited-string (stream end-char &key nesting-start-char (escape-char #\\)) + (labels ((collect () + (with-output-to-string (out) + (loop + for c = (read-char stream nil) + while (and c (not (char= c end-char))) + do (cond ((char= c escape-char) + (awhen (read-char stream nil) + (write-char it out))) + ((and nesting-start-char + (char= c nesting-start-char)) + (write-char nesting-start-char out) + (write-string (collect) out) + (write-char end-char out)) + (t (write-char c out))))))) + (collect))) + + +(defun read-string (cursor) + (make-token :type 'string + :value (read-delimited-string (cursor-stream cursor) #\") + :position (incf (cursor-position cursor)))) + +(defun read-domain-literal (cursor) + (make-token :type 'domain-literal + :value (read-delimited-string (cursor-stream cursor) #\]) + :position (incf (cursor-position cursor)))) + +(defun read-comment (cursor) + (make-token :type 'comment + :value (read-delimited-string (cursor-stream cursor) #\) :nesting-start-char #\() + :position (incf (cursor-position cursor)))) + +(declaim (inline atom-component-p)) +(defun atom-component-p (c) + (declare (type character c)) + (not (find c " ()\"[]@.<>:;,"))) + +(defun read-atext (first-character cursor) + (be string (with-output-to-string (out) + (write-char first-character out) + (loop + for c = (read-char (cursor-stream cursor) nil) + while (and c (atom-component-p c)) + do (write-char c out) + finally (when c + (unread-char c (cursor-stream cursor))))) + (make-token :type 'atext + :value string + :position (incf (cursor-position cursor))))) + +(defmethod read-next-tokens ((cursor cursor)) + (flet ((make-keyword (c) + (make-token :type 'keyword + :value (string c) + :position (incf (cursor-position cursor))))) + (be in (cursor-stream cursor) + (loop + for c = (read-char in nil) + while c + unless (whitespace-p c) + return (list + (cond ((char= #\( c) + (read-comment cursor)) + ((char= #\" c) + (read-string cursor)) + ((char= #\[ c) + (read-domain-literal cursor)) + ((find c "@.<>:;,") + (make-keyword c)) + (t + ;; anything else is considered a text atom even + ;; though it's just a single character + (read-atext c cursor)))))))) + +(defun analyse-string (string) + "Return the list of tokens produced by a lexical analysis of +STRING. These are the tokens that would be seen by the parser." + (with-input-from-string (stream string) + (be cursor (make-cursor :stream stream) + (loop + for tokens = (read-next-tokens cursor) + until (endp tokens) + append tokens)))) + +(defun mailboxes-only (list-of-mailboxes-and-groups) + "Return a flat list of MAILBOX-ADDRESSes from +LIST-OF-MAILBOXES-AND-GROUPS, which is the kind of list returned +by PARSE-ADDRESSES. This turns out to be useful when your +program is not interested in mailbox groups and expects the user +addresses only." + (mapcan #'(lambda (mbx) + (if (typep mbx 'mailbox-group) + (mbxg-mailboxes mbx) + (list mbx))) + list-of-mailboxes-and-groups)) + +(defun parse-addresses (string &key no-groups) + "Parse STRING and return a list of MAILBOX-ADDRESSes or +MAILBOX-GROUPs. If STRING is unparsable return NIL. If +NO-GROUPS is true, return a flat list of mailboxes throwing away +the group containers, if any." + (be grammar (force define-grammar) + (with-input-from-string (stream string) + (be* cursor (make-cursor :stream stream) + mailboxes (ignore-errors ; ignore parsing errors + (parse grammar 'address-list cursor)) + (if no-groups + (mailboxes-only mailboxes) + mailboxes))))) + +(defun debug-addresses (string) + "More or less like PARSE-ADDRESSES, but don't ignore parsing errors." + (be grammar (force define-grammar) + (with-input-from-string (stream string) + (be cursor (make-cursor :stream stream) + (parse grammar 'address-list cursor))))) + |