about summary refs log tree commit diff
path: root/third_party/lisp/mime4cl/address.lisp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/lisp/mime4cl/address.lisp')
-rw-r--r--third_party/lisp/mime4cl/address.lisp300
1 files changed, 300 insertions, 0 deletions
diff --git a/third_party/lisp/mime4cl/address.lisp b/third_party/lisp/mime4cl/address.lisp
new file mode 100644
index 000000000000..944156916c0f
--- /dev/null
+++ b/third_party/lisp/mime4cl/address.lisp
@@ -0,0 +1,300 @@
+;;;  address.lisp --- e-mail address parser
+
+;;;  Copyright (C) 2007, 2008, 2009 by Walter C. Pelissero
+;;;  Copyright (C) 2022 The TVL Authors
+
+;;;  Author: Walter C. Pelissero <walter@pelissero.de>
+;;;  Project: mime4cl
+
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 2.1
+;;; of the License, or (at your option) any later version.
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;; Lesser General Public License for more details.
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this library; if not, write to the Free
+;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+;;; 02111-1307 USA
+
+;;;  Although not MIME specific, this parser is often useful together
+;;;  with the MIME primitives.  It should be able to parse the address
+;;;  syntax described in RFC2822 excluding the obsolete syntax (see
+;;;  RFC822).  Have a look at the test suite to get an idea of what
+;;;  kind of addresses it can parse.
+
+(in-package :mime4cl)
+
+(defstruct (mailbox (:conc-name mbx-))
+  description
+  user
+  host
+  domain)
+
+(defstruct (mailbox-group (:conc-name mbxg-))
+  name
+  mailboxes)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defun write-mailbox-domain-name (addr &optional (stream *standard-output*))
+  (when (eq :internet (mbx-domain addr))
+    (write-char #\[ stream))
+  (write-string (mbx-host addr) stream)
+  (when (eq :internet (mbx-domain addr))
+    (write-char #\] stream))
+  (when (stringp (mbx-domain addr))
+    (write-char #\. stream)
+    (write-string (mbx-domain addr) stream)))
+
+(defun write-mailbox-address (addr &optional (stream *standard-output*))
+  (write-string (mbx-user addr) stream)
+  (when (mbx-host addr)
+    (write-char #\@ stream)
+    (write-mailbox-domain-name addr stream)))
+
+(defmethod mbx-domain-name ((MBX mailbox))
+  "Return the complete domain name string of MBX, in the form
+\"host.domain\"."
+  (with-output-to-string (out)
+    (write-mailbox-domain-name mbx out)))
+
+(defmethod mbx-address ((mbx mailbox))
+  "Return the e-mail address string of MBX, in the form
+\"user@host.domain\"."
+  (with-output-to-string (out)
+    (write-mailbox-address mbx out)))
+
+(defun write-mailbox (addr &optional (stream *standard-output*))
+  (awhen (mbx-description addr)
+    (write it :stream stream :readably t)
+    (write-string " <" stream))
+  (write-mailbox-address addr stream)
+  (awhen (mbx-description addr)
+    (write-char #\> stream)))
+
+(defun write-mailbox-group (grp &optional (stream *standard-output*))
+  (write-string (mbxg-name grp) stream)
+  (write-string ": " stream)
+  (loop
+     for mailboxes on (mbxg-mailboxes grp)
+     for mailbox = (car mailboxes)
+     do (write-mailbox mailbox stream)
+     unless (endp (cdr mailboxes))
+     do (write-string ", " stream))
+  (write-char #\; stream))
+
+(defmethod print-object ((mbx mailbox) stream)
+  (if (or *print-readably* *print-escape*)
+      (call-next-method)
+      (write-mailbox mbx stream)))
+
+(defmethod print-object ((grp mailbox-group) stream)
+  (if (or *print-readably* *print-escape*)
+      (call-next-method)
+      (write-mailbox-group grp stream)))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defun parser-make-mailbox (description address-list)
+  (make-mailbox :description description
+                :user (car address-list)
+                :host (cadr address-list)
+                :domain (when (cddr address-list)
+                          (string-concat (cddr address-list) "."))))
+
+
+(defun populate-grammar ()
+  (defrule address-list
+      := (+ address ","))
+
+  (defrule address
+      := mailbox
+      := group)
+
+  (defrule mailbox
+      := display-name? angle-addr comment?
+      :reduce (parser-make-mailbox (or display-name comment) angle-addr)
+      := addr-spec comment?
+      :reduce (parser-make-mailbox comment addr-spec))
+
+  (defrule angle-addr
+      := "<" addr-spec ">")
+
+  (defrule group
+      := display-name ":" mailbox-list ";"
+      :reduce (make-mailbox-group :name display-name :mailboxes mailbox-list))
+
+  (defrule display-name
+      := phrase
+      :reduce (string-concat phrase " "))
+
+  (defrule phrase
+      := word+)
+
+  (defrule word
+      := atext
+      := string)
+
+  (defrule mailbox-list
+      := (+ mailbox ","))
+
+  (defrule addr-spec
+      := local-part "@" domain :reduce (cons local-part domain))
+
+  (defrule local-part
+      := dot-atom :reduce (string-concat dot-atom ".")
+      := string)
+
+  (defrule domain
+      := dot-atom
+      := domain-literal :reduce (list domain-literal :internet))
+
+  ;; actually, according to the RFC, dot-atoms don't allow spaces in
+  ;; between but these rules do
+  (defrule dot-atom
+      := (+ atom "."))
+
+  (defrule atom
+      := atext+
+      :reduce (apply #'concatenate 'string atext)))
+
+(deflazy define-grammar
+  (let ((*package* #.*package*)
+        (*compile-print* (when npg::*debug* t)))
+    (reset-grammar)
+    (format t "~&creating e-mail address grammar...~%")
+    (populate-grammar)
+    (let ((grammar (npg:generate-grammar #'string=)))
+      (reset-grammar)
+      (npg:print-grammar-figures grammar)
+      grammar)))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; The lexical analyser
+
+(defstruct cursor
+  stream
+  (position 0))
+
+(defun read-delimited-string (stream end-char &key nesting-start-char (escape-char #\\))
+  (labels ((collect ()
+             (with-output-to-string (out)
+               (loop
+                  for c = (read-char stream nil)
+                  while (and c (not (char= c end-char)))
+                  do (cond ((char= c escape-char)
+                            (awhen (read-char stream nil)
+                              (write-char it out)))
+                           ((and nesting-start-char
+                                 (char= c nesting-start-char))
+                            (write-char nesting-start-char out)
+                            (write-string (collect) out)
+                            (write-char end-char out))
+                           (t (write-char c out)))))))
+    (collect)))
+
+
+(defun read-string (cursor)
+  (make-token :type 'string
+              :value (read-delimited-string (cursor-stream cursor) #\")
+              :position (incf (cursor-position cursor))))
+
+(defun read-domain-literal (cursor)
+  (make-token :type 'domain-literal
+              :value (read-delimited-string (cursor-stream cursor) #\])
+              :position (incf (cursor-position cursor))))
+
+(defun read-comment (cursor)
+  (make-token :type 'comment
+              :value (read-delimited-string (cursor-stream cursor) #\) :nesting-start-char #\()
+              :position (incf (cursor-position cursor))))
+
+(declaim (inline atom-component-p))
+(defun atom-component-p (c)
+  (declare (type character c))
+  (not (find c " ()\"[]@.<>:;,")))
+
+(defun read-atext (first-character cursor)
+  (be string (with-output-to-string (out)
+               (write-char first-character out)
+               (loop
+                  for c = (read-char (cursor-stream cursor) nil)
+                  while (and c (atom-component-p c))
+                  do (write-char c out)
+                  finally (when c
+                            (unread-char c (cursor-stream cursor)))))
+    (make-token :type 'atext
+                :value string
+                :position (incf (cursor-position cursor)))))
+
+(defmethod read-next-tokens ((cursor cursor))
+  (flet ((make-keyword (c)
+           (make-token :type 'keyword
+                       :value (string c)
+                       :position (incf (cursor-position cursor)))))
+    (be in (cursor-stream cursor)
+      (loop
+         for c = (read-char in nil)
+         while c
+         unless (whitespace-p c)
+         return (list
+                 (cond ((char= #\( c)
+                        (read-comment cursor))
+                       ((char= #\" c)
+                        (read-string cursor))
+                       ((char= #\[ c)
+                        (read-domain-literal cursor))
+                       ((find c "@.<>:;,")
+                        (make-keyword c))
+                       (t
+                        ;; anything else is considered a text atom even
+                        ;; though it's just a single character
+                        (read-atext c cursor))))))))
+
+(defun analyse-string (string)
+  "Return the list of tokens produced by a lexical analysis of
+STRING.  These are the tokens that would be seen by the parser."
+  (with-input-from-string (stream string)
+    (be cursor (make-cursor :stream stream)
+      (loop
+         for tokens = (read-next-tokens cursor)
+         until (endp tokens)
+         append tokens))))
+
+(defun mailboxes-only (list-of-mailboxes-and-groups)
+  "Return a flat list of MAILBOX-ADDRESSes from
+LIST-OF-MAILBOXES-AND-GROUPS, which is the kind of list returned
+by PARSE-ADDRESSES.  This turns out to be useful when your
+program is not interested in mailbox groups and expects the user
+addresses only."
+  (mapcan #'(lambda (mbx)
+              (if (typep mbx 'mailbox-group)
+                  (mbxg-mailboxes mbx)
+                  (list mbx)))
+          list-of-mailboxes-and-groups))
+
+(defun parse-addresses (string &key no-groups)
+  "Parse STRING and return a list of MAILBOX-ADDRESSes or
+MAILBOX-GROUPs.  If STRING is unparsable return NIL.  If
+NO-GROUPS is true, return a flat list of mailboxes throwing away
+the group containers, if any."
+  (be grammar (force define-grammar)
+    (with-input-from-string (stream string)
+      (be* cursor (make-cursor :stream stream)
+           mailboxes (ignore-errors	; ignore parsing errors
+                       (parse grammar 'address-list cursor))
+        (if no-groups
+            (mailboxes-only mailboxes)
+            mailboxes)))))
+
+(defun debug-addresses (string)
+  "More or less like PARSE-ADDRESSES, but don't ignore parsing errors."
+  (be grammar (force define-grammar)
+    (with-input-from-string (stream string)
+      (be cursor (make-cursor :stream stream)
+        (parse grammar 'address-list cursor)))))
+