about summary refs log tree commit diff
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2022-01-31T23·01+0100
committersterni <sternenseemann@systemli.org>2022-02-02T20·47+0000
commit5bc73de59d9f01ec8497ac8ff142506fd7a40876 (patch)
treef5cc93f2c22739091be3f9878c3b8c8cba72d6fd
parent81c47da91c90fd8bac96c6088112f71a11964131 (diff)
feat: move mblog header handling into mime4cl r/3754
Accessing the headers of a MIME message feels like something mime4cl
should handle. We implemented this ad hoc in mblog before in order to
not need to worry about doing it in a sensible way. Now we introduce a
decent-ish interface for getting a header from a MIME message,
mime-message-header-values:

* It returns a list because MIME message headers may appear multiple
  times.

* It decodes RFC2047 only upon request, as you may want to be stricter
  about parsing certain fields.

* It checks header name equality case insensitively.

The code for decoding the RFC2047 string is retained and still uses
babel for doing the actual decoding.

Change-Id: I58bbbe4b46dbded04160b481a28a40d14775673d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5150
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
-rw-r--r--third_party/lisp/mime4cl/default.nix1
-rw-r--r--third_party/lisp/mime4cl/endec.lisp18
-rw-r--r--third_party/lisp/mime4cl/mime.lisp14
-rw-r--r--third_party/lisp/mime4cl/package.lisp3
-rw-r--r--users/sterni/mblog/default.nix1
-rw-r--r--users/sterni/mblog/note.lisp41
-rw-r--r--users/sterni/mblog/packages.lisp2
7 files changed, 43 insertions, 37 deletions
diff --git a/third_party/lisp/mime4cl/default.nix b/third_party/lisp/mime4cl/default.nix
index a14d695c298e..9d3d6253f480 100644
--- a/third_party/lisp/mime4cl/default.nix
+++ b/third_party/lisp/mime4cl/default.nix
@@ -6,6 +6,7 @@ depot.nix.buildLisp.library {
   name = "mime4cl";
 
   deps = [
+    depot.third_party.lisp.babel
     depot.third_party.lisp.sclf
     depot.third_party.lisp.npg
     depot.third_party.lisp.trivial-gray-streams
diff --git a/third_party/lisp/mime4cl/endec.lisp b/third_party/lisp/mime4cl/endec.lisp
index 9f2f9c51c260..020c212e5ec4 100644
--- a/third_party/lisp/mime4cl/endec.lisp
+++ b/third_party/lisp/mime4cl/endec.lisp
@@ -644,7 +644,7 @@ method of RFC2047 and return a sequence of bytes."
            (vector-push-extend (char-code c) output-sequence)))
        finally (return output-sequence)))
 
-(defun decode-RFC2047-string (encoding string &key (start 0) (end (length string)))
+(defun decode-RFC2047-part (encoding string &key (start 0) (end (length string)))
   "Decode STRING according to RFC2047 and return a sequence of
 bytes."
   (gcase (encoding string-equal)
@@ -674,10 +674,24 @@ sequence, a charset string indicating the original coding."
             (push (subseq text previous-end start)
                   result))
           (setf previous-end (+ end 2))
-          (push (cons (decode-RFC2047-string encoding text :start (1+ second-?) :end end)
+          (push (cons (decode-RFC2047-part encoding text :start (1+ second-?) :end end)
                       charset)
                 result))
      finally (unless (= previous-end (length text))
                (push (subseq text previous-end (length text))
                      result))
        (return (nreverse result))))
+
+(defun decode-RFC2047 (text)
+  "Decode TEXT into a fully decoded string. Whenever a non ASCII part is
+  encountered, try to decode it using babel, otherwise signal an error."
+  (flet ((decode-part (part)
+           (etypecase part
+             (cons (babel:octets-to-string
+                    (car part)
+                    :encoding (babel-encodings:get-character-encoding
+                               (intern (string-upcase (cdr part)) 'keyword))))
+             (string part))))
+    (apply #'concatenate
+           (cons 'string
+                 (mapcar #'decode-part (mime:parse-RFC2047-text text))))))
diff --git a/third_party/lisp/mime4cl/mime.lisp b/third_party/lisp/mime4cl/mime.lisp
index 1b1d98bfaf99..5639aab23641 100644
--- a/third_party/lisp/mime4cl/mime.lisp
+++ b/third_party/lisp/mime4cl/mime.lisp
@@ -622,6 +622,20 @@ found in STREAM."
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(defun mime-message-header-values (name message &key decode)
+  "Return all values of the header with NAME in MESSAGE, optionally decoding
+  it according to RFC2047 if :DECODE is T."
+  (loop ;; A header may occur multiple times
+        for header in (mime-message-headers message)
+        ;; MIME Headers should be case insensitive
+        ;; https://stackoverflow.com/a/6143644
+        when (string-equal (car header) name)
+        collect (if decode
+                    (decode-RFC2047 (cdr header))
+                    (cdr header))))
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
 (defvar *lazy-mime-decode* t
   "If true don't  decode mime bodies in memory.")
 
diff --git a/third_party/lisp/mime4cl/package.lisp b/third_party/lisp/mime4cl/package.lisp
index b1217a0b6818..5586bdc390e5 100644
--- a/third_party/lisp/mime4cl/package.lisp
+++ b/third_party/lisp/mime4cl/package.lisp
@@ -30,6 +30,8 @@
                           #:process-wait
                           #:process-alive-p
                           #:run-program)
+  (:import-from :babel :octets-to-string)
+  (:import-from :babel-encodings :get-character-encoding)
   (:export #:*lazy-mime-decode*
            #:print-mime-part
            #:read-mime-message
@@ -61,6 +63,7 @@
            #:mime-type-string
            #:mime-type-parameters
            #:mime-message-headers
+           #:mime-message-header-values
            #:mime=
            #:find-mime-part-by-path
            #:find-mime-part-by-id
diff --git a/users/sterni/mblog/default.nix b/users/sterni/mblog/default.nix
index 00e8b39ada0d..607d198930d1 100644
--- a/users/sterni/mblog/default.nix
+++ b/users/sterni/mblog/default.nix
@@ -19,7 +19,6 @@
     }
     depot.lisp.klatre
     depot.third_party.lisp.alexandria
-    depot.third_party.lisp.babel
     depot.third_party.lisp.closure-html
     depot.third_party.lisp.cl-date-time-parser
     depot.third_party.lisp.cl-who
diff --git a/users/sterni/mblog/note.lisp b/users/sterni/mblog/note.lisp
index f953d6b1e662..45be0f4e88a8 100644
--- a/users/sterni/mblog/note.lisp
+++ b/users/sterni/mblog/note.lisp
@@ -19,15 +19,8 @@
   surrounds them with angle brackets for a MIME header"
   (concatenate 'string "<" cid ">"))
 
-;; TODO(sterni): move into mime4cl
-(defun find-mime-message-header (header-name message)
-  (when-let ((header (assoc header-name
-                            (mime:mime-message-headers message)
-                            :test #'string-equal)))
-    (cdr header)))
-
 (defun find-mime-message-date (message)
-  (when-let ((date-string (find-mime-message-header "Date" message)))
+  (when-let ((date-string (car (mime:mime-message-header-values "Date" message))))
     (date-time-parser:parse-date-time date-string)))
 
 ;;; main implementation
@@ -65,24 +58,10 @@
 (defun apple-note-p (msg)
   "Checks X-Uniform-Type-Identifier of a MIME:MIME-MESSAGE
   to determine if a given mime message claims to be an Apple Note."
-  (when-let (uniform-id (assoc "X-Uniform-Type-Identifier"
-                               (mime:mime-message-headers msg)
-                               :test #'string-equal))
-    (string-equal (cdr uniform-id) "com.apple.mail-note")))
-
-(defun decode-RFC2047-to-string (input)
-  (apply
-   #'concatenate
-   (cons 'string
-         (mapcar
-          (lambda (el)
-            (etypecase el
-              (cons (babel:octets-to-string
-                     (car el)
-                     :encoding (babel-encodings:get-character-encoding
-                                (intern (string-upcase (cdr el)) 'keyword))))
-              (string el)))
-          (mime:parse-RFC2047-text input)))))
+  (when-let (uniform-id (car (mime:mime-message-header-values
+                              "X-Uniform-Type-Identifier"
+                              msg)))
+    (string-equal uniform-id "com.apple.mail-note")))
 
 (defun make-apple-note (msg)
   (check-type msg mime-message)
@@ -91,12 +70,10 @@
     (error "Passed message is not an Apple Note according to headers"))
 
   (let ((text-part (mime:find-mime-text-part msg))
-        (subject (when-let ((val (find-mime-message-header "Subject" msg)))
-                   ;; TODO(sterni): mime4cl should do this
-                   (decode-RFC2047-to-string val)))
-        (uuid (when-let ((val (find-mime-message-header
-                               "X-Universally-Unique-Identifier"
-                               msg)))
+        (subject (car (mime:mime-message-header-values "Subject" msg :decode t)))
+        (uuid (when-let ((val (car (mime:mime-message-header-values
+                                    "X-Universally-Unique-Identifier"
+                                    msg))))
                 (string-downcase val)))
         (time (find-mime-message-date msg)))
     ;; The idea here is that we don't need to check a lot manually, instead
diff --git a/users/sterni/mblog/packages.lisp b/users/sterni/mblog/packages.lisp
index 94fce16d40cf..e4fcb4672828 100644
--- a/users/sterni/mblog/packages.lisp
+++ b/users/sterni/mblog/packages.lisp
@@ -8,8 +8,6 @@
 (defpackage :note
   (:use
    :common-lisp
-   :babel
-   :babel-encodings
    :closure-html
    :cl-date-time-parser
    :mime4cl)