From 5bc73de59d9f01ec8497ac8ff142506fd7a40876 Mon Sep 17 00:00:00 2001 From: sterni Date: Tue, 1 Feb 2022 00:01:59 +0100 Subject: feat: move mblog header handling into mime4cl Accessing the headers of a MIME message feels like something mime4cl should handle. We implemented this ad hoc in mblog before in order to not need to worry about doing it in a sensible way. Now we introduce a decent-ish interface for getting a header from a MIME message, mime-message-header-values: * It returns a list because MIME message headers may appear multiple times. * It decodes RFC2047 only upon request, as you may want to be stricter about parsing certain fields. * It checks header name equality case insensitively. The code for decoding the RFC2047 string is retained and still uses babel for doing the actual decoding. Change-Id: I58bbbe4b46dbded04160b481a28a40d14775673d Reviewed-on: https://cl.tvl.fyi/c/depot/+/5150 Tested-by: BuildkiteCI Reviewed-by: sterni --- third_party/lisp/mime4cl/default.nix | 1 + third_party/lisp/mime4cl/endec.lisp | 18 +++++++++++++-- third_party/lisp/mime4cl/mime.lisp | 14 ++++++++++++ third_party/lisp/mime4cl/package.lisp | 3 +++ users/sterni/mblog/default.nix | 1 - users/sterni/mblog/note.lisp | 41 ++++++++--------------------------- users/sterni/mblog/packages.lisp | 2 -- 7 files changed, 43 insertions(+), 37 deletions(-) diff --git a/third_party/lisp/mime4cl/default.nix b/third_party/lisp/mime4cl/default.nix index a14d695c29..9d3d6253f4 100644 --- a/third_party/lisp/mime4cl/default.nix +++ b/third_party/lisp/mime4cl/default.nix @@ -6,6 +6,7 @@ depot.nix.buildLisp.library { name = "mime4cl"; deps = [ + depot.third_party.lisp.babel depot.third_party.lisp.sclf depot.third_party.lisp.npg depot.third_party.lisp.trivial-gray-streams diff --git a/third_party/lisp/mime4cl/endec.lisp b/third_party/lisp/mime4cl/endec.lisp index 9f2f9c51c2..020c212e5e 100644 --- a/third_party/lisp/mime4cl/endec.lisp +++ b/third_party/lisp/mime4cl/endec.lisp @@ -644,7 +644,7 @@ method of RFC2047 and return a sequence of bytes." (vector-push-extend (char-code c) output-sequence))) finally (return output-sequence))) -(defun decode-RFC2047-string (encoding string &key (start 0) (end (length string))) +(defun decode-RFC2047-part (encoding string &key (start 0) (end (length string))) "Decode STRING according to RFC2047 and return a sequence of bytes." (gcase (encoding string-equal) @@ -674,10 +674,24 @@ sequence, a charset string indicating the original coding." (push (subseq text previous-end start) result)) (setf previous-end (+ end 2)) - (push (cons (decode-RFC2047-string encoding text :start (1+ second-?) :end end) + (push (cons (decode-RFC2047-part encoding text :start (1+ second-?) :end end) charset) result)) finally (unless (= previous-end (length text)) (push (subseq text previous-end (length text)) result)) (return (nreverse result)))) + +(defun decode-RFC2047 (text) + "Decode TEXT into a fully decoded string. Whenever a non ASCII part is + encountered, try to decode it using babel, otherwise signal an error." + (flet ((decode-part (part) + (etypecase part + (cons (babel:octets-to-string + (car part) + :encoding (babel-encodings:get-character-encoding + (intern (string-upcase (cdr part)) 'keyword)))) + (string part)))) + (apply #'concatenate + (cons 'string + (mapcar #'decode-part (mime:parse-RFC2047-text text)))))) diff --git a/third_party/lisp/mime4cl/mime.lisp b/third_party/lisp/mime4cl/mime.lisp index 1b1d98bfaf..5639aab236 100644 --- a/third_party/lisp/mime4cl/mime.lisp +++ b/third_party/lisp/mime4cl/mime.lisp @@ -622,6 +622,20 @@ found in STREAM." ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(defun mime-message-header-values (name message &key decode) + "Return all values of the header with NAME in MESSAGE, optionally decoding + it according to RFC2047 if :DECODE is T." + (loop ;; A header may occur multiple times + for header in (mime-message-headers message) + ;; MIME Headers should be case insensitive + ;; https://stackoverflow.com/a/6143644 + when (string-equal (car header) name) + collect (if decode + (decode-RFC2047 (cdr header)) + (cdr header)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (defvar *lazy-mime-decode* t "If true don't decode mime bodies in memory.") diff --git a/third_party/lisp/mime4cl/package.lisp b/third_party/lisp/mime4cl/package.lisp index b1217a0b68..5586bdc390 100644 --- a/third_party/lisp/mime4cl/package.lisp +++ b/third_party/lisp/mime4cl/package.lisp @@ -30,6 +30,8 @@ #:process-wait #:process-alive-p #:run-program) + (:import-from :babel :octets-to-string) + (:import-from :babel-encodings :get-character-encoding) (:export #:*lazy-mime-decode* #:print-mime-part #:read-mime-message @@ -61,6 +63,7 @@ #:mime-type-string #:mime-type-parameters #:mime-message-headers + #:mime-message-header-values #:mime= #:find-mime-part-by-path #:find-mime-part-by-id diff --git a/users/sterni/mblog/default.nix b/users/sterni/mblog/default.nix index 00e8b39ada..607d198930 100644 --- a/users/sterni/mblog/default.nix +++ b/users/sterni/mblog/default.nix @@ -19,7 +19,6 @@ } depot.lisp.klatre depot.third_party.lisp.alexandria - depot.third_party.lisp.babel depot.third_party.lisp.closure-html depot.third_party.lisp.cl-date-time-parser depot.third_party.lisp.cl-who diff --git a/users/sterni/mblog/note.lisp b/users/sterni/mblog/note.lisp index f953d6b1e6..45be0f4e88 100644 --- a/users/sterni/mblog/note.lisp +++ b/users/sterni/mblog/note.lisp @@ -19,15 +19,8 @@ surrounds them with angle brackets for a MIME header" (concatenate 'string "<" cid ">")) -;; TODO(sterni): move into mime4cl -(defun find-mime-message-header (header-name message) - (when-let ((header (assoc header-name - (mime:mime-message-headers message) - :test #'string-equal))) - (cdr header))) - (defun find-mime-message-date (message) - (when-let ((date-string (find-mime-message-header "Date" message))) + (when-let ((date-string (car (mime:mime-message-header-values "Date" message)))) (date-time-parser:parse-date-time date-string))) ;;; main implementation @@ -65,24 +58,10 @@ (defun apple-note-p (msg) "Checks X-Uniform-Type-Identifier of a MIME:MIME-MESSAGE to determine if a given mime message claims to be an Apple Note." - (when-let (uniform-id (assoc "X-Uniform-Type-Identifier" - (mime:mime-message-headers msg) - :test #'string-equal)) - (string-equal (cdr uniform-id) "com.apple.mail-note"))) - -(defun decode-RFC2047-to-string (input) - (apply - #'concatenate - (cons 'string - (mapcar - (lambda (el) - (etypecase el - (cons (babel:octets-to-string - (car el) - :encoding (babel-encodings:get-character-encoding - (intern (string-upcase (cdr el)) 'keyword)))) - (string el))) - (mime:parse-RFC2047-text input))))) + (when-let (uniform-id (car (mime:mime-message-header-values + "X-Uniform-Type-Identifier" + msg))) + (string-equal uniform-id "com.apple.mail-note"))) (defun make-apple-note (msg) (check-type msg mime-message) @@ -91,12 +70,10 @@ (error "Passed message is not an Apple Note according to headers")) (let ((text-part (mime:find-mime-text-part msg)) - (subject (when-let ((val (find-mime-message-header "Subject" msg))) - ;; TODO(sterni): mime4cl should do this - (decode-RFC2047-to-string val))) - (uuid (when-let ((val (find-mime-message-header - "X-Universally-Unique-Identifier" - msg))) + (subject (car (mime:mime-message-header-values "Subject" msg :decode t))) + (uuid (when-let ((val (car (mime:mime-message-header-values + "X-Universally-Unique-Identifier" + msg)))) (string-downcase val))) (time (find-mime-message-date msg))) ;; The idea here is that we don't need to check a lot manually, instead diff --git a/users/sterni/mblog/packages.lisp b/users/sterni/mblog/packages.lisp index 94fce16d40..e4fcb46728 100644 --- a/users/sterni/mblog/packages.lisp +++ b/users/sterni/mblog/packages.lisp @@ -8,8 +8,6 @@ (defpackage :note (:use :common-lisp - :babel - :babel-encodings :closure-html :cl-date-time-parser :mime4cl) -- cgit 1.4.1