about summary refs log tree commit diff
path: root/third_party/lisp/mime4cl/endec.lisp
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2022-01-31T23·01+0100
committersterni <sternenseemann@systemli.org>2022-02-02T20·47+0000
commit5bc73de59d9f01ec8497ac8ff142506fd7a40876 (patch)
treef5cc93f2c22739091be3f9878c3b8c8cba72d6fd /third_party/lisp/mime4cl/endec.lisp
parent81c47da91c90fd8bac96c6088112f71a11964131 (diff)
feat: move mblog header handling into mime4cl r/3754
Accessing the headers of a MIME message feels like something mime4cl
should handle. We implemented this ad hoc in mblog before in order to
not need to worry about doing it in a sensible way. Now we introduce a
decent-ish interface for getting a header from a MIME message,
mime-message-header-values:

* It returns a list because MIME message headers may appear multiple
  times.

* It decodes RFC2047 only upon request, as you may want to be stricter
  about parsing certain fields.

* It checks header name equality case insensitively.

The code for decoding the RFC2047 string is retained and still uses
babel for doing the actual decoding.

Change-Id: I58bbbe4b46dbded04160b481a28a40d14775673d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5150
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Diffstat (limited to 'third_party/lisp/mime4cl/endec.lisp')
-rw-r--r--third_party/lisp/mime4cl/endec.lisp18
1 files changed, 16 insertions, 2 deletions
diff --git a/third_party/lisp/mime4cl/endec.lisp b/third_party/lisp/mime4cl/endec.lisp
index 9f2f9c51c260..020c212e5ec4 100644
--- a/third_party/lisp/mime4cl/endec.lisp
+++ b/third_party/lisp/mime4cl/endec.lisp
@@ -644,7 +644,7 @@ method of RFC2047 and return a sequence of bytes."
            (vector-push-extend (char-code c) output-sequence)))
        finally (return output-sequence)))
 
-(defun decode-RFC2047-string (encoding string &key (start 0) (end (length string)))
+(defun decode-RFC2047-part (encoding string &key (start 0) (end (length string)))
   "Decode STRING according to RFC2047 and return a sequence of
 bytes."
   (gcase (encoding string-equal)
@@ -674,10 +674,24 @@ sequence, a charset string indicating the original coding."
             (push (subseq text previous-end start)
                   result))
           (setf previous-end (+ end 2))
-          (push (cons (decode-RFC2047-string encoding text :start (1+ second-?) :end end)
+          (push (cons (decode-RFC2047-part encoding text :start (1+ second-?) :end end)
                       charset)
                 result))
      finally (unless (= previous-end (length text))
                (push (subseq text previous-end (length text))
                      result))
        (return (nreverse result))))
+
+(defun decode-RFC2047 (text)
+  "Decode TEXT into a fully decoded string. Whenever a non ASCII part is
+  encountered, try to decode it using babel, otherwise signal an error."
+  (flet ((decode-part (part)
+           (etypecase part
+             (cons (babel:octets-to-string
+                    (car part)
+                    :encoding (babel-encodings:get-character-encoding
+                               (intern (string-upcase (cdr part)) 'keyword))))
+             (string part))))
+    (apply #'concatenate
+           (cons 'string
+                 (mapcar #'decode-part (mime:parse-RFC2047-text text))))))