about summary refs log tree commit diff
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2021-08-02T13·13+0200
committersterni <sternenseemann@systemli.org>2021-09-12T21·39+0000
commit8e156e6b86c201d2696175eee55ef7bb6123ce85 (patch)
tree81fc6e5cca83ecc7c5600e5a020f8a6d6516154c
parent7f31562acfc9a83e042a506bf7bdaca5de4e789a (diff)
feat(sterni/mblog): convert apple note mime msgs to html r/2854
For now mblog only contains the mnote-html executable which takes a mime
message from a maildir and prints the equivalent HTML fragment to
stdout. It is intended to work with the mblaze(7) utilities,
i. e. mnote-html resolves all `object` tags to proper `img` inclusions
with the correct filename, so mshow(1)'s -x version can supply the
needed image files. A note created using Apple's Notes app (tested with
the iOS version) can be converted in a viewable HTML file like this:

    $ mnote-html path/to/msg > fragment.html
    $ mshow -x path/to/msg
    $ cat <(echo "<!DOCTYPE html>") fragment.html > document.html
    $ xdg-open document.html

Note that only the limited feature set of Apple Notes when using the
IMAP backend is supported. The iCloud-based one has more (quite neat)
features, but its notes can only accessed via an internal API as far as
I know.

This CLI is a bit impractical due to the big startup overhead of loading
the lisp image. mblog should be become a fully fletched static site
generator in the future, but this is a good starting point and providing
the mnote-html tool is certainly useful.

Change-Id: Iee6d1558e939b932da1e70ca2d2ae75638d855df
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3271
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
-rw-r--r--users/sterni/mblog/cli.lisp17
-rw-r--r--users/sterni/mblog/default.nix31
-rw-r--r--users/sterni/mblog/note.lisp60
-rw-r--r--users/sterni/mblog/packages.lisp15
-rw-r--r--users/sterni/mblog/transformer.lisp127
5 files changed, 250 insertions, 0 deletions
diff --git a/users/sterni/mblog/cli.lisp b/users/sterni/mblog/cli.lisp
new file mode 100644
index 0000000000..93be7e8b8e
--- /dev/null
+++ b/users/sterni/mblog/cli.lisp
@@ -0,0 +1,17 @@
+(in-package :mblog)
+(declaim (optimize (safety 3)))
+
+(defparameter +synopsis+ "mnote-html FILE [FILE [ ... ]]")
+
+;; TODO(sterni): handle relevant conditions
+(defun main ()
+  (let* ((args (uiop:command-line-arguments))
+         (help-p (or (not args)
+                     (find-if (lambda (x)
+                                (member x '("-h" "--help" "--usage")
+                                        :test #'string=))
+                              args))))
+    (if help-p (format *error-output* "Usage: ~A~%" +synopsis+)
+      (loop for arg in args
+            do (apple-note-html-fragment
+                (mime:mime-message (pathname arg)) *standard-output*)))))
diff --git a/users/sterni/mblog/default.nix b/users/sterni/mblog/default.nix
new file mode 100644
index 0000000000..16ae573ba7
--- /dev/null
+++ b/users/sterni/mblog/default.nix
@@ -0,0 +1,31 @@
+{ depot, pkgs, ... }:
+
+depot.nix.buildLisp.program {
+  name = "mnote-html";
+
+  srcs = [
+    ./packages.lisp
+    ./transformer.lisp
+    ./note.lisp
+    ./cli.lisp
+  ];
+
+  deps = [
+    {
+      sbcl = depot.nix.buildLisp.bundled "uiop";
+      default = depot.nix.buildLisp.bundled "asdf";
+    }
+    depot.third_party.lisp.alexandria
+    depot.third_party.lisp.closure-html
+    depot.third_party.lisp.cl-who
+    depot.third_party.lisp.mime4cl
+  ];
+
+  main = "mblog:main";
+
+  # due to sclf
+  brokenOn = [
+    "ccl"
+    "ecl"
+  ];
+}
diff --git a/users/sterni/mblog/note.lisp b/users/sterni/mblog/note.lisp
new file mode 100644
index 0000000000..fa4de0956f
--- /dev/null
+++ b/users/sterni/mblog/note.lisp
@@ -0,0 +1,60 @@
+(in-package :mblog)
+(declaim (optimize (safety 3)))
+
+;;; util
+
+(defun html-escape-stream (in out)
+  "Escape characters read from stream IN and write them to
+  stream OUT escaped using WHO:ESCAPE-CHAR-MINIMAL."
+  (loop for char = (read-char in nil nil)
+        while char
+        do (write-string (who:escape-char-minimal char) out)))
+
+(defun cid-header-value (cid)
+  "Takes a Content-ID as present in Apple Notes' <object> tags and properly
+  surrounds them with angle brackets for a MIME header"
+  (concatenate 'string "<" cid ">"))
+
+;;; main implementation
+
+;; TODO(sterni): make this a “parser” instead of a predicate
+(defun apple-note-p (msg)
+  "Checks X-Uniform-Type-Identifier of a MIME:MIME-MESSAGE
+  to determine if a given mime message is an Apple Note."
+  (when-let (uniform-id (assoc "X-Uniform-Type-Identifier"
+                               (mime:mime-message-headers msg)
+                               :test #'string=))
+    (string= (cdr uniform-id) "com.apple.mail-note")))
+
+(defun apple-note-html-fragment (msg out)
+  "Takes a MIME:MIME-MESSAGE and writes its text content as HTML to
+  the OUT stream. The <object> tags are resolved to <img> which
+  refer to the respective attachment's filename as a relative path,
+  but extraction of the attachments must be done separately. The
+  surrounding <html> and <body> tags are stripped and <head>
+  discarded completely, so only a fragment which can be included
+  in custom templates remains."
+  (let ((text (find-mime-text-part msg)))
+    (cond
+      ;; Sanity checking of the note
+      ((not (apple-note-p msg))
+       (error "Unsupported or missing X-Uniform-Type-Identifier"))
+      ((not text) (error "Malformed Apple Note: no text part"))
+      ;; notemap creates text/plain notes we need to handle properly.
+      ;; Additionally we *could* check X-Mailer which notemap sets
+      ((string= (mime:mime-subtype text) "plain")
+       (html-escape-stream (mime:mime-body-stream text :binary nil) out))
+      ;; Notes.app creates text/html parts
+      ((string= (mime:mime-subtype text) "html")
+       (closure-html:parse
+        (mime:mime-body-stream text)
+        (make-instance
+         'apple-note-transformer
+         :cid-lookup
+         (lambda (cid)
+           (when-let* ((part (mime:find-mime-part-by-id msg (cid-header-value cid)))
+                       (file (mime:mime-part-file-name part)))
+             file))
+         :next-handler
+         (closure-html:make-character-stream-sink out))))
+      (t (error "Malformed Apple Note: unknown mime type")))))
diff --git a/users/sterni/mblog/packages.lisp b/users/sterni/mblog/packages.lisp
new file mode 100644
index 0000000000..ca2e41b682
--- /dev/null
+++ b/users/sterni/mblog/packages.lisp
@@ -0,0 +1,15 @@
+(defpackage :mblog
+  (:use
+   :common-lisp
+   :mime4cl
+   :closure-html
+   :who
+   :uiop)
+  (:shadow :with-html-output) ; conflict between closure-html and who
+  (:import-from
+   :alexandria
+   :when-let*
+   :when-let
+   :starts-with-subseq
+   :ends-with-subseq)
+  (:export :main))
diff --git a/users/sterni/mblog/transformer.lisp b/users/sterni/mblog/transformer.lisp
new file mode 100644
index 0000000000..f26c5652a2
--- /dev/null
+++ b/users/sterni/mblog/transformer.lisp
@@ -0,0 +1,127 @@
+(in-package :mblog)
+(declaim (optimize (safety 3)))
+
+;; Throw away these tags and all of their children
+(defparameter +discard-tags-with-children+ '("HEAD"))
+;; Only “strip” these tags and leave their content as is
+(defparameter +discard-tags-only+ '("BODY" "HTML"))
+
+;; This is basically the same as cxml's PROXY-HANDLER.
+;; Couldn't be bothered to make a BROADCAST-HANDLER because I
+;; only need to pass through to one handler. It accepts every
+;; event and passes it on to NEXT-HANDLER. This is useful for
+;; subclassing mostly where an event can be modified or passed
+;; on as is via CALL-NEXT-METHOD.
+(defclass hax-proxy-handler (hax:default-handler)
+  ((next-handler
+    :initarg :next-handler
+    :accessor proxy-next-handler)))
+
+;; Define the trivial handlers which just call themselves for NEXT-HANDLER
+(macrolet ((def-proxy-handler (name (&rest args))
+             `(defmethod ,name ((h hax-proxy-handler) ,@args)
+                (,name (proxy-next-handler h) ,@args))))
+  (def-proxy-handler hax:start-document (name p-id s-id))
+  (def-proxy-handler hax:end-document ())
+  (def-proxy-handler hax:start-element (name attrs))
+  (def-proxy-handler hax:end-element (name))
+  (def-proxy-handler hax:characters (data))
+  (def-proxy-handler hax:unescaped (data))
+  (def-proxy-handler hax:comment (data)))
+
+(defclass apple-note-transformer (hax-proxy-handler)
+  ((cid-lookup
+    :initarg :cid-lookup
+    :initform (lambda (cid) nil)
+    :accessor transformer-cid-lookup)
+   (discard-until
+    :initarg :discard-until
+    :initform nil
+    :accessor transformer-discard-until)
+   (depth
+    :initarg :depth
+    :initform 0
+    :accessor transformer-depth))
+  (:documentation
+   "HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note
+   and resolves references to attachments to IMG tags."))
+
+;; Define the “boring” handlers which just call the next method (i. e. the next
+;; handler) unless discard-until is not nil in which case the event is dropped.
+(macrolet ((def-filter-handler (name (&rest args))
+             `(defmethod ,name ((h apple-note-transformer) ,@args)
+                (when (not (transformer-discard-until h))
+                  (call-next-method)))))
+  (def-filter-handler hax:start-document (name p-id s-id))
+  (def-filter-handler hax:end-document ())
+  (def-filter-handler hax:characters (data))
+  (def-filter-handler hax:unescaped (data))
+  (def-filter-handler hax:comment (data)))
+
+(defun parse-content-id (attrlist)
+  (when-let (data (find-if (lambda (x)
+                             (string= (hax:attribute-name x) "DATA"))
+                           attrlist))
+    (multiple-value-bind (starts-with-cid-p suffix)
+        (starts-with-subseq "cid:" (hax:attribute-value data)
+                            :return-suffix t :test #'char=)
+      (if starts-with-cid-p suffix data))))
+
+(defmethod hax:start-element ((handler apple-note-transformer) name attrs)
+  (with-accessors ((discard-until transformer-discard-until)
+                   (next-handler proxy-next-handler)
+                   (cid-lookup transformer-cid-lookup)
+                   (depth transformer-depth))
+      handler
+
+    (cond
+      ;; If we are discarding, any started element is dropped,
+      ;; since the end-condition only is reached via END-ELEMENT.
+      (discard-until nil)
+      ;; If we are not discarding any outer elements, we can set
+      ;; up a new discard condition if we encounter an appropriate
+      ;; element.
+      ((member name +discard-tags-with-children+ :test #'string=)
+       (setf discard-until (cons name depth)))
+      ;; Only drop this event, must be mirrored in END-ELEMENT to
+      ;; avoid invalidly nested HTML.
+      ((member name +discard-tags-only+ :test #'string=) nil)
+      ;; If we encounter an object tag, we drop it and its contents,
+      ;; but only after inspecting its attributes and emitting new
+      ;; events representing an img tag which includes the respective
+      ;; attachment via its filename.
+      ((string= name "OBJECT")
+       (progn
+         (setf discard-until (cons "OBJECT" depth))
+         ;; TODO(sterni): check type and only resolve images, raise error
+         ;; otherwise. We should only encounter images anyways, since
+         ;; other types are only supported for iCloud which doesn't seem
+         ;; to use IMAP for sync these days.
+         (when-let* ((cid (parse-content-id attrs))
+                     (file (apply cid-lookup (list cid)))
+                     (src (hax:make-attribute "SRC" file)))
+           (hax:start-element next-handler "IMG" (list src))
+           (hax:end-element next-handler "IMG"))))
+      ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
+      (t (call-next-method)))
+    (setf depth (1+ depth))))
+
+(defmethod hax:end-element ((handler apple-note-transformer) name)
+  (with-accessors ((discard-until transformer-discard-until)
+                   (depth transformer-depth))
+      handler
+
+    (setf depth (1- depth))
+    (cond
+      ;; If we are discarding and encounter the same tag again at the same
+      ;; depth, we can stop, but still have to discard the current tag.
+      ((and discard-until
+            (string= (car discard-until) name)
+            (= (cdr discard-until) depth))
+       (setf discard-until nil))
+      ;; In all other cases, we drop properly.
+      (discard-until nil)
+      ;; Mirrored tag stripping as in START-ELEMENT
+      ((member name +discard-tags-only+ :test #'string=) nil)
+      ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
+      (t (call-next-method)))))