about summary refs log tree commit diff
path: root/users/sterni/mblog/transformer.lisp
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2021-08-02T13·13+0200
committersterni <sternenseemann@systemli.org>2021-09-12T21·39+0000
commit8e156e6b86c201d2696175eee55ef7bb6123ce85 (patch)
tree81fc6e5cca83ecc7c5600e5a020f8a6d6516154c /users/sterni/mblog/transformer.lisp
parent7f31562acfc9a83e042a506bf7bdaca5de4e789a (diff)
feat(sterni/mblog): convert apple note mime msgs to html r/2854
For now mblog only contains the mnote-html executable which takes a mime
message from a maildir and prints the equivalent HTML fragment to
stdout. It is intended to work with the mblaze(7) utilities,
i. e. mnote-html resolves all `object` tags to proper `img` inclusions
with the correct filename, so mshow(1)'s -x version can supply the
needed image files. A note created using Apple's Notes app (tested with
the iOS version) can be converted in a viewable HTML file like this:

    $ mnote-html path/to/msg > fragment.html
    $ mshow -x path/to/msg
    $ cat <(echo "<!DOCTYPE html>") fragment.html > document.html
    $ xdg-open document.html

Note that only the limited feature set of Apple Notes when using the
IMAP backend is supported. The iCloud-based one has more (quite neat)
features, but its notes can only accessed via an internal API as far as
I know.

This CLI is a bit impractical due to the big startup overhead of loading
the lisp image. mblog should be become a fully fletched static site
generator in the future, but this is a good starting point and providing
the mnote-html tool is certainly useful.

Change-Id: Iee6d1558e939b932da1e70ca2d2ae75638d855df
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3271
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Diffstat (limited to 'users/sterni/mblog/transformer.lisp')
-rw-r--r--users/sterni/mblog/transformer.lisp127
1 files changed, 127 insertions, 0 deletions
diff --git a/users/sterni/mblog/transformer.lisp b/users/sterni/mblog/transformer.lisp
new file mode 100644
index 000000000000..f26c5652a266
--- /dev/null
+++ b/users/sterni/mblog/transformer.lisp
@@ -0,0 +1,127 @@
+(in-package :mblog)
+(declaim (optimize (safety 3)))
+
+;; Throw away these tags and all of their children
+(defparameter +discard-tags-with-children+ '("HEAD"))
+;; Only “strip” these tags and leave their content as is
+(defparameter +discard-tags-only+ '("BODY" "HTML"))
+
+;; This is basically the same as cxml's PROXY-HANDLER.
+;; Couldn't be bothered to make a BROADCAST-HANDLER because I
+;; only need to pass through to one handler. It accepts every
+;; event and passes it on to NEXT-HANDLER. This is useful for
+;; subclassing mostly where an event can be modified or passed
+;; on as is via CALL-NEXT-METHOD.
+(defclass hax-proxy-handler (hax:default-handler)
+  ((next-handler
+    :initarg :next-handler
+    :accessor proxy-next-handler)))
+
+;; Define the trivial handlers which just call themselves for NEXT-HANDLER
+(macrolet ((def-proxy-handler (name (&rest args))
+             `(defmethod ,name ((h hax-proxy-handler) ,@args)
+                (,name (proxy-next-handler h) ,@args))))
+  (def-proxy-handler hax:start-document (name p-id s-id))
+  (def-proxy-handler hax:end-document ())
+  (def-proxy-handler hax:start-element (name attrs))
+  (def-proxy-handler hax:end-element (name))
+  (def-proxy-handler hax:characters (data))
+  (def-proxy-handler hax:unescaped (data))
+  (def-proxy-handler hax:comment (data)))
+
+(defclass apple-note-transformer (hax-proxy-handler)
+  ((cid-lookup
+    :initarg :cid-lookup
+    :initform (lambda (cid) nil)
+    :accessor transformer-cid-lookup)
+   (discard-until
+    :initarg :discard-until
+    :initform nil
+    :accessor transformer-discard-until)
+   (depth
+    :initarg :depth
+    :initform 0
+    :accessor transformer-depth))
+  (:documentation
+   "HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note
+   and resolves references to attachments to IMG tags."))
+
+;; Define the “boring” handlers which just call the next method (i. e. the next
+;; handler) unless discard-until is not nil in which case the event is dropped.
+(macrolet ((def-filter-handler (name (&rest args))
+             `(defmethod ,name ((h apple-note-transformer) ,@args)
+                (when (not (transformer-discard-until h))
+                  (call-next-method)))))
+  (def-filter-handler hax:start-document (name p-id s-id))
+  (def-filter-handler hax:end-document ())
+  (def-filter-handler hax:characters (data))
+  (def-filter-handler hax:unescaped (data))
+  (def-filter-handler hax:comment (data)))
+
+(defun parse-content-id (attrlist)
+  (when-let (data (find-if (lambda (x)
+                             (string= (hax:attribute-name x) "DATA"))
+                           attrlist))
+    (multiple-value-bind (starts-with-cid-p suffix)
+        (starts-with-subseq "cid:" (hax:attribute-value data)
+                            :return-suffix t :test #'char=)
+      (if starts-with-cid-p suffix data))))
+
+(defmethod hax:start-element ((handler apple-note-transformer) name attrs)
+  (with-accessors ((discard-until transformer-discard-until)
+                   (next-handler proxy-next-handler)
+                   (cid-lookup transformer-cid-lookup)
+                   (depth transformer-depth))
+      handler
+
+    (cond
+      ;; If we are discarding, any started element is dropped,
+      ;; since the end-condition only is reached via END-ELEMENT.
+      (discard-until nil)
+      ;; If we are not discarding any outer elements, we can set
+      ;; up a new discard condition if we encounter an appropriate
+      ;; element.
+      ((member name +discard-tags-with-children+ :test #'string=)
+       (setf discard-until (cons name depth)))
+      ;; Only drop this event, must be mirrored in END-ELEMENT to
+      ;; avoid invalidly nested HTML.
+      ((member name +discard-tags-only+ :test #'string=) nil)
+      ;; If we encounter an object tag, we drop it and its contents,
+      ;; but only after inspecting its attributes and emitting new
+      ;; events representing an img tag which includes the respective
+      ;; attachment via its filename.
+      ((string= name "OBJECT")
+       (progn
+         (setf discard-until (cons "OBJECT" depth))
+         ;; TODO(sterni): check type and only resolve images, raise error
+         ;; otherwise. We should only encounter images anyways, since
+         ;; other types are only supported for iCloud which doesn't seem
+         ;; to use IMAP for sync these days.
+         (when-let* ((cid (parse-content-id attrs))
+                     (file (apply cid-lookup (list cid)))
+                     (src (hax:make-attribute "SRC" file)))
+           (hax:start-element next-handler "IMG" (list src))
+           (hax:end-element next-handler "IMG"))))
+      ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
+      (t (call-next-method)))
+    (setf depth (1+ depth))))
+
+(defmethod hax:end-element ((handler apple-note-transformer) name)
+  (with-accessors ((discard-until transformer-discard-until)
+                   (depth transformer-depth))
+      handler
+
+    (setf depth (1- depth))
+    (cond
+      ;; If we are discarding and encounter the same tag again at the same
+      ;; depth, we can stop, but still have to discard the current tag.
+      ((and discard-until
+            (string= (car discard-until) name)
+            (= (cdr discard-until) depth))
+       (setf discard-until nil))
+      ;; In all other cases, we drop properly.
+      (discard-until nil)
+      ;; Mirrored tag stripping as in START-ELEMENT
+      ((member name +discard-tags-only+ :test #'string=) nil)
+      ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
+      (t (call-next-method)))))