From 8e156e6b86c201d2696175eee55ef7bb6123ce85 Mon Sep 17 00:00:00 2001 From: sterni Date: Mon, 2 Aug 2021 15:13:05 +0200 Subject: feat(sterni/mblog): convert apple note mime msgs to html For now mblog only contains the mnote-html executable which takes a mime message from a maildir and prints the equivalent HTML fragment to stdout. It is intended to work with the mblaze(7) utilities, i. e. mnote-html resolves all `object` tags to proper `img` inclusions with the correct filename, so mshow(1)'s -x version can supply the needed image files. A note created using Apple's Notes app (tested with the iOS version) can be converted in a viewable HTML file like this: $ mnote-html path/to/msg > fragment.html $ mshow -x path/to/msg $ cat <(echo "") fragment.html > document.html $ xdg-open document.html Note that only the limited feature set of Apple Notes when using the IMAP backend is supported. The iCloud-based one has more (quite neat) features, but its notes can only accessed via an internal API as far as I know. This CLI is a bit impractical due to the big startup overhead of loading the lisp image. mblog should be become a fully fletched static site generator in the future, but this is a good starting point and providing the mnote-html tool is certainly useful. Change-Id: Iee6d1558e939b932da1e70ca2d2ae75638d855df Reviewed-on: https://cl.tvl.fyi/c/depot/+/3271 Tested-by: BuildkiteCI Reviewed-by: sterni --- users/sterni/mblog/transformer.lisp | 127 ++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 users/sterni/mblog/transformer.lisp (limited to 'users/sterni/mblog/transformer.lisp') diff --git a/users/sterni/mblog/transformer.lisp b/users/sterni/mblog/transformer.lisp new file mode 100644 index 000000000000..f26c5652a266 --- /dev/null +++ b/users/sterni/mblog/transformer.lisp @@ -0,0 +1,127 @@ +(in-package :mblog) +(declaim (optimize (safety 3))) + +;; Throw away these tags and all of their children +(defparameter +discard-tags-with-children+ '("HEAD")) +;; Only “strip” these tags and leave their content as is +(defparameter +discard-tags-only+ '("BODY" "HTML")) + +;; This is basically the same as cxml's PROXY-HANDLER. +;; Couldn't be bothered to make a BROADCAST-HANDLER because I +;; only need to pass through to one handler. It accepts every +;; event and passes it on to NEXT-HANDLER. This is useful for +;; subclassing mostly where an event can be modified or passed +;; on as is via CALL-NEXT-METHOD. +(defclass hax-proxy-handler (hax:default-handler) + ((next-handler + :initarg :next-handler + :accessor proxy-next-handler))) + +;; Define the trivial handlers which just call themselves for NEXT-HANDLER +(macrolet ((def-proxy-handler (name (&rest args)) + `(defmethod ,name ((h hax-proxy-handler) ,@args) + (,name (proxy-next-handler h) ,@args)))) + (def-proxy-handler hax:start-document (name p-id s-id)) + (def-proxy-handler hax:end-document ()) + (def-proxy-handler hax:start-element (name attrs)) + (def-proxy-handler hax:end-element (name)) + (def-proxy-handler hax:characters (data)) + (def-proxy-handler hax:unescaped (data)) + (def-proxy-handler hax:comment (data))) + +(defclass apple-note-transformer (hax-proxy-handler) + ((cid-lookup + :initarg :cid-lookup + :initform (lambda (cid) nil) + :accessor transformer-cid-lookup) + (discard-until + :initarg :discard-until + :initform nil + :accessor transformer-discard-until) + (depth + :initarg :depth + :initform 0 + :accessor transformer-depth)) + (:documentation + "HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note + and resolves references to attachments to IMG tags.")) + +;; Define the “boring” handlers which just call the next method (i. e. the next +;; handler) unless discard-until is not nil in which case the event is dropped. +(macrolet ((def-filter-handler (name (&rest args)) + `(defmethod ,name ((h apple-note-transformer) ,@args) + (when (not (transformer-discard-until h)) + (call-next-method))))) + (def-filter-handler hax:start-document (name p-id s-id)) + (def-filter-handler hax:end-document ()) + (def-filter-handler hax:characters (data)) + (def-filter-handler hax:unescaped (data)) + (def-filter-handler hax:comment (data))) + +(defun parse-content-id (attrlist) + (when-let (data (find-if (lambda (x) + (string= (hax:attribute-name x) "DATA")) + attrlist)) + (multiple-value-bind (starts-with-cid-p suffix) + (starts-with-subseq "cid:" (hax:attribute-value data) + :return-suffix t :test #'char=) + (if starts-with-cid-p suffix data)))) + +(defmethod hax:start-element ((handler apple-note-transformer) name attrs) + (with-accessors ((discard-until transformer-discard-until) + (next-handler proxy-next-handler) + (cid-lookup transformer-cid-lookup) + (depth transformer-depth)) + handler + + (cond + ;; If we are discarding, any started element is dropped, + ;; since the end-condition only is reached via END-ELEMENT. + (discard-until nil) + ;; If we are not discarding any outer elements, we can set + ;; up a new discard condition if we encounter an appropriate + ;; element. + ((member name +discard-tags-with-children+ :test #'string=) + (setf discard-until (cons name depth))) + ;; Only drop this event, must be mirrored in END-ELEMENT to + ;; avoid invalidly nested HTML. + ((member name +discard-tags-only+ :test #'string=) nil) + ;; If we encounter an object tag, we drop it and its contents, + ;; but only after inspecting its attributes and emitting new + ;; events representing an img tag which includes the respective + ;; attachment via its filename. + ((string= name "OBJECT") + (progn + (setf discard-until (cons "OBJECT" depth)) + ;; TODO(sterni): check type and only resolve images, raise error + ;; otherwise. We should only encounter images anyways, since + ;; other types are only supported for iCloud which doesn't seem + ;; to use IMAP for sync these days. + (when-let* ((cid (parse-content-id attrs)) + (file (apply cid-lookup (list cid))) + (src (hax:make-attribute "SRC" file))) + (hax:start-element next-handler "IMG" (list src)) + (hax:end-element next-handler "IMG")))) + ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on. + (t (call-next-method))) + (setf depth (1+ depth)))) + +(defmethod hax:end-element ((handler apple-note-transformer) name) + (with-accessors ((discard-until transformer-discard-until) + (depth transformer-depth)) + handler + + (setf depth (1- depth)) + (cond + ;; If we are discarding and encounter the same tag again at the same + ;; depth, we can stop, but still have to discard the current tag. + ((and discard-until + (string= (car discard-until) name) + (= (cdr discard-until) depth)) + (setf discard-until nil)) + ;; In all other cases, we drop properly. + (discard-until nil) + ;; Mirrored tag stripping as in START-ELEMENT + ((member name +discard-tags-only+ :test #'string=) nil) + ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on. + (t (call-next-method))))) -- cgit 1.4.1