From 8e156e6b86c201d2696175eee55ef7bb6123ce85 Mon Sep 17 00:00:00 2001 From: sterni Date: Mon, 2 Aug 2021 15:13:05 +0200 Subject: feat(sterni/mblog): convert apple note mime msgs to html For now mblog only contains the mnote-html executable which takes a mime message from a maildir and prints the equivalent HTML fragment to stdout. It is intended to work with the mblaze(7) utilities, i. e. mnote-html resolves all `object` tags to proper `img` inclusions with the correct filename, so mshow(1)'s -x version can supply the needed image files. A note created using Apple's Notes app (tested with the iOS version) can be converted in a viewable HTML file like this: $ mnote-html path/to/msg > fragment.html $ mshow -x path/to/msg $ cat <(echo "") fragment.html > document.html $ xdg-open document.html Note that only the limited feature set of Apple Notes when using the IMAP backend is supported. The iCloud-based one has more (quite neat) features, but its notes can only accessed via an internal API as far as I know. This CLI is a bit impractical due to the big startup overhead of loading the lisp image. mblog should be become a fully fletched static site generator in the future, but this is a good starting point and providing the mnote-html tool is certainly useful. Change-Id: Iee6d1558e939b932da1e70ca2d2ae75638d855df Reviewed-on: https://cl.tvl.fyi/c/depot/+/3271 Tested-by: BuildkiteCI Reviewed-by: sterni --- users/sterni/mblog/cli.lisp | 17 +++++ users/sterni/mblog/default.nix | 31 +++++++++ users/sterni/mblog/note.lisp | 60 +++++++++++++++++ users/sterni/mblog/packages.lisp | 15 +++++ users/sterni/mblog/transformer.lisp | 127 ++++++++++++++++++++++++++++++++++++ 5 files changed, 250 insertions(+) create mode 100644 users/sterni/mblog/cli.lisp create mode 100644 users/sterni/mblog/default.nix create mode 100644 users/sterni/mblog/note.lisp create mode 100644 users/sterni/mblog/packages.lisp create mode 100644 users/sterni/mblog/transformer.lisp (limited to 'users/sterni') diff --git a/users/sterni/mblog/cli.lisp b/users/sterni/mblog/cli.lisp new file mode 100644 index 000000000000..93be7e8b8e44 --- /dev/null +++ b/users/sterni/mblog/cli.lisp @@ -0,0 +1,17 @@ +(in-package :mblog) +(declaim (optimize (safety 3))) + +(defparameter +synopsis+ "mnote-html FILE [FILE [ ... ]]") + +;; TODO(sterni): handle relevant conditions +(defun main () + (let* ((args (uiop:command-line-arguments)) + (help-p (or (not args) + (find-if (lambda (x) + (member x '("-h" "--help" "--usage") + :test #'string=)) + args)))) + (if help-p (format *error-output* "Usage: ~A~%" +synopsis+) + (loop for arg in args + do (apple-note-html-fragment + (mime:mime-message (pathname arg)) *standard-output*))))) diff --git a/users/sterni/mblog/default.nix b/users/sterni/mblog/default.nix new file mode 100644 index 000000000000..16ae573ba78c --- /dev/null +++ b/users/sterni/mblog/default.nix @@ -0,0 +1,31 @@ +{ depot, pkgs, ... }: + +depot.nix.buildLisp.program { + name = "mnote-html"; + + srcs = [ + ./packages.lisp + ./transformer.lisp + ./note.lisp + ./cli.lisp + ]; + + deps = [ + { + sbcl = depot.nix.buildLisp.bundled "uiop"; + default = depot.nix.buildLisp.bundled "asdf"; + } + depot.third_party.lisp.alexandria + depot.third_party.lisp.closure-html + depot.third_party.lisp.cl-who + depot.third_party.lisp.mime4cl + ]; + + main = "mblog:main"; + + # due to sclf + brokenOn = [ + "ccl" + "ecl" + ]; +} diff --git a/users/sterni/mblog/note.lisp b/users/sterni/mblog/note.lisp new file mode 100644 index 000000000000..fa4de0956ffb --- /dev/null +++ b/users/sterni/mblog/note.lisp @@ -0,0 +1,60 @@ +(in-package :mblog) +(declaim (optimize (safety 3))) + +;;; util + +(defun html-escape-stream (in out) + "Escape characters read from stream IN and write them to + stream OUT escaped using WHO:ESCAPE-CHAR-MINIMAL." + (loop for char = (read-char in nil nil) + while char + do (write-string (who:escape-char-minimal char) out))) + +(defun cid-header-value (cid) + "Takes a Content-ID as present in Apple Notes' tags and properly + surrounds them with angle brackets for a MIME header" + (concatenate 'string "<" cid ">")) + +;;; main implementation + +;; TODO(sterni): make this a “parser” instead of a predicate +(defun apple-note-p (msg) + "Checks X-Uniform-Type-Identifier of a MIME:MIME-MESSAGE + to determine if a given mime message is an Apple Note." + (when-let (uniform-id (assoc "X-Uniform-Type-Identifier" + (mime:mime-message-headers msg) + :test #'string=)) + (string= (cdr uniform-id) "com.apple.mail-note"))) + +(defun apple-note-html-fragment (msg out) + "Takes a MIME:MIME-MESSAGE and writes its text content as HTML to + the OUT stream. The tags are resolved to which + refer to the respective attachment's filename as a relative path, + but extraction of the attachments must be done separately. The + surrounding and tags are stripped and + discarded completely, so only a fragment which can be included + in custom templates remains." + (let ((text (find-mime-text-part msg))) + (cond + ;; Sanity checking of the note + ((not (apple-note-p msg)) + (error "Unsupported or missing X-Uniform-Type-Identifier")) + ((not text) (error "Malformed Apple Note: no text part")) + ;; notemap creates text/plain notes we need to handle properly. + ;; Additionally we *could* check X-Mailer which notemap sets + ((string= (mime:mime-subtype text) "plain") + (html-escape-stream (mime:mime-body-stream text :binary nil) out)) + ;; Notes.app creates text/html parts + ((string= (mime:mime-subtype text) "html") + (closure-html:parse + (mime:mime-body-stream text) + (make-instance + 'apple-note-transformer + :cid-lookup + (lambda (cid) + (when-let* ((part (mime:find-mime-part-by-id msg (cid-header-value cid))) + (file (mime:mime-part-file-name part))) + file)) + :next-handler + (closure-html:make-character-stream-sink out)))) + (t (error "Malformed Apple Note: unknown mime type"))))) diff --git a/users/sterni/mblog/packages.lisp b/users/sterni/mblog/packages.lisp new file mode 100644 index 000000000000..ca2e41b6827f --- /dev/null +++ b/users/sterni/mblog/packages.lisp @@ -0,0 +1,15 @@ +(defpackage :mblog + (:use + :common-lisp + :mime4cl + :closure-html + :who + :uiop) + (:shadow :with-html-output) ; conflict between closure-html and who + (:import-from + :alexandria + :when-let* + :when-let + :starts-with-subseq + :ends-with-subseq) + (:export :main)) diff --git a/users/sterni/mblog/transformer.lisp b/users/sterni/mblog/transformer.lisp new file mode 100644 index 000000000000..f26c5652a266 --- /dev/null +++ b/users/sterni/mblog/transformer.lisp @@ -0,0 +1,127 @@ +(in-package :mblog) +(declaim (optimize (safety 3))) + +;; Throw away these tags and all of their children +(defparameter +discard-tags-with-children+ '("HEAD")) +;; Only “strip” these tags and leave their content as is +(defparameter +discard-tags-only+ '("BODY" "HTML")) + +;; This is basically the same as cxml's PROXY-HANDLER. +;; Couldn't be bothered to make a BROADCAST-HANDLER because I +;; only need to pass through to one handler. It accepts every +;; event and passes it on to NEXT-HANDLER. This is useful for +;; subclassing mostly where an event can be modified or passed +;; on as is via CALL-NEXT-METHOD. +(defclass hax-proxy-handler (hax:default-handler) + ((next-handler + :initarg :next-handler + :accessor proxy-next-handler))) + +;; Define the trivial handlers which just call themselves for NEXT-HANDLER +(macrolet ((def-proxy-handler (name (&rest args)) + `(defmethod ,name ((h hax-proxy-handler) ,@args) + (,name (proxy-next-handler h) ,@args)))) + (def-proxy-handler hax:start-document (name p-id s-id)) + (def-proxy-handler hax:end-document ()) + (def-proxy-handler hax:start-element (name attrs)) + (def-proxy-handler hax:end-element (name)) + (def-proxy-handler hax:characters (data)) + (def-proxy-handler hax:unescaped (data)) + (def-proxy-handler hax:comment (data))) + +(defclass apple-note-transformer (hax-proxy-handler) + ((cid-lookup + :initarg :cid-lookup + :initform (lambda (cid) nil) + :accessor transformer-cid-lookup) + (discard-until + :initarg :discard-until + :initform nil + :accessor transformer-discard-until) + (depth + :initarg :depth + :initform 0 + :accessor transformer-depth)) + (:documentation + "HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note + and resolves references to attachments to IMG tags.")) + +;; Define the “boring” handlers which just call the next method (i. e. the next +;; handler) unless discard-until is not nil in which case the event is dropped. +(macrolet ((def-filter-handler (name (&rest args)) + `(defmethod ,name ((h apple-note-transformer) ,@args) + (when (not (transformer-discard-until h)) + (call-next-method))))) + (def-filter-handler hax:start-document (name p-id s-id)) + (def-filter-handler hax:end-document ()) + (def-filter-handler hax:characters (data)) + (def-filter-handler hax:unescaped (data)) + (def-filter-handler hax:comment (data))) + +(defun parse-content-id (attrlist) + (when-let (data (find-if (lambda (x) + (string= (hax:attribute-name x) "DATA")) + attrlist)) + (multiple-value-bind (starts-with-cid-p suffix) + (starts-with-subseq "cid:" (hax:attribute-value data) + :return-suffix t :test #'char=) + (if starts-with-cid-p suffix data)))) + +(defmethod hax:start-element ((handler apple-note-transformer) name attrs) + (with-accessors ((discard-until transformer-discard-until) + (next-handler proxy-next-handler) + (cid-lookup transformer-cid-lookup) + (depth transformer-depth)) + handler + + (cond + ;; If we are discarding, any started element is dropped, + ;; since the end-condition only is reached via END-ELEMENT. + (discard-until nil) + ;; If we are not discarding any outer elements, we can set + ;; up a new discard condition if we encounter an appropriate + ;; element. + ((member name +discard-tags-with-children+ :test #'string=) + (setf discard-until (cons name depth))) + ;; Only drop this event, must be mirrored in END-ELEMENT to + ;; avoid invalidly nested HTML. + ((member name +discard-tags-only+ :test #'string=) nil) + ;; If we encounter an object tag, we drop it and its contents, + ;; but only after inspecting its attributes and emitting new + ;; events representing an img tag which includes the respective + ;; attachment via its filename. + ((string= name "OBJECT") + (progn + (setf discard-until (cons "OBJECT" depth)) + ;; TODO(sterni): check type and only resolve images, raise error + ;; otherwise. We should only encounter images anyways, since + ;; other types are only supported for iCloud which doesn't seem + ;; to use IMAP for sync these days. + (when-let* ((cid (parse-content-id attrs)) + (file (apply cid-lookup (list cid))) + (src (hax:make-attribute "SRC" file))) + (hax:start-element next-handler "IMG" (list src)) + (hax:end-element next-handler "IMG")))) + ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on. + (t (call-next-method))) + (setf depth (1+ depth)))) + +(defmethod hax:end-element ((handler apple-note-transformer) name) + (with-accessors ((discard-until transformer-discard-until) + (depth transformer-depth)) + handler + + (setf depth (1- depth)) + (cond + ;; If we are discarding and encounter the same tag again at the same + ;; depth, we can stop, but still have to discard the current tag. + ((and discard-until + (string= (car discard-until) name) + (= (cdr discard-until) depth)) + (setf discard-until nil)) + ;; In all other cases, we drop properly. + (discard-until nil) + ;; Mirrored tag stripping as in START-ELEMENT + ((member name +discard-tags-only+ :test #'string=) nil) + ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on. + (t (call-next-method))))) -- cgit 1.4.1