1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
;; SPDX-License-Identifier: GPL-3.0-only
;; SPDX-FileCopyrightText: Copyright (C) 2022 by sterni
(in-package :note)
(declaim (optimize (safety 3)))
;; Throw away these tags and all of their children
(defparameter +discard-tags-with-children+ '("HEAD"))
;; Only “strip” these tags and leave their content as is
(defparameter +discard-tags-only+ '("BODY" "HTML"))
;; This is basically the same as cxml's PROXY-HANDLER.
;; Couldn't be bothered to make a BROADCAST-HANDLER because I
;; only need to pass through to one handler. It accepts every
;; event and passes it on to NEXT-HANDLER. This is useful for
;; subclassing mostly where an event can be modified or passed
;; on as is via CALL-NEXT-METHOD.
(defclass hax-proxy-handler (hax:default-handler)
((next-handler
:initarg :next-handler
:accessor proxy-next-handler)))
;; Define the trivial handlers which just call themselves for NEXT-HANDLER
(macrolet ((def-proxy-handler (name (&rest args))
`(defmethod ,name ((h hax-proxy-handler) ,@args)
(,name (proxy-next-handler h) ,@args))))
(def-proxy-handler hax:start-document (name p-id s-id))
(def-proxy-handler hax:end-document ())
(def-proxy-handler hax:start-element (name attrs))
(def-proxy-handler hax:end-element (name))
(def-proxy-handler hax:characters (data))
(def-proxy-handler hax:unescaped (data))
(def-proxy-handler hax:comment (data)))
(defclass apple-note-transformer (hax-proxy-handler)
((cid-lookup
:initarg :cid-lookup
:initform (lambda (cid) nil)
:accessor transformer-cid-lookup)
(discard-until
:initarg :discard-until
:initform nil
:accessor transformer-discard-until)
(depth
:initarg :depth
:initform 0
:accessor transformer-depth))
(:documentation
"HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note
and resolves references to attachments to IMG tags."))
;; Define the “boring” handlers which just call the next method (i. e. the next
;; handler) unless discard-until is not nil in which case the event is dropped.
(macrolet ((def-filter-handler (name (&rest args))
`(defmethod ,name ((h apple-note-transformer) ,@args)
(when (not (transformer-discard-until h))
(call-next-method)))))
(def-filter-handler hax:start-document (name p-id s-id))
(def-filter-handler hax:end-document ())
(def-filter-handler hax:characters (data))
(def-filter-handler hax:unescaped (data))
(def-filter-handler hax:comment (data)))
(defun parse-content-id (attrlist)
(when-let (data (find-if (lambda (x)
(string-equal (hax:attribute-name x) "DATA"))
attrlist))
(multiple-value-bind (starts-with-cid-p suffix)
(starts-with-subseq "cid:" (hax:attribute-value data)
:return-suffix t :test #'char=)
(if starts-with-cid-p suffix data))))
(defmethod hax:start-element ((handler apple-note-transformer) name attrs)
(with-accessors ((discard-until transformer-discard-until)
(next-handler proxy-next-handler)
(cid-lookup transformer-cid-lookup)
(depth transformer-depth))
handler
(cond
;; If we are discarding, any started element is dropped,
;; since the end-condition only is reached via END-ELEMENT.
(discard-until nil)
;; If we are not discarding any outer elements, we can set
;; up a new discard condition if we encounter an appropriate
;; element.
((member name +discard-tags-with-children+ :test #'string-equal)
(setf discard-until (cons name depth)))
;; Only drop this event, must be mirrored in END-ELEMENT to
;; avoid invalidly nested HTML.
((member name +discard-tags-only+ :test #'string-equal) nil)
;; If we encounter an object tag, we drop it and its contents,
;; but only after inspecting its attributes and emitting new
;; events representing an img tag which includes the respective
;; attachment via its filename.
((string-equal name "OBJECT")
(progn
(setf discard-until (cons "OBJECT" depth))
;; TODO(sterni): check type and only resolve images, raise error
;; otherwise. We should only encounter images anyways, since
;; other types are only supported for iCloud which doesn't seem
;; to use IMAP for sync these days.
(when-let* ((cid (parse-content-id attrs))
(file (apply cid-lookup (list cid)))
(src (hax:make-attribute "SRC" file)))
(hax:start-element next-handler "IMG" (list src))
(hax:end-element next-handler "IMG"))))
;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
(t (call-next-method)))
(setf depth (1+ depth))))
(defmethod hax:end-element ((handler apple-note-transformer) name)
(with-accessors ((discard-until transformer-discard-until)
(depth transformer-depth))
handler
(setf depth (1- depth))
(cond
;; If we are discarding and encounter the same tag again at the same
;; depth, we can stop, but still have to discard the current tag.
((and discard-until
(string-equal (car discard-until) name)
(= (cdr discard-until) depth))
(setf discard-until nil))
;; In all other cases, we drop properly.
(discard-until nil)
;; Mirrored tag stripping as in START-ELEMENT
((member name +discard-tags-only+ :test #'string-equal) nil)
;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
(t (call-next-method)))))
|