about summary refs log tree commit diff
path: root/users/sterni/mblog/transformer.lisp
blob: f26c5652a266e0e45f8fcf8d4c47b5295932ebb6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
(in-package :mblog)
(declaim (optimize (safety 3)))

;; Throw away these tags and all of their children
(defparameter +discard-tags-with-children+ '("HEAD"))
;; Only “strip” these tags and leave their content as is
(defparameter +discard-tags-only+ '("BODY" "HTML"))

;; This is basically the same as cxml's PROXY-HANDLER.
;; Couldn't be bothered to make a BROADCAST-HANDLER because I
;; only need to pass through to one handler. It accepts every
;; event and passes it on to NEXT-HANDLER. This is useful for
;; subclassing mostly where an event can be modified or passed
;; on as is via CALL-NEXT-METHOD.
(defclass hax-proxy-handler (hax:default-handler)
  ((next-handler
    :initarg :next-handler
    :accessor proxy-next-handler)))

;; Define the trivial handlers which just call themselves for NEXT-HANDLER
(macrolet ((def-proxy-handler (name (&rest args))
             `(defmethod ,name ((h hax-proxy-handler) ,@args)
                (,name (proxy-next-handler h) ,@args))))
  (def-proxy-handler hax:start-document (name p-id s-id))
  (def-proxy-handler hax:end-document ())
  (def-proxy-handler hax:start-element (name attrs))
  (def-proxy-handler hax:end-element (name))
  (def-proxy-handler hax:characters (data))
  (def-proxy-handler hax:unescaped (data))
  (def-proxy-handler hax:comment (data)))

(defclass apple-note-transformer (hax-proxy-handler)
  ((cid-lookup
    :initarg :cid-lookup
    :initform (lambda (cid) nil)
    :accessor transformer-cid-lookup)
   (discard-until
    :initarg :discard-until
    :initform nil
    :accessor transformer-discard-until)
   (depth
    :initarg :depth
    :initform 0
    :accessor transformer-depth))
  (:documentation
   "HAX handler that strips unnecessary tags from the HTML of a com.apple.mail-note
   and resolves references to attachments to IMG tags."))

;; Define the “boring” handlers which just call the next method (i. e. the next
;; handler) unless discard-until is not nil in which case the event is dropped.
(macrolet ((def-filter-handler (name (&rest args))
             `(defmethod ,name ((h apple-note-transformer) ,@args)
                (when (not (transformer-discard-until h))
                  (call-next-method)))))
  (def-filter-handler hax:start-document (name p-id s-id))
  (def-filter-handler hax:end-document ())
  (def-filter-handler hax:characters (data))
  (def-filter-handler hax:unescaped (data))
  (def-filter-handler hax:comment (data)))

(defun parse-content-id (attrlist)
  (when-let (data (find-if (lambda (x)
                             (string= (hax:attribute-name x) "DATA"))
                           attrlist))
    (multiple-value-bind (starts-with-cid-p suffix)
        (starts-with-subseq "cid:" (hax:attribute-value data)
                            :return-suffix t :test #'char=)
      (if starts-with-cid-p suffix data))))

(defmethod hax:start-element ((handler apple-note-transformer) name attrs)
  (with-accessors ((discard-until transformer-discard-until)
                   (next-handler proxy-next-handler)
                   (cid-lookup transformer-cid-lookup)
                   (depth transformer-depth))
      handler

    (cond
      ;; If we are discarding, any started element is dropped,
      ;; since the end-condition only is reached via END-ELEMENT.
      (discard-until nil)
      ;; If we are not discarding any outer elements, we can set
      ;; up a new discard condition if we encounter an appropriate
      ;; element.
      ((member name +discard-tags-with-children+ :test #'string=)
       (setf discard-until (cons name depth)))
      ;; Only drop this event, must be mirrored in END-ELEMENT to
      ;; avoid invalidly nested HTML.
      ((member name +discard-tags-only+ :test #'string=) nil)
      ;; If we encounter an object tag, we drop it and its contents,
      ;; but only after inspecting its attributes and emitting new
      ;; events representing an img tag which includes the respective
      ;; attachment via its filename.
      ((string= name "OBJECT")
       (progn
         (setf discard-until (cons "OBJECT" depth))
         ;; TODO(sterni): check type and only resolve images, raise error
         ;; otherwise. We should only encounter images anyways, since
         ;; other types are only supported for iCloud which doesn't seem
         ;; to use IMAP for sync these days.
         (when-let* ((cid (parse-content-id attrs))
                     (file (apply cid-lookup (list cid)))
                     (src (hax:make-attribute "SRC" file)))
           (hax:start-element next-handler "IMG" (list src))
           (hax:end-element next-handler "IMG"))))
      ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
      (t (call-next-method)))
    (setf depth (1+ depth))))

(defmethod hax:end-element ((handler apple-note-transformer) name)
  (with-accessors ((discard-until transformer-discard-until)
                   (depth transformer-depth))
      handler

    (setf depth (1- depth))
    (cond
      ;; If we are discarding and encounter the same tag again at the same
      ;; depth, we can stop, but still have to discard the current tag.
      ((and discard-until
            (string= (car discard-until) name)
            (= (cdr discard-until) depth))
       (setf discard-until nil))
      ;; In all other cases, we drop properly.
      (discard-until nil)
      ;; Mirrored tag stripping as in START-ELEMENT
      ((member name +discard-tags-only+ :test #'string=) nil)
      ;; In all other cases, we use HAX-PROXY-HANDLER to pass the event on.
      (t (call-next-method)))))