about summary refs log tree commit diff
path: root/third_party/lisp/mime4cl/streams.lisp
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2023-05-15T21·36+0200
committerclbot <clbot@tvl.fyi>2023-05-18T16·14+0000
commit3d2e55ad535371d1152a221dc31a8773b3a09ddf (patch)
tree2c036f24436431882536615f3aa8d0176e4604cc /third_party/lisp/mime4cl/streams.lisp
parentb379e44dfb871100546b3d60bd3c77fbeac61adc (diff)
refactor(mime4cl): replace *-input-adapter-stream with flexi-streams r/6152
The input adapter streams were input streams yielding either binary or
character data that could be constructed from a variable data source.
The stream would take care not to destroy the underlying data
source (i.e. not close it if it was a stream), so similar to with
FILE-PORTIONs, but simpler.

Unfortunately, the implementation was quite inefficient: They are
ultimately defined in terms of a function that retrieves the next
character in the source. This only allows for an implementation of
READ-CHAR (and READ-BYTE). Thanks to cl/8559, READ-SEQUENCE can be used
on e.g. FILE-PORTION, but this was still negated by a input adapter
based on one—then, READ-SEQUENCE would need to fall back on READ-CHAR or
READ-BYTE again.

Luckily, we can replace BINARY-INPUT-ADAPTER-STREAM and
CHARACTER-INPUT-ADAPTER-STREAM with a much simpler abstraction: Instead
of extra stream classes, we have a function, MAKE-INPUT-ADAPTER, which
returns an appropriate instance of FLEXI-STREAM based on a given source.
This way, the need for a distinction between binary and character input
adapter is eliminated, since FLEXI-STREAMS supports both binary and
character reads (external format is not yet handled, though).
Consequently, the :binary keyword argument to MIME-BODY-STREAM can be
dropped.

flexi-streams provides stream classes for everything except a stream
that doesn't close the underlying one. Since we have already implemented
this in POSITIONED-FLEXI-INPUT-STREAM, we can split this functionality
into a new superclass ADAPTER-FLEXI-INPUT-STREAM.

This change also allows addressing the performance regression
encountered in cl/8559: It seems that flexi-streams performs worse when
we are reading byte by byte or char by char. (After this change mblog is
still two times slower than on r/6150.) By eliminating the adapter
streams, we can start utilizing READ-SEQUENCE via decoding code that
supports it (i.e. qbase64) and bring performance on par with r/6150
again. Surely there are also ways to gain back even more performance
which has to be determined using profiling. Buffering more aggressively
seems like a sure bet, though.

Switching to flexi-streams still seems like a no-brainer, as it allows
us to drop a lot of code that was quite hacky (e.g. DELIMITED-INPUT-
STREAM) and implements en/decoding handling we did not support before,
but would need for improved correctness.

Change-Id: Ie2d1f4e42b47512a5660a1ccc0deeec2bff9788d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8581
Autosubmit: sterni <sternenseemann@systemli.org>
Reviewed-by: sterni <sternenseemann@systemli.org>
Tested-by: BuildkiteCI
Diffstat (limited to 'third_party/lisp/mime4cl/streams.lisp')
-rw-r--r--third_party/lisp/mime4cl/streams.lisp169
1 files changed, 53 insertions, 116 deletions
diff --git a/third_party/lisp/mime4cl/streams.lisp b/third_party/lisp/mime4cl/streams.lisp
index 055104b5c422..b1c78d6d08b6 100644
--- a/third_party/lisp/mime4cl/streams.lisp
+++ b/third_party/lisp/mime4cl/streams.lisp
@@ -1,7 +1,7 @@
 ;;; streams.lisp --- En/De-coding Streams
 
 ;;; Copyright (C) 2012 by Walter C. Pelissero
-;;; Copyright (C) 2021-2022 by the TVL Authors
+;;; Copyright (C) 2021-2023 by the TVL Authors
 
 ;;; Author: Walter C. Pelissero <walter@pelissero.de>
 ;;; Project: mime4cl
@@ -39,6 +39,10 @@
 (defclass coder-output-stream-mixin (fundamental-binary-output-stream coder-stream-mixin)
   ())
 
+;; TODO(sterni): temporary, ugly measure to make flexi-streams happy
+(defmethod stream-element-type ((stream coder-input-stream-mixin))
+  (declare (ignore stream))
+  '(unsigned-byte 8))
 
 (defclass quoted-printable-decoder-stream (coder-input-stream-mixin quoted-printable-decoder) ())
 (defclass base64-decoder-stream (coder-input-stream-mixin base64-decoder) ())
@@ -136,112 +140,59 @@ in a stream of character."))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(defclass input-adapter-stream ()
-  ((source :initarg :source)
-   (real-stream)
-   (input-function)))
-
-(defclass binary-input-adapter-stream (fundamental-binary-input-stream input-adapter-stream) ())
-
-(defclass character-input-adapter-stream (fundamental-character-input-stream input-adapter-stream) ())
-
-(defmethod stream-element-type ((stream binary-input-adapter-stream))
-  '(unsigned-byte 8))
-
-(defmethod initialize-instance ((stream input-adapter-stream) &key &allow-other-keys)
-  (call-next-method)
-  (assert (slot-boundp stream 'source)))
-
-(defmethod initialize-instance ((stream binary-input-adapter-stream) &key &allow-other-keys)
-  (call-next-method)
-  ;; REAL-STREAM slot is set only if we are going to close it later on
-  (with-slots (source real-stream input-function) stream
-    (etypecase source
-      (string
-       (setf real-stream (make-string-input-stream source)
-             input-function #'(lambda ()
-                                (awhen (read-char real-stream nil)
-                                  (char-code it)))))
-      ((vector (unsigned-byte 8))
-       (be i 0
-         (setf input-function #'(lambda ()
-                                  (when (< i (length source))
-                                    (prog1 (aref source i)
-                                      (incf i)))))))
-      (stream
-       (assert (input-stream-p source))
-       (setf input-function (if (subtypep (stream-element-type source) 'character)
-                                #'(lambda ()
-                                    (awhen (read-char source nil)
-                                      (char-code it)))
-                                #'(lambda ()
-                                    (read-byte source nil)))))
-      (pathname
-       (setf real-stream (open source :element-type '(unsigned-byte 8))
-             input-function #'(lambda ()
-                                (read-byte real-stream nil))))
-      (file-portion
-       (setf real-stream (open-decoded-file-portion source)
-             input-function #'(lambda ()
-                                (read-byte real-stream nil)))))))
-
-(defmethod initialize-instance ((stream character-input-adapter-stream) &key &allow-other-keys)
-  (call-next-method)
-  ;; REAL-STREAM slot is set only if we are going to close later on
-  (with-slots (source real-stream input-function) stream
-    (etypecase source
-      (string
-       (setf real-stream (make-string-input-stream source)
-             input-function #'(lambda ()
-                                (read-char real-stream nil))))
-      ((vector (unsigned-byte 8))
-       (be i 0
-         (setf input-function #'(lambda ()
-                                  (when (< i (length source))
-                                    (prog1 (code-char (aref source i))
-                                      (incf i)))))))
-      (stream
-       (assert (input-stream-p source))
-       (setf input-function (if (subtypep (stream-element-type source) 'character)
-                                #'(lambda ()
-                                    (read-char source nil))
-                                #'(lambda ()
-                                    (awhen (read-byte source nil)
-                                      (code-char it))))))
-      (pathname
-       (setf real-stream (open source :element-type 'character)
-             input-function #'(lambda ()
-                                (read-char real-stream nil))))
-      (file-portion
-       (setf real-stream (open-decoded-file-portion source)
-             input-function #'(lambda ()
-                                (awhen (read-byte real-stream nil)
-                                  (code-char it))))))))
-
-(defmethod close ((stream input-adapter-stream) &key abort)
-  (when (slot-boundp stream 'real-stream)
-    (with-slots (real-stream) stream
-      (close real-stream :abort abort))))
-
-(defmethod stream-read-byte ((stream binary-input-adapter-stream))
-  (with-slots (input-function) stream
-    (or (funcall input-function)
-        :eof)))
-
-(defmethod stream-read-char ((stream character-input-adapter-stream))
-  (with-slots (input-function) stream
-    (or (funcall input-function)
-        :eof)))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(defun make-custom-flexi-stream (class stream other-args)
+  (apply #'make-instance
+         class
+         :stream stream
+         (mapcar (lambda (x)
+                   ;; make-flexi-stream has a discrepancy between :initarg of
+                   ;; make-instance and its &key which we mirror here.
+                   (if (eq x :external-format) :flexi-stream-external-format x))
+                 other-args)))
 
-(defclass positioned-flexi-input-stream (flexi-input-stream)
+(defclass adapter-flexi-input-stream (flexi-input-stream)
   ((ignore-close
     :initform nil
     :initarg :ignore-close
     :documentation
     "If T, calling CLOSE on the stream does nothing.
 If NIL, the underlying stream is closed."))
+  (:documentation "FLEXI-STREAM that does not close the underlying stream on
+CLOSE if :IGNORE-CLOSE is T."))
+
+(defmethod close ((stream adapter-flexi-input-stream) &key abort)
+  (declare (ignore abort))
+  (with-slots (ignore-close) stream
+    (unless ignore-close
+      (call-next-method))))
+
+(defun make-input-adapter (source)
+  (etypecase source
+    ;; If it's already a stream, we need to make sure it's not closed by the adapter
+    (stream
+     (assert (input-stream-p source))
+     (if (and (typep source 'adapter-flexi-input-stream)
+              (slot-value source 'ignore-close))
+         source ; already ignores CLOSE
+         (make-adapter-flexi-input-stream source :ignore-close t)))
+    ;; TODO(sterni): is this necessary? (maybe with (not *lazy-mime-decode*)?)
+    (string
+     (make-input-adapter (string-to-octets source)))
+    ((vector (unsigned-byte 8))
+     (make-in-memory-input-stream source))
+    (pathname
+     (make-flexi-stream (open source :element-type '(unsigned-byte 8))))
+    (file-portion
+     (open-decoded-file-portion source))))
+
+(defun make-adapter-flexi-input-stream (stream &rest args)
+  "Create a ADAPTER-FLEXI-INPUT-STREAM. Accepts the same keyword arguments as
+MAKE-FLEXI-STREAM as well as :IGNORE-CLOSE. If T, the underlying stream is not
+closed."
+  (make-custom-flexi-stream 'adapter-flexi-input-stream stream args))
+
+(defclass positioned-flexi-input-stream (adapter-flexi-input-stream)
+  ()
   (:documentation
    "FLEXI-INPUT-STREAM that automatically advances the underlying :STREAM to
 the location given by :POSITION. This uses FILE-POSITION internally, so it'll
@@ -249,8 +200,7 @@ only works if the underlying stream position is tracked in bytes. Note that
 the underlying stream is still advanced, so having multiple instances of
 POSITIONED-FLEXI-INPUT-STREAM based with the same underlying stream won't work
 reliably.
-If :IGNORE-CLOSE is set, the underlying stream won't be closed if CLOSE is
-called on the POSITIONED-FLEXI-INPUT-STREAM."))
+Also supports :IGNORE-CLOSE of ADAPTER-FLEXI-INPUT-STREAM."))
 
 (defmethod initialize-instance ((stream positioned-flexi-input-stream)
                                 &key &allow-other-keys)
@@ -264,24 +214,11 @@ called on the POSITIONED-FLEXI-INPUT-STREAM."))
   ;; even in SBCL don't).
   (file-position (flexi-stream-stream stream) (flexi-stream-position stream)))
 
-(defmethod close ((stream positioned-flexi-input-stream) &key abort)
-  (declare (ignore abort))
-  (with-slots (ignore-close) stream
-    (unless ignore-close
-      (call-next-method))))
-
 (defun make-positioned-flexi-input-stream (stream &rest args)
   "Create a POSITIONED-FLEXI-INPUT-STREAM. Accepts the same keyword arguments as
 MAKE-FLEXI-STREAM as well as :IGNORE-CLOSE. Causes the FILE-POSITION of STREAM to
 be modified to match the :POSITION argument."
-  (apply #'make-instance
-         'positioned-flexi-input-stream
-         :stream stream
-         (mapcar (lambda (x)
-                   ;; make-flexi-stream has a discrepancy between :initarg of
-                   ;; make-instance and its &key which we mirror here.
-                   (if (eq x :external-format) :flexi-stream-external-format x))
-                 args)))
+  (make-custom-flexi-stream 'positioned-flexi-input-stream stream args))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;