From 3d2e55ad535371d1152a221dc31a8773b3a09ddf Mon Sep 17 00:00:00 2001 From: sterni Date: Mon, 15 May 2023 23:36:40 +0200 Subject: refactor(mime4cl): replace *-input-adapter-stream with flexi-streams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The input adapter streams were input streams yielding either binary or character data that could be constructed from a variable data source. The stream would take care not to destroy the underlying data source (i.e. not close it if it was a stream), so similar to with FILE-PORTIONs, but simpler. Unfortunately, the implementation was quite inefficient: They are ultimately defined in terms of a function that retrieves the next character in the source. This only allows for an implementation of READ-CHAR (and READ-BYTE). Thanks to cl/8559, READ-SEQUENCE can be used on e.g. FILE-PORTION, but this was still negated by a input adapter based on oneā€”then, READ-SEQUENCE would need to fall back on READ-CHAR or READ-BYTE again. Luckily, we can replace BINARY-INPUT-ADAPTER-STREAM and CHARACTER-INPUT-ADAPTER-STREAM with a much simpler abstraction: Instead of extra stream classes, we have a function, MAKE-INPUT-ADAPTER, which returns an appropriate instance of FLEXI-STREAM based on a given source. This way, the need for a distinction between binary and character input adapter is eliminated, since FLEXI-STREAMS supports both binary and character reads (external format is not yet handled, though). Consequently, the :binary keyword argument to MIME-BODY-STREAM can be dropped. flexi-streams provides stream classes for everything except a stream that doesn't close the underlying one. Since we have already implemented this in POSITIONED-FLEXI-INPUT-STREAM, we can split this functionality into a new superclass ADAPTER-FLEXI-INPUT-STREAM. This change also allows addressing the performance regression encountered in cl/8559: It seems that flexi-streams performs worse when we are reading byte by byte or char by char. (After this change mblog is still two times slower than on r/6150.) By eliminating the adapter streams, we can start utilizing READ-SEQUENCE via decoding code that supports it (i.e. qbase64) and bring performance on par with r/6150 again. Surely there are also ways to gain back even more performance which has to be determined using profiling. Buffering more aggressively seems like a sure bet, though. Switching to flexi-streams still seems like a no-brainer, as it allows us to drop a lot of code that was quite hacky (e.g. DELIMITED-INPUT- STREAM) and implements en/decoding handling we did not support before, but would need for improved correctness. Change-Id: Ie2d1f4e42b47512a5660a1ccc0deeec2bff9788d Reviewed-on: https://cl.tvl.fyi/c/depot/+/8581 Autosubmit: sterni Reviewed-by: sterni Tested-by: BuildkiteCI --- third_party/lisp/mime4cl/mime.lisp | 13 +-- third_party/lisp/mime4cl/streams.lisp | 169 +++++++++++----------------------- users/sterni/mblog/note.lisp | 2 +- 3 files changed, 59 insertions(+), 125 deletions(-) diff --git a/third_party/lisp/mime4cl/mime.lisp b/third_party/lisp/mime4cl/mime.lisp index ac828216cb..3e7d83847e 100644 --- a/third_party/lisp/mime4cl/mime.lisp +++ b/third_party/lisp/mime4cl/mime.lisp @@ -183,11 +183,8 @@ :test #'string=) (mime= (mime-body part1) (mime-body part2)))) -(defun mime-body-stream (mime-part &key (binary t)) - (make-instance (if binary - 'binary-input-adapter-stream - 'character-input-adapter-stream) - :source (mime-body mime-part))) +(defun mime-body-stream (mime-part) + (make-input-adapter (mime-body mime-part))) (defun mime-body-length (mime-part) (be body (mime-body mime-part) @@ -207,8 +204,8 @@ while byte count byte)))))) -(defmacro with-input-from-mime-body-stream ((stream part &key (binary t)) &body forms) - `(with-open-stream (,stream (mime-body-stream ,part :binary ,binary)) +(defmacro with-input-from-mime-body-stream ((stream part) &body forms) + `(with-open-stream (,stream (mime-body-stream ,part)) ,@forms)) (defmethod mime= ((part1 mime-bodily-part) (part2 mime-bodily-part)) @@ -799,7 +796,7 @@ returns a MIME-MESSAGE object." (otherwise '8bit-encoder-input-stream)) :underlying-stream - (make-instance 'binary-input-adapter-stream :source body)))) + (make-input-adapter body)))) (defun choose-boundary (parts &optional default) (labels ((match-in-parts (boundary parts) diff --git a/third_party/lisp/mime4cl/streams.lisp b/third_party/lisp/mime4cl/streams.lisp index 055104b5c4..b1c78d6d08 100644 --- a/third_party/lisp/mime4cl/streams.lisp +++ b/third_party/lisp/mime4cl/streams.lisp @@ -1,7 +1,7 @@ ;;; streams.lisp --- En/De-coding Streams ;;; Copyright (C) 2012 by Walter C. Pelissero -;;; Copyright (C) 2021-2022 by the TVL Authors +;;; Copyright (C) 2021-2023 by the TVL Authors ;;; Author: Walter C. Pelissero ;;; Project: mime4cl @@ -39,6 +39,10 @@ (defclass coder-output-stream-mixin (fundamental-binary-output-stream coder-stream-mixin) ()) +;; TODO(sterni): temporary, ugly measure to make flexi-streams happy +(defmethod stream-element-type ((stream coder-input-stream-mixin)) + (declare (ignore stream)) + '(unsigned-byte 8)) (defclass quoted-printable-decoder-stream (coder-input-stream-mixin quoted-printable-decoder) ()) (defclass base64-decoder-stream (coder-input-stream-mixin base64-decoder) ()) @@ -136,112 +140,59 @@ in a stream of character.")) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(defclass input-adapter-stream () - ((source :initarg :source) - (real-stream) - (input-function))) - -(defclass binary-input-adapter-stream (fundamental-binary-input-stream input-adapter-stream) ()) - -(defclass character-input-adapter-stream (fundamental-character-input-stream input-adapter-stream) ()) - -(defmethod stream-element-type ((stream binary-input-adapter-stream)) - '(unsigned-byte 8)) - -(defmethod initialize-instance ((stream input-adapter-stream) &key &allow-other-keys) - (call-next-method) - (assert (slot-boundp stream 'source))) - -(defmethod initialize-instance ((stream binary-input-adapter-stream) &key &allow-other-keys) - (call-next-method) - ;; REAL-STREAM slot is set only if we are going to close it later on - (with-slots (source real-stream input-function) stream - (etypecase source - (string - (setf real-stream (make-string-input-stream source) - input-function #'(lambda () - (awhen (read-char real-stream nil) - (char-code it))))) - ((vector (unsigned-byte 8)) - (be i 0 - (setf input-function #'(lambda () - (when (< i (length source)) - (prog1 (aref source i) - (incf i))))))) - (stream - (assert (input-stream-p source)) - (setf input-function (if (subtypep (stream-element-type source) 'character) - #'(lambda () - (awhen (read-char source nil) - (char-code it))) - #'(lambda () - (read-byte source nil))))) - (pathname - (setf real-stream (open source :element-type '(unsigned-byte 8)) - input-function #'(lambda () - (read-byte real-stream nil)))) - (file-portion - (setf real-stream (open-decoded-file-portion source) - input-function #'(lambda () - (read-byte real-stream nil))))))) - -(defmethod initialize-instance ((stream character-input-adapter-stream) &key &allow-other-keys) - (call-next-method) - ;; REAL-STREAM slot is set only if we are going to close later on - (with-slots (source real-stream input-function) stream - (etypecase source - (string - (setf real-stream (make-string-input-stream source) - input-function #'(lambda () - (read-char real-stream nil)))) - ((vector (unsigned-byte 8)) - (be i 0 - (setf input-function #'(lambda () - (when (< i (length source)) - (prog1 (code-char (aref source i)) - (incf i))))))) - (stream - (assert (input-stream-p source)) - (setf input-function (if (subtypep (stream-element-type source) 'character) - #'(lambda () - (read-char source nil)) - #'(lambda () - (awhen (read-byte source nil) - (code-char it)))))) - (pathname - (setf real-stream (open source :element-type 'character) - input-function #'(lambda () - (read-char real-stream nil)))) - (file-portion - (setf real-stream (open-decoded-file-portion source) - input-function #'(lambda () - (awhen (read-byte real-stream nil) - (code-char it)))))))) - -(defmethod close ((stream input-adapter-stream) &key abort) - (when (slot-boundp stream 'real-stream) - (with-slots (real-stream) stream - (close real-stream :abort abort)))) - -(defmethod stream-read-byte ((stream binary-input-adapter-stream)) - (with-slots (input-function) stream - (or (funcall input-function) - :eof))) - -(defmethod stream-read-char ((stream character-input-adapter-stream)) - (with-slots (input-function) stream - (or (funcall input-function) - :eof))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(defun make-custom-flexi-stream (class stream other-args) + (apply #'make-instance + class + :stream stream + (mapcar (lambda (x) + ;; make-flexi-stream has a discrepancy between :initarg of + ;; make-instance and its &key which we mirror here. + (if (eq x :external-format) :flexi-stream-external-format x)) + other-args))) -(defclass positioned-flexi-input-stream (flexi-input-stream) +(defclass adapter-flexi-input-stream (flexi-input-stream) ((ignore-close :initform nil :initarg :ignore-close :documentation "If T, calling CLOSE on the stream does nothing. If NIL, the underlying stream is closed.")) + (:documentation "FLEXI-STREAM that does not close the underlying stream on +CLOSE if :IGNORE-CLOSE is T.")) + +(defmethod close ((stream adapter-flexi-input-stream) &key abort) + (declare (ignore abort)) + (with-slots (ignore-close) stream + (unless ignore-close + (call-next-method)))) + +(defun make-input-adapter (source) + (etypecase source + ;; If it's already a stream, we need to make sure it's not closed by the adapter + (stream + (assert (input-stream-p source)) + (if (and (typep source 'adapter-flexi-input-stream) + (slot-value source 'ignore-close)) + source ; already ignores CLOSE + (make-adapter-flexi-input-stream source :ignore-close t))) + ;; TODO(sterni): is this necessary? (maybe with (not *lazy-mime-decode*)?) + (string + (make-input-adapter (string-to-octets source))) + ((vector (unsigned-byte 8)) + (make-in-memory-input-stream source)) + (pathname + (make-flexi-stream (open source :element-type '(unsigned-byte 8)))) + (file-portion + (open-decoded-file-portion source)))) + +(defun make-adapter-flexi-input-stream (stream &rest args) + "Create a ADAPTER-FLEXI-INPUT-STREAM. Accepts the same keyword arguments as +MAKE-FLEXI-STREAM as well as :IGNORE-CLOSE. If T, the underlying stream is not +closed." + (make-custom-flexi-stream 'adapter-flexi-input-stream stream args)) + +(defclass positioned-flexi-input-stream (adapter-flexi-input-stream) + () (:documentation "FLEXI-INPUT-STREAM that automatically advances the underlying :STREAM to the location given by :POSITION. This uses FILE-POSITION internally, so it'll @@ -249,8 +200,7 @@ only works if the underlying stream position is tracked in bytes. Note that the underlying stream is still advanced, so having multiple instances of POSITIONED-FLEXI-INPUT-STREAM based with the same underlying stream won't work reliably. -If :IGNORE-CLOSE is set, the underlying stream won't be closed if CLOSE is -called on the POSITIONED-FLEXI-INPUT-STREAM.")) +Also supports :IGNORE-CLOSE of ADAPTER-FLEXI-INPUT-STREAM.")) (defmethod initialize-instance ((stream positioned-flexi-input-stream) &key &allow-other-keys) @@ -264,24 +214,11 @@ called on the POSITIONED-FLEXI-INPUT-STREAM.")) ;; even in SBCL don't). (file-position (flexi-stream-stream stream) (flexi-stream-position stream))) -(defmethod close ((stream positioned-flexi-input-stream) &key abort) - (declare (ignore abort)) - (with-slots (ignore-close) stream - (unless ignore-close - (call-next-method)))) - (defun make-positioned-flexi-input-stream (stream &rest args) "Create a POSITIONED-FLEXI-INPUT-STREAM. Accepts the same keyword arguments as MAKE-FLEXI-STREAM as well as :IGNORE-CLOSE. Causes the FILE-POSITION of STREAM to be modified to match the :POSITION argument." - (apply #'make-instance - 'positioned-flexi-input-stream - :stream stream - (mapcar (lambda (x) - ;; make-flexi-stream has a discrepancy between :initarg of - ;; make-instance and its &key which we mirror here. - (if (eq x :external-format) :flexi-stream-external-format x)) - args))) + (make-custom-flexi-stream 'positioned-flexi-input-stream stream args)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/users/sterni/mblog/note.lisp b/users/sterni/mblog/note.lisp index d44bcc0d0a..f056aaa72d 100644 --- a/users/sterni/mblog/note.lisp +++ b/users/sterni/mblog/note.lisp @@ -101,7 +101,7 @@ ;; notemap creates text/plain notes we need to handle properly. ;; Additionally we *could* check X-Mailer which notemap sets ((string-equal (apple-note-mime-subtype note) "plain") - (html-escape-stream (mime:mime-body-stream text :binary nil) out)) + (html-escape-stream (mime:mime-body-stream text) out)) ;; Notes.app creates text/html parts ((string-equal (apple-note-mime-subtype note) "html") (closure-html:parse -- cgit 1.4.1