From c3cf66f248d5b721629d3cb5293d6f1bd1358a43 Mon Sep 17 00:00:00 2001 From: sterni Date: Wed, 26 Jan 2022 21:40:04 +0100 Subject: feat(3p/lisp/mime4cl): cache offset in delimited-input-stream By computing the amount the stream position advanced we can save a syscall on every read which speeds up mime:mime-body-stream by /a lot/, e.g. extracting a ~3MB attachment drops from over 15s to under ~0.5s. There's still a lot to be gained and correctness left to be desired which can be addressed as described in the newly added comment. Change-Id: I5e1dfd213aac41203f271cf220db456dfb95a02b Reviewed-on: https://cl.tvl.fyi/c/depot/+/5073 Tested-by: BuildkiteCI Reviewed-by: sterni --- third_party/lisp/mime4cl/streams.lisp | 58 +++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 19 deletions(-) (limited to 'third_party') diff --git a/third_party/lisp/mime4cl/streams.lisp b/third_party/lisp/mime4cl/streams.lisp index d175a5d04fd6..dcac6ac34192 100644 --- a/third_party/lisp/mime4cl/streams.lisp +++ b/third_party/lisp/mime4cl/streams.lisp @@ -243,7 +243,8 @@ in a stream of character.")) (end-offset :initarg :end :initform nil :reader stream-end - :type (or null integer)))) + :type (or null integer)) + (current-offset :type integer))) (defmethod print-object ((object delimited-input-stream) stream) (if *print-readably* @@ -262,24 +263,43 @@ in a stream of character.")) (unless (slot-boundp stream 'real-stream) (error "REAL-STREAM is unbound. Must provide a :STREAM argument.")) (with-slots (start-offset) stream - (when start-offset - (file-position stream start-offset)))) - -(defmethod stream-read-char ((stream delimited-input-stream)) - (with-slots (real-stream end-offset) stream - (if (or (not end-offset) - (< (file-position real-stream) end-offset)) - (or (read-char real-stream nil) - :eof) - :eof))) - -#+(OR)(defmethod stream-read-byte ((stream delimited-input-stream)) - (with-slots (real-stream end-offset) stream - (if (or (not end-offset) - (< (file-position real-stream) end-offset)) - (or (read-byte real-stream nil) - :eof) - :eof))) + (file-position stream start-offset))) + +(defmethod (setf stream-file-position) (newval (stream delimited-input-stream)) + (with-slots (current-offset real-stream) stream + (setf current-offset newval) + (call-next-method))) + +(defmethod stream-file-position ((stream delimited-input-stream)) + (slot-value stream 'current-offset)) + +;; Calling file-position with SBCL on every read is quite expensive, since +;; it will invoke lseek each time. This is so expensive that it's faster to +;; /compute/ the amount the stream got advanced by. +;; file-position's behavior however, is quite flexible and it behaves differently +;; not only for different implementation, but also different streams in SBCL. +;; Thus, we should ideally go back to file-position and try to reduce the amount +;; of calls by using read-sequence. +;; TODO(sterni): make decoders use read-sequence and drop offset tracking code +(macrolet ((def-stream-read (name read-fun update-offset-form) + `(defmethod ,name ((stream delimited-input-stream)) + (with-slots (real-stream end-offset current-offset) stream + (let ((el (if (or (not end-offset) + (< current-offset end-offset)) + (or (,read-fun real-stream nil) + :eof) + :eof))) + (setf current-offset ,update-offset-form) + el))))) + + ;; Assume we are using an encoding where < 128 is one byte, in all other cases + ;; it's hard to guess how much file-position will increase + (def-stream-read stream-read-char read-char + (if (or (eq el :eof) (< (char-code el) 128)) + (1+ current-offset) + (file-position real-stream))) + + (def-stream-read stream-read-byte read-byte (1+ current-offset))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -- cgit 1.4.1