about summary refs log tree commit diff
path: root/third_party/lisp/mime4cl/endec.lisp
;;;  endec.lisp --- encoder/decoder functions

;;;  Copyright (C) 2005-2008, 2010 by Walter C. Pelissero
;;;  Copyright (C) 2023 by The TVL Authors

;;;  Author: Walter C. Pelissero <walter@pelissero.de>
;;;  Project: mime4cl

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 2.1
;;; of the License, or (at your option) any later version.
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Lesser General Public License for more details.
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this library; if not, write to the Free
;;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
;;; 02111-1307 USA


(in-package :mime4cl)

(defun redirect-stream (in out &key (buffer-size 4096))
  "Consume input stream IN and write all its content to output stream OUT.
The streams' element types need to match."
  (let ((buf (make-array buffer-size :element-type (stream-element-type in))))
    (loop for pos = (read-sequence buf in)
          while (> pos 0)
          do (write-sequence buf out :end pos))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Thank you SBCL for rendering constants totally useless!
(defparameter +base64-encode-table+
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")

(declaim (type simple-string +base64-encode-table+))

(defvar *base64-line-length* 76
  "Maximum length of the encoded base64 line.  NIL means it can
be of unlimited length \(no line breaks will be done by the
encoding function).")

(defvar *quoted-printable-line-length* 72
  "Maximum length of the encoded quoted printable line.  NIL
means it can be of unlimited length \(no line breaks will be done
by the encoding function).")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defclass decoder ()
  ((input-function :initarg :input-function
                   :reader decoder-input-function
                   :type function
                   :documentation
                   "Function is called repeatedly by the decoder methods to get the next character.
It should return a character os NIL (indicating EOF)."))
  (:documentation
   "Abstract base class for decoders."))

(defclass parsing-decoder (decoder)
  ((parser-errors :initform nil
                  :initarg :parser-errors
                  :reader decoder-parser-errors
                  :type boolean))
  (:documentation
   "Abstract base class for decoders that do parsing."))

(defclass encoder ()
  ((output-function :initarg :output-function
                    :reader encoder-output-function
                    :type function
                    :documentation
                    "Function is called repeatedly by the encoder methods to output a character.
It should expect a character as its only argument."))
  (:documentation
   "Abstract base class for encoders."))

(defclass line-encoder (encoder)
  ((column :initform 0
           :type fixnum)
   (line-length :initarg :line-length
                :initform nil
                :reader encoder-line-length
                :type (or fixnum null)))
  (:documentation
   "Abstract base class for line encoders."))

(defclass 8bit-decoder (decoder)
  ()
  (:documentation
   "Class for decoders that do nothing."))

(defclass 8bit-encoder (encoder)
  ()
  (:documentation
   "Class for encoders that do nothing."))

(defclass 7bit-decoder (decoder)
  ()
  (:documentation
   "Class for decoders that do nothing."))

(defclass 7bit-encoder (encoder)
  ()
  (:documentation
   "Class for encoders that do nothing."))

(defclass byte-decoder (decoder)
  ()
  (:documentation
   "Class for decoders that turns chars to bytes."))

(defclass byte-encoder (encoder)
  ()
  (:documentation
   "Class for encoders that turns bytes to chars."))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defgeneric encoder-write-byte (encoder byte))
(defgeneric encoder-finish-output (encoder))
(defgeneric decoder-read-byte (decoder))

(defmethod encoder-finish-output ((encoder encoder))
  (values))

(defmethod encoder-write-byte ((encoder 8bit-encoder) byte)
  (funcall (slot-value encoder 'output-function)
           (code-char byte))
  (values))

(defmethod decoder-read-byte ((decoder 8bit-decoder))
  (awhen (funcall (slot-value decoder 'input-function))
    (char-code it)))

(defmethod encoder-write-byte ((encoder 7bit-encoder) byte)
  (funcall (slot-value encoder 'output-function)
           (code-char (logand #x7F byte)))
  (values))

(defmethod decoder-read-byte ((decoder 7bit-decoder))
  (awhen (funcall (slot-value decoder 'input-function))
    (logand #x7F (char-code it))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun decoder-read-sequence (sequence decoder &key (start 0) (end (length sequence)))
  (declare (optimize (speed 3) (safety 0) (debug 0))
           (type fixnum start end)
           (type vector sequence))
  (loop
     for i fixnum from start below end
     for byte = (decoder-read-byte decoder)
     while byte
     do (setf (aref sequence i) byte)
     finally (return i)))

(defun decoder-read-line (decoder)
  (with-output-to-string (str)
    (loop
       for byte = (decoder-read-byte decoder)
       unless byte
       do (return-from decoder-read-line nil)
       do (let ((c (code-char byte)))
            (cond ((char= c #\return)
                   ;; skip the newline
                   (decoder-read-byte decoder)
                   (return nil))
                  ((char= c #\newline)
                   ;; the #\return was missing
                   (return nil))
                  (t (write-char c str)))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(declaim (inline parse-hex))
(defun parse-hex (c1 c2)
  "Parse two characters as hexadecimal and return their combined
value."
  (declare (optimize (speed 3) (safety 0) (debug 0))
           (type character c1 c2))
  (flet ((digit-value (char)
           (or (position char "0123456789ABCDEF")
               (return-from parse-hex nil))))
    (+ (* 16 (digit-value c1))
       (digit-value c2))))

(defclass quoted-printable-decoder (parsing-decoder)
  ((saved-bytes :initform (make-queue))))

(defmethod decoder-read-byte ((decoder quoted-printable-decoder))
  (declare (optimize (speed 3) (safety 0) (debug 0)))
  (with-slots (input-function saved-bytes parser-errors) decoder
    (declare (type function input-function))
    (labels ((saveb (b)
               (queue-append saved-bytes b)
               (values))
             (save (c)
               (saveb (char-code c)))
             (push-next ()
               (let ((c (funcall input-function)))
                 (declare (type (or null character) c))
                 (cond ((not c))
                       ((or (char= c #\space)
                            (char= c #\tab))
                        (save c)
                        (push-next))
                       ((char= c #\=)
                        (let ((c1 (funcall input-function)))
                          (cond ((not c1)
                                 (save #\=))
                                ((char= c1 #\return)
                                 ;; soft line break: skip the next
                                 ;; character which we assume to be a
                                 ;; newline (pity if it isn't)
                                 (funcall input-function)
                                 (push-next))
                                ((char= c1 #\newline)
                                 ;; soft line break: the #\return is
                                 ;; missing, but we are tolerant
                                 (push-next))
                                (t
                                 ;; hexadecimal sequence: get the 2nd digit
                                 (let ((c2 (funcall input-function)))
                                   (if c2
                                       (aif (parse-hex c1 c2)
                                            (saveb it)
                                            (if parser-errors
                                                (error "invalid hex sequence ~A~A" c1 c2)
                                                (progn
                                                  (save #\=)
                                                  (save c1)
                                                  (save c2))))
                                       (progn
                                         (save c)
                                         (save c1))))))))
                       (t
                        (save c))))))
      (or (queue-pop saved-bytes)
          (progn
            (push-next)
            (queue-pop saved-bytes))))))

(defmacro make-encoder-loop (encoder-class input-form output-form)
  (with-gensyms (encoder byte)
    `(loop
        with ,encoder = (make-instance ',encoder-class
                                       :output-function #'(lambda (char) ,output-form))
        for ,byte = ,input-form
        while ,byte
        do (encoder-write-byte ,encoder ,byte)
        finally (encoder-finish-output ,encoder))))

(defmacro make-decoder-loop (decoder-class input-form output-form &key parser-errors)
  (with-gensyms (decoder)
    `(loop
        with ,decoder = (make-instance ',decoder-class
                                       :input-function #'(lambda () ,input-form)
                                       :parser-errors ,parser-errors)
        for byte = (decoder-read-byte ,decoder)
        while byte
        do ,output-form)))

(defun decode-quoted-printable-stream (in out &key parser-errors)
  "Read from stream IN a quoted printable text and write to
binary output OUT the decoded stream of bytes."
  (make-decoder-loop quoted-printable-decoder
                     (read-byte in nil) (write-byte byte out)
                     :parser-errors parser-errors))

(defmacro make-stream-to-sequence-decoder (decoder-class input-form &key parser-errors)
  "Decode the character stream STREAM and return a sequence of bytes."
  (with-gensyms (output-sequence)
    `(let ((,output-sequence (make-array 0
                                         :element-type '(unsigned-byte 8)
                                         :fill-pointer 0
                                         :adjustable t)))
       (make-decoder-loop ,decoder-class ,input-form
                          (vector-push-extend byte ,output-sequence)
                          :parser-errors ,parser-errors)
       ,output-sequence)))

(defun decode-quoted-printable-stream-to-sequence (stream &key parser-errors)
  "Read from STREAM a quoted printable text and return a vector of
bytes."
  (make-stream-to-sequence-decoder quoted-printable-decoder
    (read-char stream nil)
    :parser-errors parser-errors))

(defun decode-quoted-printable-string (string &key (start 0) (end (length string)) parser-errors)
  "Decode STRING as quoted printable sequence of characters and
return a decoded sequence of bytes."
  (with-input-from-string (in string :start start :end end)
    (decode-quoted-printable-stream-to-sequence in :parser-errors parser-errors)))

(defclass quoted-printable-encoder (line-encoder)
  ((line-length :initform *quoted-printable-line-length*
                :type (or fixnum null))
   (pending-space :initform nil
                  :type boolean)))

(defmethod encoder-write-byte ((encoder quoted-printable-encoder) byte)
  (declare (optimize (speed 3) (safety 0) (debug 0))
           (type (unsigned-byte 8) byte))
  (with-slots (output-function column pending-space line-length) encoder
    (declare (type function output-function)
             (type fixnum column)
             (type (or fixnum null) line-length)
             (type boolean pending-space))
    (labels ((out (c)
               (funcall output-function c)
               (values))
             (outs (str)
               (declare (type simple-string str))
               (loop
                  for c across str
                  do (out c))
               (values))
             (out2hex (x)
               (declare (type fixnum x))
               (multiple-value-bind (a b) (truncate x 16)
                 (out (digit-char a 16))
                 (out (digit-char b 16)))))
      (cond ((= byte #.(char-code #\newline))
             (when pending-space
               (outs "=20")
               (setf pending-space nil))
             (out #\newline)
             (setf column 0))
            ((= byte #.(char-code #\space))
             (if pending-space
                 (progn
                   (out #\space)
                   (f++ column))
                 (setf pending-space t)))
            (t
             (when pending-space
               (out #\space)
               (f++ column)
               (setf pending-space nil))
             (cond ((or (< byte 32)
                        (= byte #.(char-code #\=))
                        (> byte 126))
                    (out #\=)
                    (out2hex byte)
                    (f++ column 3))
                   (t
                    (out (code-char byte))
                    (f++ column)))))
      (when (and line-length
                 (>= column line-length))
        ;; soft line break
        (outs #.(coerce '(#\= #\newline) 'string))
        (setf column 0)))))

(defmethod encoder-finish-output ((encoder quoted-printable-encoder))
  (declare (optimize (speed 3) (safety 0) (debug 0)))
  (with-slots (pending-space output-function) encoder
    (declare (type boolean pending-space)
             (type function output-function))
    (when pending-space
      (flet ((outs (s)
               (declare (type simple-string s))
               (loop
                  for c across s
                  do (funcall output-function c))))
        (setf pending-space nil)
        (outs "=20")))))

(defun encode-quoted-printable-stream (in out)
  "Read from IN a stream of bytes and write to OUT a stream of
characters quoted printables encoded."
  (make-encoder-loop quoted-printable-encoder
                     (read-byte in nil)
                     (write-char char out)))

(defun encode-quoted-printable-sequence-to-stream (sequence stream &key (start 0) (end (length sequence)))
  "Encode the sequence of bytes SEQUENCE and write to STREAM a
quoted printable sequence of characters."
  (let ((i start))
    (make-encoder-loop quoted-printable-encoder
     (when (< i end)
       (prog1 (elt sequence i)
         (f++ i)))
     (write-char char stream))))

(defun encode-quoted-printable-sequence (sequence &key (start 0) (end (length sequence)))
  "Encode the sequence of bytes SEQUENCE into a quoted printable
string and return it."
  (with-output-to-string (out)
    (encode-quoted-printable-sequence-to-stream sequence out :start start :end end)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defclass base64-encoder (line-encoder)
  ((line-length :initform *base64-line-length*)
   (bitstore :initform 0
             :type fixnum)
   (bytecount :initform 0
              :type fixnum))
  (:documentation
   "Class for Base64 encoder output streams."))


(eval-when (:load-toplevel :compile-toplevel)
  (unless (> most-positive-fixnum (expt 2 (* 8 3)))))

(macrolet ((with-encoder (encoder &body forms)
             `(with-slots (bitstore line-length column bytecount output-function) ,encoder
                (declare (type fixnum column)
                         (type fixnum bitstore bytecount)
                         (type (or fixnum null) line-length)
                         (type function output-function))
                (labels ((emitr (i b)
                           (declare (type fixnum i b))
                           (unless (zerop i)
                             (emitr (1- i) (ash b -6)))
                           (emitc
                            (char +base64-encode-table+ (logand b #x3F)))
                           (values))
                         (out (c)
                           (funcall output-function c))
                         (eol ()
                           (progn
                             (out #\return)
                             (out #\newline)))
                         (emitc (char)
                           (out char)
                           (f++ column)
                           (when (and line-length
                                      (>= column line-length))
                             (setf column 0)
                             (eol))))
                  (declare (inline out eol emitc)
                           (ignorable (function emitr) (function out) (function eol) (function emitc)))
                  ,@forms))))
  ;; For this function to work correctly, the FIXNUM must be at least
  ;; 24 bits.
  (defmethod encoder-write-byte ((encoder base64-encoder) byte)
    (declare (optimize (speed 3) (safety 0) (debug 0))
             (type (unsigned-byte 8) byte))
    (with-encoder encoder
      (setf bitstore (logior byte (the fixnum (ash bitstore 8))))
      (f++ bytecount)
      (when (= 3 bytecount)
        (emitr 3 bitstore)
        (setf bitstore 0
              bytecount 0)))
    (values))

  (defmethod encoder-finish-output ((encoder base64-encoder))
    (with-encoder encoder
      (unless (zerop bytecount)
        (multiple-value-bind (saved6 rest) (truncate (* bytecount 8) 6)
          (setf bitstore (ash bitstore (- 6 rest)))
          (emitr saved6 bitstore)
          (dotimes (x (- 3 saved6))
            (emitc #\=))))
      (when (and line-length
                 (not (zerop column)))
        (eol)))
    (values)))

(defun encode-base64-stream (in out)
  "Read a byte stream from IN and write to OUT the encoded Base64
character stream."
  (make-encoder-loop base64-encoder (read-byte in nil)
                     (write-char char out)))

(defun encode-base64-sequence-to-stream (sequence stream &key (start 0) (end (length sequence)))
  "Encode the sequence of bytes SEQUENCE and write to STREAM the
Base64 character sequence."
  (let ((i start))
    (make-encoder-loop base64-encoder
                       (when (< i end)
                         (prog1 (elt sequence i)
                           (incf i)))
                       (write-char char stream))))

(defun encode-base64-sequence (sequence &key (start 0) (end (length sequence)))
  "Encode the sequence of bytes SEQUENCE into a Base64 string and
return it."
  (with-output-to-string (out)
    (encode-base64-sequence-to-stream sequence out :start start :end end)))

(defun decode-base64-stream (in out &key parser-errors)
  "Read from IN a stream of characters Base64 encoded and write
to OUT a stream of decoded bytes."
  ;; parser-errors are ignored for base64
  (declare (ignore parser-errors))
  (redirect-stream (make-instance 'qbase64:decode-stream
                                  :underlying-stream in)
                   out))

(defun decode-base64-stream-to-sequence (stream &key parser-errors)
  "Read Base64 characters from STREAM and return result of decoding them as a
binary sequence."
  ;; parser-errors are ignored for base64
  (declare (ignore parser-errors))
  (let* ((buffered-size 4096)
         (dstream (make-instance 'qbase64:decode-stream
                                 :underlying-stream stream))
         (output-seq (make-array buffered-size
                                 :element-type '(unsigned-byte 8)
                                 :adjustable t)))
    (loop for cap = (array-dimension output-seq 0)
          for pos = (read-sequence output-seq dstream :start (or pos 0))
          if (>= pos cap)
            do (adjust-array output-seq (+ cap buffered-size))
          else
            do (progn
                 (adjust-array output-seq pos)
                 (return output-seq)))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defun dump-stream-binary (in out)
  "Write content of IN character stream to OUT binary stream."
  (loop
     for c = (read-char in nil)
     while c
     do (write-byte (char-code c) out)))

(defun decode-string (string encoding &key parser-errors-p)
  (gcase (encoding string-equal)
    (:quoted-printable
     (decode-quoted-printable-string string
                                     :parser-errors parser-errors-p))
    (:base64
     ;; parser-errors-p is unused in base64
     (qbase64:decode-string string))
    (otherwise
     (map '(vector (unsigned-byte 8)) #'char-code string))))

(defun decode-stream-to-sequence (stream encoding &key parser-errors-p)
  (gcase (encoding string-equal)
    (:quoted-printable
     (decode-quoted-printable-stream-to-sequence stream
                                                 :parser-errors parser-errors-p))
    (:base64
     (decode-base64-stream-to-sequence stream
                                       :parser-errors parser-errors-p))
    (otherwise
     (loop
        with output-sequence = (make-array 0 :fill-pointer 0
                                           :element-type '(unsigned-byte 8)
                                           :adjustable t)
        for c = (read-char stream nil)
        while c
        do (vector-push-extend (char-code c) output-sequence)
        finally (return output-sequence)))))

(defun encode-stream (in out encoding)
  (gcase (encoding string-equal)
    (:quoted-printable
     (encode-quoted-printable-stream in out))
    (:base64
     (encode-base64-stream in out))
    (otherwise
     (loop
        for byte = (read-byte in nil)
        while byte
        do (write-char (code-char byte) out)))))

(defun encode-sequence-to-stream (sequence out encoding)
  (gcase (encoding string-equal)
    (:quoted-printable
     (encode-quoted-printable-sequence-to-stream sequence out))
    (:base64
     (encode-base64-sequence-to-stream sequence out))
    (otherwise
     (loop
        for byte across sequence
        do (write-char (code-char byte) out)))))

(defun encode-sequence (sequence encoding)
  (gcase (encoding string-equal)
    (:quoted-printable
     (encode-quoted-printable-sequence sequence))
    (:base64
     (encode-base64-sequence sequence))
    (otherwise
     (map 'string #'code-char sequence))))

;; This is similar to decode-quoted-printable-string but #\_ is used
;; instead of space
(defun decode-quoted-printable-RFC2047-string (string &key (start 0) (end (length string)))
  "Decode a string encoded according to the quoted printable
method of RFC2047 and return a sequence of bytes."
  (declare (optimize (speed 3) (debug 0) (safety 0))
           (type simple-string string))
  (loop
     with output-sequence = (make-array (length string)
                                        :element-type '(unsigned-byte 8)
                                        :fill-pointer 0)
     for i fixnum from start by 1 below end
     for c = (char string i)
     do (case c
          (#\=
           (vector-push-extend (or (parse-hex (char string (1+ i)) (char string (+ 2 i)))
                                   ;; the char code was malformed
                                   #.(char-code #\?))
                               output-sequence)
           (f++ i 2))
          (#\_ (vector-push-extend #.(char-code #\space) output-sequence))
          (otherwise
           (vector-push-extend (char-code c) output-sequence)))
       finally (return output-sequence)))

(defun decode-RFC2047-part (encoding string &key (start 0) (end (length string)))
  "Decode STRING according to RFC2047 and return a sequence of
bytes."
  (gcase (encoding string-equal)
    ("Q" (decode-quoted-printable-RFC2047-string string :start start :end end))
    ("B" (qbase64:decode-string (subseq string start end)))
    (t string)))

(defun parse-RFC2047-text (text)
  "Parse the string TEXT according to RFC2047 rules and return a list
of pairs and strings.  The strings are the bits interposed between the
actually encoded text.  The pairs are composed of: a decoded byte
sequence, a charset string indicating the original coding."
  (loop
     with result = '()
     with previous-end = 0
     for start = (search "=?" text :start2 previous-end)
     while start
     for first-? = (position #\? text :start (+ 2 start))
     while first-?
     for second-? = (position #\? text :start (1+ first-?))
     while second-?
     for end = (search "?=" text :start2 (1+ second-?))
     while end
     do (let ((charset (string-upcase (subseq text (+ 2 start) first-?)))
              (encoding (subseq text (1+ first-?) second-?)))
          (unless (= previous-end start)
            (push (subseq text previous-end start)
                  result))
          (setf previous-end (+ end 2))
          (push (cons (decode-RFC2047-part encoding text :start (1+ second-?) :end end)
                      charset)
                result))
     finally (unless (= previous-end (length text))
               (push (subseq text previous-end (length text))
                     result))
       (return (nreverse result))))

(defun decode-RFC2047 (text)
  "Decode TEXT into a fully decoded string. Whenever a non ASCII part is
  encountered, try to decode it using flexi-streams, otherwise signal an error."
  (flet ((decode-part (part)
           (etypecase part
             (cons (flexi-streams:octets-to-string
                    (car part)
                    :external-format (flexi-streams:make-external-format
                                      (intern (string-upcase (cdr part)) 'keyword))))
             (string part))))
    (apply #'concatenate
           (cons 'string
                 (mapcar #'decode-part (mime:parse-RFC2047-text text))))))