diff options
author | sterni <sternenseemann@systemli.org> | 2024-12-10T23·22+0100 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2025-02-05T13·01+0000 |
commit | 3224488a29f681bbf7b53d5b635d80224ea013e6 (patch) | |
tree | f023ed9664b6753a97ac3278d321c667d3b7add4 | |
parent | 84bdb1e89a3d5f1c51dc4f6ce699fb5c752fbd7b (diff) |
feat(sterni/mn2html): reimplement mnote-html in Rust r/9177
Reimplement the MIME body extraction and HTML rewriting from mblog in Rust so that shelling out to it becomes viable. The problem with mnote-html is mainly that it – being written in CL – requires a ~300MB executable and is a bit sluggish starting. Change-Id: I5c1adc1a7ab5f3dde207f9a1f67ace685bd3f69f Reviewed-on: https://cl.tvl.fyi/c/depot/+/13014 Tested-by: BuildkiteCI Reviewed-by: sterni <sternenseemann@systemli.org> Autosubmit: sterni <sternenseemann@systemli.org>
-rw-r--r-- | users/sterni/mn2html/.gitignore | 1 | ||||
-rw-r--r-- | users/sterni/mn2html/Cargo.lock | 534 | ||||
-rw-r--r-- | users/sterni/mn2html/Cargo.toml | 14 | ||||
-rw-r--r-- | users/sterni/mn2html/README.md | 20 | ||||
-rw-r--r-- | users/sterni/mn2html/default.nix | 25 | ||||
-rw-r--r-- | users/sterni/mn2html/mn2html.rs | 165 |
6 files changed, 759 insertions, 0 deletions
diff --git a/users/sterni/mn2html/.gitignore b/users/sterni/mn2html/.gitignore new file mode 100644 index 000000000000..eb5a316cbd19 --- /dev/null +++ b/users/sterni/mn2html/.gitignore @@ -0,0 +1 @@ +target diff --git a/users/sterni/mn2html/Cargo.lock b/users/sterni/mn2html/Cargo.lock new file mode 100644 index 000000000000..19de2c57e69d --- /dev/null +++ b/users/sterni/mn2html/Cargo.lock @@ -0,0 +1,534 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "amn2html" +version = "0.1.0" +dependencies = [ + "lol_html", + "mail-parser", + "memmap2", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "matches", + "phf", + "proc-macro2", + "quote", + "smallvec", + "syn 1.0.109", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.90", +] + +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.90", +] + +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.168" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "lol_html" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2872b88213f3cd4b04f719ec8f2e0b37c98882a7c4aa6fc13ba2f19f5eba1bd2" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cssparser", + "encoding_rs", + "hashbrown", + "lazy_static", + "lazycell", + "memchr", + "mime", + "selectors", + "thiserror", +] + 
+[[package]] +name = "mail-parser" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93c3b9e5d8b17faf573330bbc43b37d6e918c0a3bf8a88e7d0a220ebc84af9fc" +dependencies = [ + "encoding_rs", +] + +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memmap2" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +dependencies = [ + "libc", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared", + "proc-macro-hack", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + 
"phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro-hack" +version = "0.5.20+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom", + "libc", + "rand_chacha", + "rand_core", + 
"rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags 1.3.2", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = 
"unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] diff --git a/users/sterni/mn2html/Cargo.toml b/users/sterni/mn2html/Cargo.toml new file mode 100644 index 000000000000..6c959db4a07d --- /dev/null +++ b/users/sterni/mn2html/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "mn2html" +version = "0.0.0" +edition = "2021" +license = "GPL-3.0-only" + +[dependencies] +lol_html = "2.1.0" +mail-parser = { version = "0.9.4", features = [ "encoding_rs" ] } +memmap2 = "0.9.5" + +[[bin]] +name = "mn2html" +path = "mn2html.rs" diff --git a/users/sterni/mn2html/README.md b/users/sterni/mn2html/README.md new file mode 100644 index 000000000000..bec557f19ab4 --- /dev/null +++ b/users/sterni/mn2html/README.md @@ -0,0 +1,20 @@ +# mn2html + +Convert mail notes authored e.g. by the iOS/macOS Notes application, +into HTML suitable for standard browsers. Instead of full documents, +mn2html emits HTML fragments that can easily be embedded into other +documents or postprocessed using a templating engine. + +## History + +mn2html is a reimplementation mnote-html from //users/sterni/mblog. 
+The reason for this was mainly avoiding the startup cost associated +with Common Lisp programs, so the program would be suitable for +shell scripting. + +## Tasks + +- [ ] Properly handle `text/plain` bodies (from e.g. notemap) +- [ ] Add man page +- [ ] Help screen +- [ ] Improve error reporting diff --git a/users/sterni/mn2html/default.nix b/users/sterni/mn2html/default.nix new file mode 100644 index 000000000000..d6f2c21a4c1f --- /dev/null +++ b/users/sterni/mn2html/default.nix @@ -0,0 +1,25 @@ +{ pkgs, lib, ... }: + +pkgs.rustPlatform.buildRustPackage rec { + pname = "mn2hmtl"; + version = "canon"; + + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.unions [ + ./Cargo.lock + ./Cargo.toml + ./mn2html.rs + ]; + }; + + cargoLock.lockFile = ./Cargo.lock; + + passthru.shell = pkgs.mkShell { + name = "${pname}-shell"; + nativeBuildInputs = [ + pkgs.buildPackages.cargo + pkgs.buildPackages.rustc + ]; + }; +} diff --git a/users/sterni/mn2html/mn2html.rs b/users/sterni/mn2html/mn2html.rs new file mode 100644 index 000000000000..cda8f2993321 --- /dev/null +++ b/users/sterni/mn2html/mn2html.rs @@ -0,0 +1,165 @@ +// SPDX-FileCopyrightText: Copyright © 2024 sterni +// SPDX-License-Identifier: GPL-3.0-only +use lol_html::html_content::ContentType; +use lol_html::{element, HtmlRewriter, Settings}; +use mail_parser::{Message, MessageParser, MimeHeaders}; +use memmap2::Mmap; + +use std::collections::HashMap; +use std::env; +use std::error::Error; +use std::fmt; +use std::fs::File; +use std::io::Write; + +type CidMap<'a> = HashMap<&'a str, &'a str>; + +#[derive(Debug)] +enum Mn2htmlError { + MimeParseFail, + NoMailNote, + MissingAttachment(String), +} + +impl fmt::Display for Mn2htmlError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Mn2htmlError::MimeParseFail => { + write!(f, "Could not parse given file as a MIME message") + } + Mn2htmlError::NoMailNote => { + write!(f, "Given MIME message does not appear to be a Mail 
Note") + } + Mn2htmlError::MissingAttachment(cid) => write!( + f, + "Given object's Content-Id {} doesn't match any attachment", + cid + ), + } + } +} + +impl Error for Mn2htmlError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + None + } +} + +fn warn(msg: &str) { + eprintln!("mn2html: {}", msg); +} + +fn main() -> Result<(), Box<dyn std::error::Error>> { + for arg in env::args_os().skip(1) { + // TODO(sterni): flags, --help and such + let msg_file = File::open(arg)?; + let msg_raw = unsafe { Mmap::map(&msg_file) }?; + + let msg_parsed = MessageParser::default() + .parse(msg_raw.as_ref()) + .ok_or(Mn2htmlError::MimeParseFail)?; + + if !matches!( + msg_parsed + .header("X-Uniform-Type-Identifier") + .and_then(|h| h.as_text()), + Some("com.apple.mail-note") + ) { + return Err(Box::new(Mn2htmlError::NoMailNote)); + } + + let cid_map = index_attachments(&msg_parsed); + let html_body = msg_parsed + .html_bodies() + .nth(0) + .ok_or(Mn2htmlError::NoMailNote)? + .contents(); + + rewrite_html(html_body, &cid_map)?; + } + + Ok(()) +} + +// At some point, it was a consideration to move this out of the Rust program. +// mn2html would have been a shell script with mblaze(7) tools finding the +// attachments and their Content-Ids, passing the information to a Rust HTML +// rewriter via CLI args. It is unclear how much (if at all?) slower this would +// have been. In the end, it just seemed cleaner to do it in the Rust program, +// especially since the HTML rewriter would not really have been useful on its +// own. 
+fn index_attachments<'a>(msg: &'a Message) -> CidMap<'a> { + let mut map = HashMap::new(); + for a in msg.attachments() { + match (a.content_id(), a.attachment_name()) { + (Some(cid), Some(filename)) => { + if let Some(_) = map.insert(cid, filename) { + warn("multiple attachments share the same Content-Id"); + } + } + (_, _) => warn("attachment without Content-Id and/or filename in Content-Disposition"), + } + } + + map +} + +fn rewrite_html(html_body: &[u8], cid_map: &CidMap) -> Result<(), Box<dyn std::error::Error>> { + let mut stdout = std::io::stdout(); + let mut rewriter = HtmlRewriter::new( + Settings { + element_content_handlers: vec![ + element!("head", |el| { + el.remove(); + Ok(()) + }), + element!("body", |el| { + el.remove_and_keep_content(); + Ok(()) + }), + element!("html", |el| { + el.remove_and_keep_content(); + Ok(()) + }), + element!("object[type][data]", |el| { + if el + .get_attribute("type") + .expect("element! matched object[type] without type attribute") + != "application/x-apple-msg-attachment" + { + warn("encountered object with unknown type attribute, ignoring"); + return Ok(()); + } + + match el + .get_attribute("data") + .expect("element! matched object[data] without data attribute") + .split_at_checked(4) + { + Some(("cid:", cid)) => match cid_map.get(cid) { + Some(filename) => el.replace( + &format!(r#"<img src="{}">"#, filename), + ContentType::Html, + ), + _ => { + return Err(Box::new(Mn2htmlError::MissingAttachment( + cid.to_string(), + ))) + } + }, + _ => warn("encountered object with malformed data attribute, ignoring"), + }; + + Ok(()) + }), + ], + ..Settings::new() + }, + |c: &[u8]| stdout.write_all(c).expect("Can't write to stdout"), + ); + + rewriter.write(html_body)?; + rewriter.end()?; + + Ok(()) +} |