From e0b9d9b1cdbf9356a850dac5287b9eb63d83f3dc Mon Sep 17 00:00:00 2001
From: Vincent Ambo
Date: Wed, 20 Apr 2022 13:00:37 +0200
Subject: fix(tazjin/tgsa): Preserve newlines in messages

Change-Id: I9e2d7038357a5510898d153740ae0c0e1f7a8a3a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5483
Reviewed-by: tazjin
Autosubmit: tazjin
Tested-by: BuildkiteCI
---
 users/tazjin/tgsa/Cargo.lock  |  1 +
 users/tazjin/tgsa/Cargo.toml  |  1 +
 users/tazjin/tgsa/src/main.rs | 28 ++++++++++++++++++++++++----
 3 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'users/tazjin')

diff --git a/users/tazjin/tgsa/Cargo.lock b/users/tazjin/tgsa/Cargo.lock
index b1c154f0ab..d5d034dde4 100644
--- a/users/tazjin/tgsa/Cargo.lock
+++ b/users/tazjin/tgsa/Cargo.lock
@@ -1084,6 +1084,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "crimp",
+ "ego-tree",
  "rouille",
  "scraper",
  "url",
diff --git a/users/tazjin/tgsa/Cargo.toml b/users/tazjin/tgsa/Cargo.toml
index c4f40fd8a1..105333c942 100644
--- a/users/tazjin/tgsa/Cargo.toml
+++ b/users/tazjin/tgsa/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 crimp = "0.2"
+ego-tree = "0.6" # in tandem with 'scraper'
 rouille = "3.5"
 scraper = "0.12"
 url = "2.2"
diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs
index d4bce1e0bc..92ecc55728 100644
--- a/users/tazjin/tgsa/src/main.rs
+++ b/users/tazjin/tgsa/src/main.rs
@@ -84,10 +84,30 @@ fn parse_tgmessage(embed: &str) -> Result {
         .concat();
 
     let msg_sel = Selector::parse("div.tgme_widget_message_text.js-message_text").unwrap();
-    let message = doc
-        .select(&msg_sel)
-        .next()
-        .map(|m| m.text().collect::<Vec<&str>>().concat());
+
+    // The ElementRef::text() iterator does not yield newlines present
+    // in the message, so it is partially reimplemented here.
+    let message = if let Some(msg_elem) = doc.select(&msg_sel).next() {
+        use ego_tree::iter::Edge;
+        use scraper::node::Node;
+
+        let mut out = String::new();
+
+        for edge in &mut msg_elem.traverse() {
+            if let Edge::Open(node) = edge {
+                match node.value() {
+                    Node::Text(ref text) => out.push_str(&*text),
+                    Node::Element(elem) if elem.name() == "br" => out.push_str("\n"),
+                    _ => {}
+                }
+            }
+        }
+
+        Some(out)
+    } else {
+        // Not all Telegram messages have a textual message.
+        None
+    };
 
     let photo_sel = Selector::parse("a.tgme_widget_message_photo_wrap").unwrap();
     let mut photos = vec![];
-- 
cgit 1.4.1