From e0b9d9b1cdbf9356a850dac5287b9eb63d83f3dc Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Wed, 20 Apr 2022 13:00:37 +0200 Subject: fix(tazjin/tgsa): Preserve newlines in messages Change-Id: I9e2d7038357a5510898d153740ae0c0e1f7a8a3a Reviewed-on: https://cl.tvl.fyi/c/depot/+/5483 Reviewed-by: tazjin Autosubmit: tazjin Tested-by: BuildkiteCI --- users/tazjin/tgsa/src/main.rs | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'users/tazjin/tgsa/src') diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs index d4bce1e0bcd2..92ecc5572896 100644 --- a/users/tazjin/tgsa/src/main.rs +++ b/users/tazjin/tgsa/src/main.rs @@ -84,10 +84,30 @@ fn parse_tgmessage(embed: &str) -> Result { .concat(); let msg_sel = Selector::parse("div.tgme_widget_message_text.js-message_text").unwrap(); - let message = doc - .select(&msg_sel) - .next() - .map(|m| m.text().collect::>().concat()); + + // The ElementRef::text() iterator does not yield newlines present + // in the message, so it is partially reimplemented here. + let message = if let Some(msg_elem) = doc.select(&msg_sel).next() { + use ego_tree::iter::Edge; + use scraper::node::Node; + + let mut out = String::new(); + + for edge in &mut msg_elem.traverse() { + if let Edge::Open(node) = edge { + match node.value() { + Node::Text(ref text) => out.push_str(&*text), + Node::Element(elem) if elem.name() == "br" => out.push_str("\n"), + _ => {} + } + } + } + + Some(out) + } else { + // Not all Telegram messages have a textual message. + None + }; let photo_sel = Selector::parse("a.tgme_widget_message_photo_wrap").unwrap(); let mut photos = vec![]; -- cgit 1.4.1