about summary refs log tree commit diff
path: root/users/tazjin/tgsa
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2022-04-20T11·00+0200
committer clbot <clbot@tvl.fyi> 2022-04-20T11·02+0000
commit e0b9d9b1cdbf9356a850dac5287b9eb63d83f3dc (patch)
tree 7daa7715ade97c467827fd49ea8526d0ac65095e /users/tazjin/tgsa
parent 094b8cc7afe2f854d7bdde5ad239867c8e54e1f3 (diff)
fix(tazjin/tgsa): Preserve newlines in messages r/3977
Change-Id: I9e2d7038357a5510898d153740ae0c0e1f7a8a3a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5483
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/tazjin/tgsa')
-rw-r--r-- users/tazjin/tgsa/Cargo.lock 1
-rw-r--r-- users/tazjin/tgsa/Cargo.toml 1
-rw-r--r-- users/tazjin/tgsa/src/main.rs 28
3 files changed, 26 insertions, 4 deletions
diff --git a/users/tazjin/tgsa/Cargo.lock b/users/tazjin/tgsa/Cargo.lock
index b1c154f0ab..d5d034dde4 100644
--- a/users/tazjin/tgsa/Cargo.lock
+++ b/users/tazjin/tgsa/Cargo.lock
@@ -1084,6 +1084,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "crimp",
+ "ego-tree",
  "rouille",
  "scraper",
  "url",
diff --git a/users/tazjin/tgsa/Cargo.toml b/users/tazjin/tgsa/Cargo.toml
index c4f40fd8a1..105333c942 100644
--- a/users/tazjin/tgsa/Cargo.toml
+++ b/users/tazjin/tgsa/Cargo.toml
@@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0"
 crimp = "0.2"
+ego-tree = "0.6" # in tandem with 'scraper'
 rouille = "3.5"
 scraper = "0.12"
 url = "2.2"
diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs
index d4bce1e0bc..92ecc55728 100644
--- a/users/tazjin/tgsa/src/main.rs
+++ b/users/tazjin/tgsa/src/main.rs
@@ -84,10 +84,30 @@ fn parse_tgmessage(embed: &str) -> Result<TgMessage> {
         .concat();
 
     let msg_sel = Selector::parse("div.tgme_widget_message_text.js-message_text").unwrap();
-    let message = doc
-        .select(&msg_sel)
-        .next()
-        .map(|m| m.text().collect::<Vec<&str>>().concat());
+
+    // The ElementRef::text() iterator does not yield newlines present
+    // in the message, so it is partially reimplemented here.
+    let message = if let Some(msg_elem) = doc.select(&msg_sel).next() {
+        use ego_tree::iter::Edge;
+        use scraper::node::Node;
+
+        let mut out = String::new();
+
+        for edge in &mut msg_elem.traverse() {
+            if let Edge::Open(node) = edge {
+                match node.value() {
+                    Node::Text(ref text) => out.push_str(&*text),
+                    Node::Element(elem) if elem.name() == "br" => out.push_str("\n"),
+                    _ => {}
+                }
+            }
+        }
+
+        Some(out)
+    } else {
+        // Not all Telegram messages have a textual message.
+        None
+    };
 
     let photo_sel = Selector::parse("a.tgme_widget_message_photo_wrap").unwrap();
     let mut photos = vec![];