about summary refs log tree commit diff
path: root/users/tazjin/tgsa/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'users/tazjin/tgsa/src/main.rs')
-rw-r--r--users/tazjin/tgsa/src/main.rs104
1 files changed, 82 insertions, 22 deletions
diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs
index ed72569f92..d9a5d4abc2 100644
--- a/users/tazjin/tgsa/src/main.rs
+++ b/users/tazjin/tgsa/src/main.rs
@@ -1,12 +1,16 @@
 use anyhow::{anyhow, Context, Result};
+use scraper::{Html, Selector};
 use std::collections::HashMap;
 use std::sync::RwLock;
 use std::time::{Duration, Instant};
 
+mod translate;
+
 #[derive(Clone, Debug, Eq, Hash, PartialEq)]
 struct TgLink {
     username: String,
     message_id: usize,
+    translated: bool,
 }
 
 impl TgLink {
@@ -14,12 +18,17 @@ impl TgLink {
         format!("t.me/{}/{}", self.username, self.message_id)
     }
 
-    fn to_url(&self) -> String {
-        format!("https://t.me/{}/{}?embed=1", self.username, self.message_id)
+    fn to_url(&self, embed: bool) -> String {
+        format!(
+            "https://t.me/{}/{}{}",
+            self.username,
+            self.message_id,
+            if embed { "?embed=1" } else { "" }
+        )
     }
 
-    fn parse(url: &str) -> Option<Self> {
-        let url = url.strip_prefix("/")?;
+    fn parse(url: &str, translated: bool) -> Option<Self> {
+        let url = url.strip_prefix('/')?;
         let parsed = url::Url::parse(url).ok()?;
 
         if parsed.host()? != url::Host::Domain("t.me") {
@@ -36,13 +45,14 @@ impl TgLink {
         Some(TgLink {
             username: parts[0].into(),
             message_id: parts[1].parse().ok()?,
+            translated,
         })
     }
 }
 
-fn fetch_embed(link: &TgLink) -> Result<String> {
+fn fetch_post(link: &TgLink, embed: bool) -> Result<String> {
     println!("fetching {}#{}", link.username, link.message_id);
-    let response = crimp::Request::get(&link.to_url())
+    let response = crimp::Request::get(&link.to_url(embed))
         .send()
         .context("failed to fetch embed data")?
         .as_string()
@@ -54,6 +64,28 @@ fn fetch_embed(link: &TgLink) -> Result<String> {
     Ok(response.body)
 }
 
+// in some cases, posts can not be embedded, but telegram still
+// includes their content in metadata tags for content previews.
+//
+// we skip images in this case, as they are scaled down to thumbnail
+// size and not useful.
+fn fetch_fallback(link: &TgLink) -> Result<Option<String>> {
+    let post = fetch_post(link, false)?;
+    let doc = Html::parse_document(&post);
+    let desc_sel = Selector::parse("meta[property=\"og:description\"]").unwrap();
+    let desc_elem = match doc.select(&desc_sel).next() {
+        None => return Ok(None),
+        Some(elem) => elem,
+    };
+
+    let content = match desc_elem.value().attr("content") {
+        None => return Ok(None),
+        Some(content) => content.to_string(),
+    };
+
+    Ok(Some(content))
+}
+
 #[derive(Debug)]
 struct TgMessage {
     author: String,
@@ -71,8 +103,6 @@ fn extract_photo_url(style: &str) -> Option<&str> {
 }
 
 fn parse_tgmessage(embed: &str) -> Result<TgMessage> {
-    use scraper::{Html, Selector};
-
     let doc = Html::parse_document(embed);
 
     let author_sel = Selector::parse("a.tgme_widget_message_owner_name").unwrap();
@@ -97,8 +127,8 @@ fn parse_tgmessage(embed: &str) -> Result<TgMessage> {
         for edge in &mut msg_elem.traverse() {
             if let Edge::Open(node) = edge {
                 match node.value() {
-                    Node::Text(ref text) => out.push_str(&*text),
-                    Node::Element(elem) if elem.name() == "br" => out.push_str("\n"),
+                    Node::Text(ref text) => out.push_str(text),
+                    Node::Element(elem) if elem.name() == "br" => out.push('\n'),
                     _ => {}
                 }
             }
@@ -164,7 +194,7 @@ fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String {
     out.push_str(&format!("[quote=\"{}\"]\n", msg.author));
 
     for video in 0..msg.videos.len() {
-        out.push_str(&format!("[url=\"{}\"]", link.to_url()));
+        out.push_str(&format!("[url=\"{}\"]", link.to_url(true)));
 
         // video thumbnail links are appended to the photos, hence the
         // addition here
@@ -184,7 +214,7 @@ fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String {
     if msg.has_audio {
         out.push_str(&format!(
             "[i]This message has audio attached. Go [url=\"{}\"]to Telegram[/url] to listen.[/i]",
-            link.to_url(),
+            link.to_url(true),
         ));
     }
 
@@ -196,7 +226,7 @@ fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String {
 
     out.push_str(&format!(
         "[sub](from [url=\"{}\"]{}[/url], via [url=\"https://tgsa.tazj.in\"]tgsa[/url])[/sub]\n",
-        link.to_url(),
+        link.to_url(true),
         link.human_friendly_url(),
     ));
 
@@ -216,7 +246,7 @@ struct TgPost {
 type Cache = RwLock<HashMap<TgLink, TgPost>>;
 
 fn fetch_with_cache(cache: &Cache, link: &TgLink) -> Result<TgPost> {
-    if let Some(entry) = cache.read().unwrap().get(&link) {
+    if let Some(entry) = cache.read().unwrap().get(link) {
         if Instant::now() - entry.at < CACHE_EXPIRY {
             println!("serving {}#{} from cache", link.username, link.message_id);
             return Ok(entry.clone());
@@ -227,9 +257,21 @@ fn fetch_with_cache(cache: &Cache, link: &TgLink) -> Result<TgPost> {
     // TODO(tazjin): per link?
     let mut writer = cache.write().unwrap();
 
-    let embed = fetch_embed(&link)?;
-    let mut msg = parse_tgmessage(&embed)?;
-    let bbcode = to_bbcode(&link, &msg);
+    let post = fetch_post(link, true)?;
+    let mut msg = parse_tgmessage(&post)?;
+
+    if msg.message.is_none() {
+        msg.message = fetch_fallback(link)?;
+    }
+
+    if let Some(message) = &msg.message {
+        if link.translated {
+            println!("translating {}#{}", link.username, link.message_id);
+            msg.message = Some(translate::fetch_translation(message)?);
+        }
+    }
+
+    let bbcode = to_bbcode(link, &msg);
 
     let mut media = vec![];
     media.append(&mut msg.photos);
@@ -256,7 +298,7 @@ fn handle_img_redirect(cache: &Cache, img_path: &str) -> Result<rouille::Respons
     // |          post ID
     // username
 
-    let img_parts: Vec<&str> = img_path.split("/").collect();
+    let img_parts: Vec<&str> = img_path.split('/').collect();
 
     if img_parts.len() != 3 {
         println!("invalid image link: {}", img_path);
@@ -266,6 +308,7 @@ fn handle_img_redirect(cache: &Cache, img_path: &str) -> Result<rouille::Respons
     let link = TgLink {
         username: img_parts[0].into(),
         message_id: img_parts[1].parse().context("failed to parse message_id")?,
+        translated: false,
     };
 
     let img_idx: usize = img_parts[2].parse().context("failed to parse img_idx")?;
@@ -294,12 +337,20 @@ fn main() {
     let cache: Cache = RwLock::new(HashMap::new());
 
     rouille::start_server("0.0.0.0:8472", move |request| {
+        let mut raw_url = request.raw_url();
+        let mut translate = false;
+
         let response = loop {
-            if request.raw_url().starts_with("/img/") {
-                break handle_img_redirect(&cache, &request.raw_url()[5..]);
+            if raw_url.starts_with("/img/") {
+                break handle_img_redirect(&cache, &raw_url[5..]);
             }
 
-            break match TgLink::parse(request.raw_url()) {
+            if raw_url.starts_with("/translate/") {
+                translate = true;
+                raw_url = &raw_url[10..];
+            }
+
+            break match TgLink::parse(raw_url, translate) {
                 None => Ok(rouille::Response::text(
                     r#"tgsa
 ----
@@ -319,7 +370,16 @@ yes, that looks stupid, but it works
 if you see this message and think you did the above correctly, you
 didn't. try again. idiot.
 
-pm me on the forums if this makes you mad or something.
+it can also translate posts from russian, ukrainian or whatever other
+dumb language you speak into english by adding `/translate/`, for
+example:
+
+  https://tgsa.tazj.in/translate/https://t.me/strelkovii/4329
+
+expect this to be slow though. that's the price to pay for translating
+shitty slang.
+
+pm me on the forums if any of this makes you mad or something.
 "#,
                 )),
                 Some(link) => handle_tg_link(&cache, &link),