diff options
Diffstat (limited to 'users/tazjin/tgsa/src/main.rs')
-rw-r--r-- | users/tazjin/tgsa/src/main.rs | 104 |
1 files changed, 82 insertions, 22 deletions
diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs index ed72569f92..d9a5d4abc2 100644 --- a/users/tazjin/tgsa/src/main.rs +++ b/users/tazjin/tgsa/src/main.rs @@ -1,12 +1,16 @@ use anyhow::{anyhow, Context, Result}; +use scraper::{Html, Selector}; use std::collections::HashMap; use std::sync::RwLock; use std::time::{Duration, Instant}; +mod translate; + #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct TgLink { username: String, message_id: usize, + translated: bool, } impl TgLink { @@ -14,12 +18,17 @@ impl TgLink { format!("t.me/{}/{}", self.username, self.message_id) } - fn to_url(&self) -> String { - format!("https://t.me/{}/{}?embed=1", self.username, self.message_id) + fn to_url(&self, embed: bool) -> String { + format!( + "https://t.me/{}/{}{}", + self.username, + self.message_id, + if embed { "?embed=1" } else { "" } + ) } - fn parse(url: &str) -> Option<Self> { - let url = url.strip_prefix("/")?; + fn parse(url: &str, translated: bool) -> Option<Self> { + let url = url.strip_prefix('/')?; let parsed = url::Url::parse(url).ok()?; if parsed.host()? != url::Host::Domain("t.me") { @@ -36,13 +45,14 @@ impl TgLink { Some(TgLink { username: parts[0].into(), message_id: parts[1].parse().ok()?, + translated, }) } } -fn fetch_embed(link: &TgLink) -> Result<String> { +fn fetch_post(link: &TgLink, embed: bool) -> Result<String> { println!("fetching {}#{}", link.username, link.message_id); - let response = crimp::Request::get(&link.to_url()) + let response = crimp::Request::get(&link.to_url(embed)) .send() .context("failed to fetch embed data")? .as_string() @@ -54,6 +64,28 @@ fn fetch_embed(link: &TgLink) -> Result<String> { Ok(response.body) } +// in some cases, posts can not be embedded, but telegram still +// includes their content in metadata tags for content previews. +// +// we skip images in this case, as they are scaled down to thumbnail +// size and not useful. +fn fetch_fallback(link: &TgLink) -> Result<Option<String>> { + let post = fetch_post(link, false)?; + let doc = Html::parse_document(&post); + let desc_sel = Selector::parse("meta[property=\"og:description\"]").unwrap(); + let desc_elem = match doc.select(&desc_sel).next() { + None => return Ok(None), + Some(elem) => elem, + }; + + let content = match desc_elem.value().attr("content") { + None => return Ok(None), + Some(content) => content.to_string(), + }; + + Ok(Some(content)) +} + #[derive(Debug)] struct TgMessage { author: String, @@ -71,8 +103,6 @@ fn extract_photo_url(style: &str) -> Option<&str> { } fn parse_tgmessage(embed: &str) -> Result<TgMessage> { - use scraper::{Html, Selector}; - let doc = Html::parse_document(embed); let author_sel = Selector::parse("a.tgme_widget_message_owner_name").unwrap(); @@ -97,8 +127,8 @@ fn parse_tgmessage(embed: &str) -> Result<TgMessage> { for edge in &mut msg_elem.traverse() { if let Edge::Open(node) = edge { match node.value() { - Node::Text(ref text) => out.push_str(&*text), - Node::Element(elem) if elem.name() == "br" => out.push_str("\n"), + Node::Text(ref text) => out.push_str(text), + Node::Element(elem) if elem.name() == "br" => out.push('\n'), _ => {} } } @@ -164,7 +194,7 @@ fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String { out.push_str(&format!("[quote=\"{}\"]\n", msg.author)); for video in 0..msg.videos.len() { - out.push_str(&format!("[url=\"{}\"]", link.to_url())); + out.push_str(&format!("[url=\"{}\"]", link.to_url(true))); // video thumbnail links are appended to the photos, hence the // addition here @@ -184,7 +214,7 @@ fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String { if msg.has_audio { out.push_str(&format!( "[i]This message has audio attached. Go [url=\"{}\"]to Telegram[/url] to listen.[/i]", - link.to_url(), + link.to_url(true), )); } @@ -196,7 +226,7 @@ fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String { out.push_str(&format!( "[sub](from [url=\"{}\"]{}[/url], via [url=\"https://tgsa.tazj.in\"]tgsa[/url])[/sub]\n", - link.to_url(), + link.to_url(true), link.human_friendly_url(), )); @@ -216,7 +246,7 @@ struct TgPost { type Cache = RwLock<HashMap<TgLink, TgPost>>; fn fetch_with_cache(cache: &Cache, link: &TgLink) -> Result<TgPost> { - if let Some(entry) = cache.read().unwrap().get(&link) { + if let Some(entry) = cache.read().unwrap().get(link) { if Instant::now() - entry.at < CACHE_EXPIRY { println!("serving {}#{} from cache", link.username, link.message_id); return Ok(entry.clone()); @@ -227,9 +257,21 @@ fn fetch_with_cache(cache: &Cache, link: &TgLink) -> Result<TgPost> { // TODO(tazjin): per link? let mut writer = cache.write().unwrap(); - let embed = fetch_embed(&link)?; - let mut msg = parse_tgmessage(&embed)?; - let bbcode = to_bbcode(&link, &msg); + let post = fetch_post(link, true)?; + let mut msg = parse_tgmessage(&post)?; + + if msg.message.is_none() { + msg.message = fetch_fallback(link)?; + } + + if let Some(message) = &msg.message { + if link.translated { + println!("translating {}#{}", link.username, link.message_id); + msg.message = Some(translate::fetch_translation(message)?); + } + } + + let bbcode = to_bbcode(link, &msg); let mut media = vec![]; media.append(&mut msg.photos); @@ -256,7 +298,7 @@ fn handle_img_redirect(cache: &Cache, img_path: &str) -> Result<rouille::Respons // | post ID // username - let img_parts: Vec<&str> = img_path.split("/").collect(); + let img_parts: Vec<&str> = img_path.split('/').collect(); if img_parts.len() != 3 { println!("invalid image link: {}", img_path); @@ -266,6 +308,7 @@ fn handle_img_redirect(cache: &Cache, img_path: &str) -> Result<rouille::Respons let link = TgLink { username: img_parts[0].into(), message_id: img_parts[1].parse().context("failed to parse message_id")?, + translated: false, }; let img_idx: usize = img_parts[2].parse().context("failed to parse img_idx")?; @@ -294,12 +337,20 @@ fn main() { let cache: Cache = RwLock::new(HashMap::new()); rouille::start_server("0.0.0.0:8472", move |request| { + let mut raw_url = request.raw_url(); + let mut translate = false; + let response = loop { - if request.raw_url().starts_with("/img/") { - break handle_img_redirect(&cache, &request.raw_url()[5..]); + if raw_url.starts_with("/img/") { + break handle_img_redirect(&cache, &raw_url[5..]); } - break match TgLink::parse(request.raw_url()) { + if raw_url.starts_with("/translate/") { + translate = true; + raw_url = &raw_url[10..]; + } + + break match TgLink::parse(raw_url, translate) { None => Ok(rouille::Response::text( r#"tgsa ---- @@ -319,7 +370,16 @@ yes, that looks stupid, but it works if you see this message and think you did the above correctly, you didn't. try again. idiot. -pm me on the forums if this makes you mad or something. +it can also translate posts from russian, ukrainian or whatever other +dumb language you speak into english by adding `/translate/`, for +example: + + https://tgsa.tazj.in/translate/https://t.me/strelkovii/4329 + +expect this to be slow though. that's the price to pay for translating +shitty slang. + +pm me on the forums if any of this makes you mad or something. "#, )), Some(link) => handle_tg_link(&cache, &link), |