diff options
Diffstat (limited to 'users/tazjin/tgsa')
-rw-r--r-- | users/tazjin/tgsa/Cargo.lock | 2 | ||||
-rw-r--r-- | users/tazjin/tgsa/Cargo.toml | 2 | ||||
-rw-r--r-- | users/tazjin/tgsa/src/main.rs | 86 |
3 files changed, 83 insertions, 7 deletions
diff --git a/users/tazjin/tgsa/Cargo.lock b/users/tazjin/tgsa/Cargo.lock index 386355cddaeb..51d11135f37f 100644 --- a/users/tazjin/tgsa/Cargo.lock +++ b/users/tazjin/tgsa/Cargo.lock @@ -1075,6 +1075,8 @@ dependencies = [ "ego-tree", "rouille", "scraper", + "serde", + "serde_json", "url", ] diff --git a/users/tazjin/tgsa/Cargo.toml b/users/tazjin/tgsa/Cargo.toml index b589a8174e8e..0b1529805864 100644 --- a/users/tazjin/tgsa/Cargo.toml +++ b/users/tazjin/tgsa/Cargo.toml @@ -10,3 +10,5 @@ rouille = { version = "3.5", default-features = false } url = "2.3" scraper = "0.13" ego-tree = "0.6" # in tandem with 'scraper' +serde = "1.0" +serde_json = "1.0" diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs index 508b9872a254..bff02924038f 100644 --- a/users/tazjin/tgsa/src/main.rs +++ b/users/tazjin/tgsa/src/main.rs @@ -1,13 +1,15 @@ use anyhow::{anyhow, Context, Result}; +use scraper::{Html, Selector}; +use serde_json::Value; use std::collections::HashMap; use std::sync::RwLock; use std::time::{Duration, Instant}; -use scraper::{Html, Selector}; #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct TgLink { username: String, message_id: usize, + translated: bool, } impl TgLink { @@ -16,10 +18,15 @@ impl TgLink { } fn to_url(&self, embed: bool) -> String { - format!("https://t.me/{}/{}{}", self.username, self.message_id, if embed { "?embed=1" } else { "" }) + format!( + "https://t.me/{}/{}{}", + self.username, + self.message_id, + if embed { "?embed=1" } else { "" } + ) } - fn parse(url: &str) -> Option<Self> { + fn parse(url: &str, translated: bool) -> Option<Self> { let url = url.strip_prefix("/")?; let parsed = url::Url::parse(url).ok()?; @@ -37,6 +44,7 @@ impl TgLink { Some(TgLink { username: parts[0].into(), message_id: parts[1].parse().ok()?, + translated, }) } } @@ -55,6 +63,46 @@ fn fetch_post(link: &TgLink, embed: bool) -> Result<String> { Ok(response.body) } +fn fetch_translation(message: &str) -> Result<String> { + let request = serde_json::json!({ + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "user", "content": "Please translate the following message from a Telegram channel into English. If the post is already partially in English, please leave those bits intact as they are. Please respond only with the translation."}, + {"role": "user", "content": message} + ] + }); + + let response: Value = crimp::Request::post("https://api.openai.com/v1/chat/completions") + .bearer_auth(&std::env::var("OPENAPI_KEY").context("no openapi key set")?)? + .json(&request)? + .send() + .context("failed to fetch translation from openai")? + .as_json::<Value>()? + .error_for_status(|resp| { + anyhow!( + "translation request failed: {} ({})", + resp.body, + resp.status + ) + })? + .body; + + // we want choices[0].message.content, and inshallah it's the right thing. + let translation = response + .get("choices") + .ok_or_else(|| anyhow!("missing 'choices' key"))? + .get(0) + .ok_or_else(|| anyhow!("empty 'choices' or something"))? + .get("message") + .ok_or_else(|| anyhow!("missing 'message' key"))? + .get("content") + .ok_or_else(|| anyhow!("missing 'content' key"))? + .as_str() + .ok_or_else(|| anyhow!("'content' was not a string"))?; + + Ok(translation.to_string()) +} + // in some cases, posts can not be embedded, but telegram still // includes their content in metadata tags for content previews. // @@ -255,6 +303,12 @@ fn fetch_with_cache(cache: &Cache, link: &TgLink) -> Result<TgPost> { msg.message = fetch_fallback(&link)?; } + if let Some(message) = &msg.message { + if link.translated { + msg.message = Some(fetch_translation(message)?); + } + } + let bbcode = to_bbcode(&link, &msg); let mut media = vec![]; @@ -292,6 +346,7 @@ fn handle_img_redirect(cache: &Cache, img_path: &str) -> Result<rouille::Respons let link = TgLink { username: img_parts[0].into(), message_id: img_parts[1].parse().context("failed to parse message_id")?, + translated: false, }; let img_idx: usize = img_parts[2].parse().context("failed to parse img_idx")?; @@ -320,12 +375,20 @@ fn main() { let cache: Cache = RwLock::new(HashMap::new()); rouille::start_server("0.0.0.0:8472", move |request| { + let mut raw_url = request.raw_url(); + let mut translate = false; + let response = loop { - if request.raw_url().starts_with("/img/") { - break handle_img_redirect(&cache, &request.raw_url()[5..]); + if raw_url.starts_with("/img/") { + break handle_img_redirect(&cache, &raw_url[5..]); } - break match TgLink::parse(request.raw_url()) { + if raw_url.starts_with("/translate/") { + translate = true; + raw_url = &raw_url[10..]; + } + + break match TgLink::parse(raw_url, translate) { None => Ok(rouille::Response::text( r#"tgsa ---- @@ -345,7 +408,16 @@ yes, that looks stupid, but it works if you see this message and think you did the above correctly, you didn't. try again. idiot. -pm me on the forums if this makes you mad or something. +it can also translate posts from russian, ukrainian or whatever other +dumb language you speak into english, by adding `/translate/`, for +example: + + https://tgsa.tazj.in/translate/https://t.me/strelkovii/4329 + +expect this to be slow though. that's the price to pay for translating +shitty slang. + +pm me on the forums if any of this makes you mad or something. "#, )), Some(link) => handle_tg_link(&cache, &link), |