about summary refs log tree commit diff
path: root/users/tazjin/tgsa/src
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2022-04-16T19·04+0200
committer tazjin <tazjin@tvl.su> 2022-04-16T20·35+0000
commit 3d58dfb4ab1747f4cbc9e98ccc9b4b229476067a (patch)
tree 993c4e1d55b47e655a0066784701a26c42d24c08 /users/tazjin/tgsa/src
parent 632c4280b5c8ad717a7ce7b08c49ad93630c8db4 (diff)
feat(tazjin/tgsa): Implement initial tg embed -> BBcode features r/3963
Supports only posts with plain photos, and loses all message
formatting, but it's getting there.

Change-Id: I9e4afcf3072d1e0724521ccbdc1338fe4f8d5ebe
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5466
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'users/tazjin/tgsa/src')
-rw-r--r-- users/tazjin/tgsa/src/main.rs 140
1 files changed, 140 insertions, 0 deletions
diff --git a/users/tazjin/tgsa/src/main.rs b/users/tazjin/tgsa/src/main.rs
new file mode 100644
index 000000000000..d5c5e18b226e
--- /dev/null
+++ b/users/tazjin/tgsa/src/main.rs
@@ -0,0 +1,140 @@
+use anyhow::{anyhow, Context, Result};
+
+/// Identifies a Telegram message by the two path segments of its `t.me` URL.
+#[derive(Debug)]
+struct TgLink {
+    /// First path segment of the `t.me` URL.
+    username: String,
+    /// Second path segment of the `t.me` URL.
+    message_id: usize,
+}
+
+impl TgLink {
+    /// Renders the `https://t.me/<username>/<message_id>` URL with the
+    /// `embed=1` query parameter appended.
+    fn to_url(&self) -> String {
+        let Self {
+            username,
+            message_id,
+        } = self;
+
+        format!("https://t.me/{}/{}?embed=1", username, message_id)
+    }
+}
+
+/// Downloads the embed page for `link` and returns its body as a string.
+///
+/// # Errors
+///
+/// Fails when the request cannot be sent, when the body cannot be decoded
+/// as a string, or when crimp's `error_for_status` check rejects the
+/// response (the error then carries the response body and status).
+fn fetch_embed(link: &TgLink) -> Result<String> {
+    let embed_url = link.to_url();
+
+    let raw = crimp::Request::get(&embed_url)
+        .send()
+        .context("failed to fetch embed data")?;
+
+    let decoded = raw
+        .as_string()
+        .context("failed to decode embed data")?;
+
+    let checked = decoded.error_for_status(|resp| {
+        anyhow!("telegram request failed: {} ({})", resp.body, resp.status)
+    })?;
+
+    Ok(checked.body)
+}
+
+/// Content extracted from a Telegram embed page by `parse_tgmessage`.
+#[derive(Debug)]
+struct TgMessage {
+    /// Text of the first element matching `a.tgme_widget_message_owner_name`.
+    author: String,
+    /// Text of the first message-text element; `None` when the page has none.
+    message: Option<String>,
+    /// Photo URLs taken from the `style` attributes of the photo wrapper
+    /// links; `shorten_photo_links` replaces them with shortened URLs.
+    photos: Vec<String>,
+}
+
+/// Extracts the URL from the first `url('…')` expression in a CSS `style`
+/// attribute value.
+///
+/// Returns `None` when `style` contains no `url('` opener, or when no
+/// closing `')` follows that opener.
+fn extract_photo_url(style: &str) -> Option<&str> {
+    const OPEN: &str = "url('";
+    const CLOSE: &str = "')";
+
+    let start = style.find(OPEN)? + OPEN.len();
+    let rest = &style[start..];
+
+    // Look for the terminator only *after* the opener. Searching the whole
+    // string could match a `')` that precedes `url('`, which would produce an
+    // inverted range and panic when slicing.
+    let end = rest.find(CLOSE)?;
+
+    Some(&rest[..end])
+}
+
+/// Parses the HTML of a Telegram embed page into a `TgMessage`.
+///
+/// The author is mandatory; the message text is optional; photos are
+/// gathered from every photo wrapper link whose `style` attribute yields a
+/// URL via `extract_photo_url`.
+///
+/// # Errors
+///
+/// Fails when no element matches the author selector.
+///
+/// # Panics
+///
+/// Panics if one of the hard-coded CSS selectors fails to parse.
+fn parse_tgmessage(embed: &str) -> Result<TgMessage> {
+    use scraper::{Html, Selector};
+
+    let doc = Html::parse_document(embed);
+
+    // Author: only the first match is used, and its absence is an error.
+    let author_sel = Selector::parse("a.tgme_widget_message_owner_name").unwrap();
+    let author_node = doc
+        .select(&author_sel)
+        .next()
+        .ok_or_else(|| anyhow!("failed to find message author"))?;
+    let author: String = author_node.text().collect();
+
+    // Message text: first match only, and allowed to be missing.
+    let msg_sel = Selector::parse("div.tgme_widget_message_text.js-message_text").unwrap();
+    let message = doc
+        .select(&msg_sel)
+        .next()
+        .map(|node| node.text().collect::<String>());
+
+    // Photos: wrappers without a `style` attribute, or whose style holds no
+    // extractable URL, are skipped silently.
+    let photo_sel = Selector::parse("a.tgme_widget_message_photo_wrap").unwrap();
+    let photos: Vec<String> = doc
+        .select(&photo_sel)
+        .filter_map(|node| {
+            node.value()
+                .attr("style")
+                .and_then(extract_photo_url)
+                .map(String::from)
+        })
+        .collect();
+
+    Ok(TgMessage {
+        author,
+        message,
+        photos,
+    })
+}
+
+/// Replaces every entry of `msg.photos` with the response of the TinyURL
+/// `api-create.php` endpoint for that entry.
+///
+/// One request is made per photo, in order. `msg.photos` is only overwritten
+/// after all requests succeeded.
+///
+/// # Errors
+///
+/// Stops at the first photo for which the request cannot be sent, the body
+/// cannot be decoded, or crimp's `error_for_status` check rejects the
+/// response.
+fn shorten_photo_links(msg: &mut TgMessage) -> Result<()> {
+    let mut api = url::Url::parse("https://tinyurl.com/api-create.php")?;
+
+    let short_links = msg
+        .photos
+        .iter()
+        .map(|long_url| -> Result<String> {
+            // Reuse the same base URL; only the `url` query pair changes.
+            api.query_pairs_mut().clear().append_pair("url", long_url);
+
+            let reply = crimp::Request::get(api.as_str())
+                .send()
+                .context("failed to shorten URL")?
+                .as_string()
+                .context("failed to decode shortened URL")?
+                .error_for_status(|resp| {
+                    anyhow!("tinyurl request failed: {} ({})", resp.body, resp.status)
+                })?;
+
+            // Surrounding whitespace in the response body is dropped.
+            Ok(reply.body.trim().to_string())
+        })
+        .collect::<Result<Vec<String>>>()?;
+
+    msg.photos = short_links;
+
+    Ok(())
+}
+
+/// Renders `msg` as a BBcode snippet.
+///
+/// The output is a `[quote]` block attributed to the message author. It
+/// holds one `[timg]` line per photo followed by the message text (if any).
+/// After the quote comes an italic "via Telegram" link pointing at
+/// `link.to_url()`.
+fn to_bbcode(link: &TgLink, msg: &TgMessage) -> String {
+    let header = format!("[quote=\"{}\"]\n", msg.author);
+
+    let images: String = msg
+        .photos
+        .iter()
+        .map(|photo| format!("[timg]{}[/timg]\n", photo))
+        .collect();
+
+    // A missing message contributes nothing between the photos and the
+    // closing tag.
+    let text = msg.message.as_deref().unwrap_or("");
+
+    let footer = format!(
+        "[i](via [url=\"{}\"]Telegram[/url])[/i]",
+        link.to_url(),
+    );
+
+    [
+        header.as_str(),
+        images.as_str(),
+        text,
+        "\n[/quote]\n",
+        footer.as_str(),
+    ]
+    .concat()
+}
+
+/// Fetches one hard-coded Telegram post, converts it to BBcode and prints it
+/// to stdout.
+///
+/// # Panics
+///
+/// Panics if fetching, parsing or link shortening fails.
+fn main() {
+    crimp::init();
+
+    // The post to convert is fixed for now.
+    let post = TgLink {
+        username: String::from("RWApodcast"),
+        message_id: 113,
+    };
+
+    let html = fetch_embed(&post).unwrap();
+    let mut parsed = parse_tgmessage(&html).unwrap();
+    shorten_photo_links(&mut parsed).unwrap();
+
+    let bbcode = to_bbcode(&post, &parsed);
+    println!("{}", bbcode);
+}