diff options
author | Vincent Ambo <mail@tazj.in> | 2021-04-30T20·39+0200 |
---|---|---|
committer | tazjin <mail@tazj.in> | 2021-05-03T13·57+0000 |
commit | d311af9bc059253a4b7adf41027a60b141c6fc53 (patch) | |
tree | 830d328103de70bc98ba69567b4a1630a61d8495 /tools/cheddar/src/main.rs | |
parent | 713f06629757c507607cf5dc88399a84d54f2469 (diff) |
refactor(cheddar): Split out a library with rendering logic r/2556
Splits `main.rs` into `lib.rs` and `bin/cheddar.rs`, which enables reuse of cheddar's rendering logic in other Rust applications. Change-Id: Ifd1a44a8d1620c595550a0a497a25b0563e917ca Reviewed-on: https://cl.tvl.fyi/c/depot/+/3060 Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de>
Diffstat (limited to 'tools/cheddar/src/main.rs')
-rw-r--r-- | tools/cheddar/src/main.rs | 409 |
1 files changed, 0 insertions, 409 deletions
diff --git a/tools/cheddar/src/main.rs b/tools/cheddar/src/main.rs deleted file mode 100644 index 9f6d1a25db52..000000000000 --- a/tools/cheddar/src/main.rs +++ /dev/null @@ -1,409 +0,0 @@ -use clap::{App, Arg}; -use comrak::arena_tree::Node; -use comrak::nodes::{Ast, AstNode, NodeCodeBlock, NodeHtmlBlock, NodeValue}; -use comrak::{format_html, parse_document, Arena, ComrakOptions}; -use lazy_static::lazy_static; -use rouille::Response; -use rouille::{router, try_or_400}; -use serde::Deserialize; -use serde_json::json; -use std::cell::RefCell; -use std::collections::HashMap; -use std::env; -use std::ffi::OsStr; -use std::io; -use std::io::BufRead; -use std::io::Write; -use std::path::Path; -use syntect::dumps::from_binary; -use syntect::easy::HighlightLines; -use syntect::highlighting::{Theme, ThemeSet}; -use syntect::parsing::{SyntaxReference, SyntaxSet}; -use syntect::util::LinesWithEndings; - -use syntect::html::{ - append_highlighted_html_for_styled_line, start_highlighted_html_snippet, IncludeBackground, -}; - -#[cfg(test)] -mod tests; - -lazy_static! { - // Load syntaxes & themes lazily. Initialisation might not be - // required in the case of Markdown rendering (if there's no code - // blocks within the document). - static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES"))); - static ref THEMES: ThemeSet = ThemeSet::load_defaults(); - - // Configure Comrak's Markdown rendering with all the bells & - // whistles! - static ref MD_OPTS: ComrakOptions = { - let mut options = ComrakOptions::default(); - - // Enable non-standard Markdown features: - options.extension.strikethrough = true; - options.extension.tagfilter = true; - options.extension.table = true; - options.extension.autolink = true; - options.extension.tasklist = true; - options.extension.header_ids = Some(String::new()); // yyeeesss! - options.extension.footnotes = true; - options.extension.description_lists = true; - options.extension.front_matter_delimiter = Some("---".to_owned()); - - // Required for tagfilter - options.render.unsafe_ = true; - - options - }; - - // Configures a map of specific filenames to languages, for cases - // where the detection by extension or other heuristics fails. - static ref FILENAME_OVERRIDES: HashMap<&'static str, &'static str> = { - let mut map = HashMap::new(); - // rules.pl is the canonical name of the submit rule file in - // Gerrit, which is written in Prolog. - map.insert("rules.pl", "Prolog"); - map - }; -} - -// HTML fragment used when rendering inline blocks in Markdown documents. -// Emulates the GitHub style (subtle background hue and padding). -const BLOCK_PRE: &str = "<pre style=\"background-color:#f6f8fa;padding:16px;\">\n"; - -fn should_continue(res: &io::Result<usize>) -> bool { - match *res { - Ok(n) => n > 0, - Err(_) => false, - } -} - -// This function is taken from the Comrak documentation. -fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) -where - F: Fn(&'a AstNode<'a>), -{ - f(node); - for c in node.children() { - iter_nodes(c, f); - } -} - -// Many of the syntaxes in the syntax list have random capitalisations, which -// means that name matching for the block info of a code block in HTML fails. -// -// Instead, try finding a syntax match by comparing case insensitively (for -// ASCII characters, anyways). -fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> { - // TODO(tazjin): memoize this lookup - SYNTAXES - .syntaxes() - .iter() - .rev() - .find(|&s| info.eq_ignore_ascii_case(&s.name)) -} - -// Replaces code-block inside of a Markdown AST with HTML blocks rendered by -// syntect. This enables static (i.e. no JavaScript) syntax highlighting, even -// of complex languages. -fn highlight_code_block(code_block: &NodeCodeBlock) -> NodeValue { - let theme = &THEMES.themes["InspiredGitHub"]; - let info = String::from_utf8_lossy(&code_block.info); - - let syntax = find_syntax_case_insensitive(&info) - .or_else(|| SYNTAXES.find_syntax_by_extension(&info)) - .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()); - - let code = String::from_utf8_lossy(&code_block.literal); - - let rendered = { - // Write the block preamble manually to get exactly the - // desired layout: - let mut hl = HighlightLines::new(syntax, theme); - let mut buf = BLOCK_PRE.to_string(); - - for line in LinesWithEndings::from(&code) { - let regions = hl.highlight(line, &SYNTAXES); - append_highlighted_html_for_styled_line(®ions[..], IncludeBackground::No, &mut buf); - } - - buf.push_str("</pre>"); - buf - }; - - let mut block = NodeHtmlBlock::default(); - block.literal = rendered.into_bytes(); - - NodeValue::HtmlBlock(block) -} - -// Supported callout elements (which each have their own distinct rendering): -enum Callout { - Todo, - Warning, - Question, - Tip, -} - -// Determine whether the first child of the supplied node contains a text that -// should cause a callout section to be rendered. -fn has_callout<'a>(node: &Node<'a, RefCell<Ast>>) -> Option<Callout> { - match node.first_child().map(|c| c.data.borrow()) { - Some(child) => match &child.value { - NodeValue::Text(text) => { - if text.starts_with(b"TODO") { - return Some(Callout::Todo); - } else if text.starts_with(b"WARNING") { - return Some(Callout::Warning); - } else if text.starts_with(b"QUESTION") { - return Some(Callout::Question); - } else if text.starts_with(b"TIP") { - return Some(Callout::Tip); - } - - None - } - _ => None, - }, - _ => None, - } -} - -fn format_callout_paragraph(callout: Callout) -> NodeValue { - let class = match callout { - Callout::Todo => "cheddar-todo", - Callout::Warning => "cheddar-warning", - Callout::Question => "cheddar-question", - Callout::Tip => "cheddar-tip", - }; - - let mut block = NodeHtmlBlock::default(); - block.literal = format!("<p class=\"cheddar-callout {}\">", class).into_bytes(); - NodeValue::HtmlBlock(block) -} - -fn format_markdown<R: BufRead, W: Write>(reader: &mut R, writer: &mut W) { - let document = { - let mut buffer = String::new(); - reader - .read_to_string(&mut buffer) - .expect("reading should work"); - buffer - }; - - let arena = Arena::new(); - let root = parse_document(&arena, &document, &MD_OPTS); - - // This node must exist with a lifetime greater than that of the parsed AST - // in case that callouts are encountered (otherwise insertion into the tree - // is not possible). - let mut p_close_value = NodeHtmlBlock::default(); - p_close_value.literal = b"</p>".to_vec(); - - let p_close_node = Ast::new(NodeValue::HtmlBlock(p_close_value)); - let p_close = Node::new(RefCell::new(p_close_node)); - - // Special features of Cheddar are implemented by traversing the - // arena and reacting on nodes that we might want to modify. - iter_nodes(root, &|node| { - let mut ast = node.data.borrow_mut(); - let new = match &ast.value { - // Syntax highlighting is implemented by replacing the - // code block node with literal HTML. - NodeValue::CodeBlock(code) => Some(highlight_code_block(code)), - - NodeValue::Paragraph => { - if let Some(callout) = has_callout(node) { - node.insert_after(&p_close); - Some(format_callout_paragraph(callout)) - } else { - None - } - } - _ => None, - }; - - if let Some(new_value) = new { - ast.value = new_value - } - }); - - format_html(root, &MD_OPTS, writer).expect("Markdown rendering failed"); -} - -fn find_syntax_for_file(filename: &str) -> &'static SyntaxReference { - (*FILENAME_OVERRIDES) - .get(filename) - .and_then(|name| SYNTAXES.find_syntax_by_name(name)) - .or_else(|| { - Path::new(filename) - .extension() - .and_then(OsStr::to_str) - .and_then(|s| SYNTAXES.find_syntax_by_extension(s)) - }) - .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()) -} - -fn format_code<R: BufRead, W: Write>( - theme: &Theme, - reader: &mut R, - writer: &mut W, - filename: &str, -) { - let mut linebuf = String::new(); - - // Get the first line, we might need it for syntax identification. - let mut read_result = reader.read_line(&mut linebuf); - let syntax = find_syntax_for_file(filename); - - let mut hl = HighlightLines::new(syntax, theme); - let (mut outbuf, bg) = start_highlighted_html_snippet(theme); - - // Rather than using the `lines` iterator, read each line manually - // and maintain buffer state. - // - // This is done because the syntax highlighter requires trailing - // newlines to be efficient, and those are stripped in the lines - // iterator. - while should_continue(&read_result) { - let regions = hl.highlight(&linebuf, &SYNTAXES); - - append_highlighted_html_for_styled_line( - ®ions[..], - IncludeBackground::IfDifferent(bg), - &mut outbuf, - ); - - // immediately output the current state to avoid keeping - // things in memory - write!(writer, "{}", outbuf).expect("write should not fail"); - - // merry go round again - linebuf.clear(); - outbuf.clear(); - read_result = reader.read_line(&mut linebuf); - } - - writeln!(writer, "</pre>").expect("write should not fail"); -} - -// Server endpoint for rendering the syntax of source code. This -// replaces the 'syntect_server' component of Sourcegraph. -fn code_endpoint(request: &rouille::Request) -> rouille::Response { - #[derive(Deserialize)] - struct SourcegraphQuery { - filepath: String, - theme: String, - code: String, - } - - let query: SourcegraphQuery = try_or_400!(rouille::input::json_input(request)); - let mut buf: Vec<u8> = Vec::new(); - - // We don't use syntect with the sourcegraph themes bundled - // currently, so let's fall back to something that is kind of - // similar (tm). - let theme = &THEMES.themes[match query.theme.as_str() { - "Sourcegraph (light)" => "Solarized (light)", - _ => "Solarized (dark)", - }]; - - format_code(theme, &mut query.code.as_bytes(), &mut buf, &query.filepath); - - Response::json(&json!({ - "is_plaintext": false, - "data": String::from_utf8_lossy(&buf) - })) -} - -// Server endpoint for rendering a Markdown file. -fn markdown_endpoint(request: &rouille::Request) -> rouille::Response { - let mut texts: HashMap<String, String> = try_or_400!(rouille::input::json_input(request)); - - for text in texts.values_mut() { - let mut buf: Vec<u8> = Vec::new(); - format_markdown(&mut text.as_bytes(), &mut buf); - *text = String::from_utf8_lossy(&buf).to_string(); - } - - Response::json(&texts) -} - -fn highlighting_server(listen: &str) { - println!("Starting syntax highlighting server on '{}'", listen); - - rouille::start_server(listen, move |request| { - router!(request, - // Markdown rendering route - (POST) (/markdown) => { - markdown_endpoint(request) - }, - - // Code rendering route - (POST) (/) => { - code_endpoint(request) - }, - - _ => { - rouille::Response::empty_404() - }, - ) - }); -} - -fn main() { - // Parse the command-line flags passed to cheddar to determine - // whether it is running in about-filter mode (`--about-filter`) - // and what file extension has been supplied. - let matches = App::new("cheddar") - .about("TVL's syntax highlighter") - .arg( - Arg::with_name("about-filter") - .help("Run as a cgit about-filter (renders Markdown)") - .long("about-filter") - .takes_value(false), - ) - .arg( - Arg::with_name("sourcegraph-server") - .help("Run as a Sourcegraph compatible web-server") - .long("sourcegraph-server") - .takes_value(false), - ) - .arg( - Arg::with_name("listen") - .help("Address to listen on") - .long("listen") - .takes_value(true), - ) - .arg(Arg::with_name("filename").help("File to render").index(1)) - .get_matches(); - - if matches.is_present("sourcegraph-server") { - highlighting_server( - matches - .value_of("listen") - .expect("Listening address is required for server mode"), - ); - return; - } - - let filename = matches.value_of("filename").expect("filename is required"); - - let stdin = io::stdin(); - let mut in_handle = stdin.lock(); - - let stdout = io::stdout(); - let mut out_handle = stdout.lock(); - - if matches.is_present("about-filter") && filename.ends_with(".md") { - format_markdown(&mut in_handle, &mut out_handle); - } else { - format_code( - &THEMES.themes["InspiredGitHub"], - &mut in_handle, - &mut out_handle, - filename, - ); - } -} |