diff options
Diffstat (limited to 'tools/cheddar/src')
-rw-r--r-- | tools/cheddar/src/bin/cheddar.rs | 134 | ||||
-rw-r--r-- | tools/cheddar/src/lib.rs | 339 | ||||
-rw-r--r-- | tools/cheddar/src/tests.rs | 105 |
3 files changed, 578 insertions, 0 deletions
diff --git a/tools/cheddar/src/bin/cheddar.rs b/tools/cheddar/src/bin/cheddar.rs new file mode 100644 index 000000000000..48c504d53590 --- /dev/null +++ b/tools/cheddar/src/bin/cheddar.rs @@ -0,0 +1,134 @@ +//! This file defines the binary for cheddar, which can be interacted +//! with in two different ways: +//! +//! 1. As a CLI tool that acts as a cgit filter. +//! 2. As a long-running HTTP server that handles rendering requests +//! (matching the SourceGraph protocol). +use clap::{App, Arg}; +use rouille::{router, try_or_400, Response}; +use serde::Deserialize; +use serde_json::json; +use std::collections::HashMap; +use std::io; + +use cheddar::{format_code, format_markdown, THEMES}; + +// Server endpoint for rendering the syntax of source code. This +// replaces the 'syntect_server' component of Sourcegraph. +fn code_endpoint(request: &rouille::Request) -> rouille::Response { + #[derive(Deserialize)] + struct SourcegraphQuery { + filepath: String, + theme: String, + code: String, + } + + let query: SourcegraphQuery = try_or_400!(rouille::input::json_input(request)); + let mut buf: Vec<u8> = Vec::new(); + + // We don't use syntect with the sourcegraph themes bundled + // currently, so let's fall back to something that is kind of + // similar (tm). + let theme = &THEMES.themes[match query.theme.as_str() { + "Sourcegraph (light)" => "Solarized (light)", + _ => "Solarized (dark)", + }]; + + format_code(theme, &mut query.code.as_bytes(), &mut buf, &query.filepath); + + Response::json(&json!({ + "is_plaintext": false, + "data": String::from_utf8_lossy(&buf) + })) +} + +// Server endpoint for rendering a Markdown file. +fn markdown_endpoint(request: &rouille::Request) -> rouille::Response { + let mut texts: HashMap<String, String> = try_or_400!(rouille::input::json_input(request)); + + for text in texts.values_mut() { + let mut buf: Vec<u8> = Vec::new(); + format_markdown(&mut text.as_bytes(), &mut buf); + *text = String::from_utf8_lossy(&buf).to_string(); + } + + Response::json(&texts) +} + +fn highlighting_server(listen: &str) { + println!("Starting syntax highlighting server on '{}'", listen); + + rouille::start_server(listen, move |request| { + router!(request, + // Markdown rendering route + (POST) (/markdown) => { + markdown_endpoint(request) + }, + + // Code rendering route + (POST) (/) => { + code_endpoint(request) + }, + + _ => { + rouille::Response::empty_404() + }, + ) + }); +} + +fn main() { + // Parse the command-line flags passed to cheddar to determine + // whether it is running in about-filter mode (`--about-filter`) + // and what file extension has been supplied. + let matches = App::new("cheddar") + .about("TVL's syntax highlighter") + .arg( + Arg::with_name("about-filter") + .help("Run as a cgit about-filter (renders Markdown)") + .long("about-filter") + .takes_value(false), + ) + .arg( + Arg::with_name("sourcegraph-server") + .help("Run as a Sourcegraph compatible web-server") + .long("sourcegraph-server") + .takes_value(false), + ) + .arg( + Arg::with_name("listen") + .help("Address to listen on") + .long("listen") + .takes_value(true), + ) + .arg(Arg::with_name("filename").help("File to render").index(1)) + .get_matches(); + + if matches.is_present("sourcegraph-server") { + highlighting_server( + matches + .value_of("listen") + .expect("Listening address is required for server mode"), + ); + return; + } + + let filename = matches.value_of("filename").expect("filename is required"); + + let stdin = io::stdin(); + let mut in_handle = stdin.lock(); + + let stdout = io::stdout(); + let mut out_handle = stdout.lock(); + + if matches.is_present("about-filter") && filename.ends_with(".md") { + format_markdown(&mut in_handle, &mut out_handle); + } else { + format_code( + &THEMES.themes["InspiredGitHub"], + &mut in_handle, + &mut out_handle, + filename, + ); + } +} diff --git a/tools/cheddar/src/lib.rs b/tools/cheddar/src/lib.rs new file mode 100644 index 000000000000..851bd743db2e --- /dev/null +++ b/tools/cheddar/src/lib.rs @@ -0,0 +1,339 @@ +//! This file implements the rendering logic of cheddar with public +//! functions for syntax-highlighting code and for turning Markdown +//! into HTML with TVL extensions. +use comrak::arena_tree::Node; +use comrak::nodes::{Ast, AstNode, NodeCodeBlock, NodeHtmlBlock, NodeValue}; +use comrak::{format_html, parse_document, Arena, ComrakOptions}; +use lazy_static::lazy_static; +use regex::Regex; +use std::cell::RefCell; +use std::collections::HashMap; +use std::ffi::OsStr; +use std::io::{BufRead, Write}; +use std::path::Path; +use std::{env, io}; +use syntect::dumps::from_binary; +use syntect::easy::HighlightLines; +use syntect::highlighting::{Theme, ThemeSet}; +use syntect::parsing::{SyntaxReference, SyntaxSet}; +use syntect::util::LinesWithEndings; + +use syntect::html::{ + append_highlighted_html_for_styled_line, start_highlighted_html_snippet, IncludeBackground, +}; + +#[cfg(test)] +mod tests; + +lazy_static! { + // Load syntaxes lazily. Initialisation might not be required in + // the case of Markdown rendering (if there's no code blocks + // within the document). + // + // Note that the syntax set is included from the path pointed to + // by the BAT_SYNTAXES environment variable at compile time. This + // variable is populated by Nix and points to TVL's syntax set. + static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES"))); + pub static ref THEMES: ThemeSet = ThemeSet::load_defaults(); + + // Configure Comrak's Markdown rendering with all the bells & + // whistles! + static ref MD_OPTS: ComrakOptions = { + let mut options = ComrakOptions::default(); + + // Enable non-standard Markdown features: + options.extension.strikethrough = true; + options.extension.tagfilter = true; + options.extension.table = true; + options.extension.autolink = true; + options.extension.tasklist = true; + options.extension.header_ids = Some(String::new()); // yyeeesss! + options.extension.footnotes = true; + options.extension.description_lists = true; + options.extension.front_matter_delimiter = Some("---".to_owned()); + + // Required for tagfilter + options.render.unsafe_ = true; + + options + }; + + // Configures a map of specific filenames to languages, for cases + // where the detection by extension or other heuristics fails. + static ref FILENAME_OVERRIDES: HashMap<&'static str, &'static str> = { + let mut map = HashMap::new(); + // rules.pl is the canonical name of the submit rule file in + // Gerrit, which is written in Prolog. + map.insert("rules.pl", "Prolog"); + map + }; + + // Default shortlink set used in cheddar (i.e. TVL's shortlinks) + static ref TVL_LINKS: Vec<Shortlink> = vec![ + // TVL shortlinks for bugs and changelists (e.g. b/123, + // cl/123). Coincidentally these have the same format, which + // makes the initial implementation easy. + Shortlink { + pattern: Regex::new(r#"\b(?P<type>b|cl)/(?P<dest>\d+)\b"#).unwrap(), + replacement: "[$type/$dest](https://$type.tvl.fyi/$dest)", + }, + Shortlink { + pattern: Regex::new(r#"\br/(?P<dest>\d+)\b"#).unwrap(), + replacement: "[r/$dest](https://code.tvl.fyi/commit/?id=refs/r/$dest)", + } + ]; +} + +/// Structure that describes a single shortlink that should be +/// automatically highlighted. Highlighting is performed as a string +/// replacement over input Markdown. +pub struct Shortlink { + /// Short link pattern to recognise. Make sure to anchor these + /// correctly. + pub pattern: Regex, + + /// Replacement string, as per the documentation of + /// [`Regex::replace`]. + pub replacement: &'static str, +} + +// HTML fragment used when rendering inline blocks in Markdown documents. +// Emulates the GitHub style (subtle background hue and padding). +const BLOCK_PRE: &str = "<pre style=\"background-color:#f6f8fa;padding:16px;\">\n"; + +fn should_continue(res: &io::Result<usize>) -> bool { + match *res { + Ok(n) => n > 0, + Err(_) => false, + } +} + +// This function is taken from the Comrak documentation. +fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) +where + F: Fn(&'a AstNode<'a>), +{ + f(node); + for c in node.children() { + iter_nodes(c, f); + } +} + +// Many of the syntaxes in the syntax list have random capitalisations, which +// means that name matching for the block info of a code block in HTML fails. +// +// Instead, try finding a syntax match by comparing case insensitively (for +// ASCII characters, anyways). +fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> { + // TODO(tazjin): memoize this lookup + SYNTAXES + .syntaxes() + .iter() + .rev() + .find(|&s| info.eq_ignore_ascii_case(&s.name)) +} + +// Replaces code-block inside of a Markdown AST with HTML blocks rendered by +// syntect. This enables static (i.e. no JavaScript) syntax highlighting, even +// of complex languages. +fn highlight_code_block(code_block: &NodeCodeBlock) -> NodeValue { + let theme = &THEMES.themes["InspiredGitHub"]; + let info = String::from_utf8_lossy(&code_block.info); + + let syntax = find_syntax_case_insensitive(&info) + .or_else(|| SYNTAXES.find_syntax_by_extension(&info)) + .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()); + + let code = String::from_utf8_lossy(&code_block.literal); + + let rendered = { + // Write the block preamble manually to get exactly the + // desired layout: + let mut hl = HighlightLines::new(syntax, theme); + let mut buf = BLOCK_PRE.to_string(); + + for line in LinesWithEndings::from(&code) { + let regions = hl.highlight(line, &SYNTAXES); + append_highlighted_html_for_styled_line(®ions[..], IncludeBackground::No, &mut buf); + } + + buf.push_str("</pre>"); + buf + }; + + let mut block = NodeHtmlBlock::default(); + block.literal = rendered.into_bytes(); + + NodeValue::HtmlBlock(block) +} + +// Supported callout elements (which each have their own distinct rendering): +enum Callout { + Todo, + Warning, + Question, + Tip, +} + +// Determine whether the first child of the supplied node contains a text that +// should cause a callout section to be rendered. +fn has_callout<'a>(node: &Node<'a, RefCell<Ast>>) -> Option<Callout> { + match node.first_child().map(|c| c.data.borrow()) { + Some(child) => match &child.value { + NodeValue::Text(text) => { + if text.starts_with(b"TODO") { + return Some(Callout::Todo); + } else if text.starts_with(b"WARNING") { + return Some(Callout::Warning); + } else if text.starts_with(b"QUESTION") { + return Some(Callout::Question); + } else if text.starts_with(b"TIP") { + return Some(Callout::Tip); + } + + None + } + _ => None, + }, + _ => None, + } +} + +// Replace instances of known shortlinks in the input document with +// Markdown syntax for a highlighted link. +fn linkify_shortlinks(mut text: String, shortlinks: &[Shortlink]) -> String { + for link in shortlinks { + text = link + .pattern + .replace_all(&text, link.replacement) + .to_string(); + } + + return text; +} + +fn format_callout_paragraph(callout: Callout) -> NodeValue { + let class = match callout { + Callout::Todo => "cheddar-todo", + Callout::Warning => "cheddar-warning", + Callout::Question => "cheddar-question", + Callout::Tip => "cheddar-tip", + }; + + let mut block = NodeHtmlBlock::default(); + block.literal = format!("<p class=\"cheddar-callout {}\">", class).into_bytes(); + NodeValue::HtmlBlock(block) +} + +pub fn format_markdown_with_shortlinks<R: BufRead, W: Write>( + reader: &mut R, + writer: &mut W, + shortlinks: &[Shortlink], +) { + let document = { + let mut buffer = String::new(); + reader + .read_to_string(&mut buffer) + .expect("reading should work"); + buffer + }; + + let arena = Arena::new(); + let root = parse_document(&arena, &linkify_shortlinks(document, shortlinks), &MD_OPTS); + + // This node must exist with a lifetime greater than that of the parsed AST + // in case that callouts are encountered (otherwise insertion into the tree + // is not possible). + let mut p_close_value = NodeHtmlBlock::default(); + p_close_value.literal = b"</p>".to_vec(); + + let p_close_node = Ast::new(NodeValue::HtmlBlock(p_close_value)); + let p_close = Node::new(RefCell::new(p_close_node)); + + // Special features of Cheddar are implemented by traversing the + // arena and reacting on nodes that we might want to modify. + iter_nodes(root, &|node| { + let mut ast = node.data.borrow_mut(); + let new = match &ast.value { + // Syntax highlighting is implemented by replacing the + // code block node with literal HTML. + NodeValue::CodeBlock(code) => Some(highlight_code_block(code)), + + NodeValue::Paragraph => { + if let Some(callout) = has_callout(node) { + node.insert_after(&p_close); + Some(format_callout_paragraph(callout)) + } else { + None + } + } + _ => None, + }; + + if let Some(new_value) = new { + ast.value = new_value + } + }); + + format_html(root, &MD_OPTS, writer).expect("Markdown rendering failed"); +} + +pub fn format_markdown<R: BufRead, W: Write>(reader: &mut R, writer: &mut W) { + format_markdown_with_shortlinks(reader, writer, &TVL_LINKS) +} + +fn find_syntax_for_file(filename: &str) -> &'static SyntaxReference { + (*FILENAME_OVERRIDES) + .get(filename) + .and_then(|name| SYNTAXES.find_syntax_by_name(name)) + .or_else(|| { + Path::new(filename) + .extension() + .and_then(OsStr::to_str) + .and_then(|s| SYNTAXES.find_syntax_by_extension(s)) + }) + .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()) +} + +pub fn format_code<R: BufRead, W: Write>( + theme: &Theme, + reader: &mut R, + writer: &mut W, + filename: &str, +) { + let mut linebuf = String::new(); + + // Get the first line, we might need it for syntax identification. + let mut read_result = reader.read_line(&mut linebuf); + let syntax = find_syntax_for_file(filename); + + let mut hl = HighlightLines::new(syntax, theme); + let (mut outbuf, bg) = start_highlighted_html_snippet(theme); + + // Rather than using the `lines` iterator, read each line manually + // and maintain buffer state. + // + // This is done because the syntax highlighter requires trailing + // newlines to be efficient, and those are stripped in the lines + // iterator. + while should_continue(&read_result) { + let regions = hl.highlight(&linebuf, &SYNTAXES); + + append_highlighted_html_for_styled_line( + ®ions[..], + IncludeBackground::IfDifferent(bg), + &mut outbuf, + ); + + // immediately output the current state to avoid keeping + // things in memory + write!(writer, "{}", outbuf).expect("write should not fail"); + + // merry go round again + linebuf.clear(); + outbuf.clear(); + read_result = reader.read_line(&mut linebuf); + } + + writeln!(writer, "</pre>").expect("write should not fail"); +} diff --git a/tools/cheddar/src/tests.rs b/tools/cheddar/src/tests.rs new file mode 100644 index 000000000000..c82bba676746 --- /dev/null +++ b/tools/cheddar/src/tests.rs @@ -0,0 +1,105 @@ +use super::*; +use std::io::BufReader; + +// Markdown rendering expectation, ignoring leading and trailing +// whitespace in the input and output. +fn expect_markdown(input: &str, expected: &str) { + let mut input_buf = BufReader::new(input.trim().as_bytes()); + let mut out_buf: Vec<u8> = vec![]; + format_markdown(&mut input_buf, &mut out_buf); + + let out_string = String::from_utf8(out_buf).expect("output should be UTF8"); + assert_eq!(out_string.trim(), expected.trim()); +} + +#[test] +fn renders_simple_markdown() { + expect_markdown("hello", "<p>hello</p>\n"); +} + +#[test] +fn renders_callouts() { + expect_markdown( + "TODO some task.", + r#"<p class="cheddar-callout cheddar-todo"> +TODO some task. +</p> +"#, + ); + + expect_markdown( + "WARNING: be careful", + r#"<p class="cheddar-callout cheddar-warning"> +WARNING: be careful +</p> +"#, + ); + + expect_markdown( + "TIP: note the thing", + r#"<p class="cheddar-callout cheddar-tip"> +TIP: note the thing +</p> +"#, + ); +} + +#[test] +fn renders_code_snippets() { + expect_markdown( + r#" +Code: +```nix +toString 42 +``` +"#, + r#" +<p>Code:</p> +<pre style="background-color:#f6f8fa;padding:16px;"> +<span style="color:#62a35c;">toString </span><span style="color:#0086b3;">42 +</span></pre> +"#, + ); +} + +#[test] +fn highlights_bug_link() { + expect_markdown( + "Please look at b/123.", + "<p>Please look at <a href=\"https://b.tvl.fyi/123\">b/123</a>.</p>", + ); +} + +#[test] +fn highlights_cl_link() { + expect_markdown( + "Please look at cl/420.", + "<p>Please look at <a href=\"https://cl.tvl.fyi/420\">cl/420</a>.</p>", + ); +} + +#[test] +fn highlights_r_link() { + expect_markdown( + "Fixed in r/3268.", + "<p>Fixed in <a href=\"https://code.tvl.fyi/commit/?id=refs/r/3268\">r/3268</a>.</p>", + ); +} + +#[test] +fn highlights_multiple_shortlinks() { + expect_markdown( + "Please look at cl/420, b/123.", + "<p>Please look at <a href=\"https://cl.tvl.fyi/420\">cl/420</a>, <a href=\"https://b.tvl.fyi/123\">b/123</a>.</p>", + ); + + expect_markdown( + "b/213/cl/213 are different things", + "<p><a href=\"https://b.tvl.fyi/213\">b/213</a>/<a href=\"https://cl.tvl.fyi/213\">cl/213</a> are different things</p>", + ); +} + +#[test] +fn ignores_invalid_shortlinks() { + expect_markdown("b/abc is not a real bug", "<p>b/abc is not a real bug</p>"); +} |