about summary refs log tree commit diff
path: root/tools/cheddar/src
diff options
context:
space:
mode:
Diffstat (limited to 'tools/cheddar/src')
-rw-r--r-- tools/cheddar/src/bin/cheddar.rs 135
-rw-r--r-- tools/cheddar/src/lib.rs 337
-rw-r--r-- tools/cheddar/src/tests.rs 97
3 files changed, 569 insertions, 0 deletions
diff --git a/tools/cheddar/src/bin/cheddar.rs b/tools/cheddar/src/bin/cheddar.rs
new file mode 100644
index 000000000000..58ef32a1b432
--- /dev/null
+++ b/tools/cheddar/src/bin/cheddar.rs
@@ -0,0 +1,135 @@
+//! This file defines the binary for cheddar, which can be interacted
+//! with in two different ways:
+//!
+//! 1. As a CLI tool that acts as a cgit filter.
+//! 2. As a long-running HTTP server that handles rendering requests
+//!    (matching the SourceGraph protocol).
+use clap::{App, Arg};
+use rouille::Response;
+use rouille::{router, try_or_400};
+use serde::Deserialize;
+use serde_json::json;
+use std::collections::HashMap;
+use std::io;
+
+use cheddar::{THEMES, format_code, format_markdown};
+
+// HTTP handler that syntax-highlights a piece of source code. It
+// stands in for Sourcegraph's 'syntect_server' component.
+fn code_endpoint(request: &rouille::Request) -> rouille::Response {
+    // JSON request body; `try_or_400!` below rejects malformed input.
+    #[derive(Deserialize)]
+    struct SourcegraphQuery {
+        filepath: String,
+        theme: String,
+        code: String,
+    }
+
+    let SourcegraphQuery { filepath, theme, code } =
+        try_or_400!(rouille::input::json_input(request));
+
+    // The Sourcegraph themes are not bundled with syntect, so map
+    // them onto the roughly similar Solarized variants instead.
+    let theme_name = if theme == "Sourcegraph (light)" {
+        "Solarized (light)"
+    } else {
+        "Solarized (dark)"
+    };
+
+    let mut rendered: Vec<u8> = Vec::new();
+    format_code(&THEMES.themes[theme_name], &mut code.as_bytes(), &mut rendered, &filepath);
+
+    let data = String::from_utf8_lossy(&rendered);
+    Response::json(&json!({ "is_plaintext": false, "data": data }))
+}
+
+// HTTP handler that renders Markdown. The JSON body is a map whose
+// values are Markdown sources; each value is replaced by its HTML.
+fn markdown_endpoint(request: &rouille::Request) -> rouille::Response {
+    let mut documents: HashMap<String, String> = try_or_400!(rouille::input::json_input(request));
+
+    documents.values_mut().for_each(|source| {
+        let mut html: Vec<u8> = Vec::new();
+        format_markdown(&mut source.as_bytes(), &mut html);
+        *source = String::from_utf8_lossy(&html).into_owned();
+    });
+    Response::json(&documents)
+}
+
+fn highlighting_server(listen: &str) {
+    println!("Starting syntax highlighting server on '{}'", listen);
+    // Serve HTTP on `listen`, dispatching on request method and path.
+    rouille::start_server(listen, move |request| {
+        router!(request,
+                // Markdown rendering route
+                (POST) (/markdown) => {
+                    markdown_endpoint(request)
+                },
+
+                // Code rendering route
+                (POST) (/) => {
+                    code_endpoint(request)
+                },
+                // Every other request is answered with an empty 404.
+                _ => {
+                    rouille::Response::empty_404()
+                },
+        )
+    });
+}
+
+fn main() {
+    // Command-line interface: `--sourcegraph-server` (which needs
+    // `--listen`) starts the HTTP server; otherwise cheddar filters
+    // stdin to stdout for the positional filename, rendering
+    // Markdown when `--about-filter` is set and the name ends in `.md`.
+    let about_filter = Arg::with_name("about-filter")
+        .help("Run as a cgit about-filter (renders Markdown)")
+        .long("about-filter")
+        .takes_value(false);
+
+    let sourcegraph_server = Arg::with_name("sourcegraph-server")
+        .help("Run as a Sourcegraph compatible web-server")
+        .long("sourcegraph-server")
+        .takes_value(false);
+
+    let listen = Arg::with_name("listen")
+        .help("Address to listen on")
+        .long("listen")
+        .takes_value(true);
+
+    let filename = Arg::with_name("filename").help("File to render").index(1);
+
+    let matches = App::new("cheddar")
+        .about("TVL's syntax highlighter")
+        .arg(about_filter)
+        .arg(sourcegraph_server)
+        .arg(listen)
+        .arg(filename)
+        .get_matches();
+
+    // Server mode only needs the listening address.
+    if matches.is_present("sourcegraph-server") {
+        let address = matches
+            .value_of("listen")
+            .expect("Listening address is required for server mode");
+        highlighting_server(address);
+        return;
+    }
+
+    // Filter mode: the filename decides how stdin gets rendered.
+    let filename = matches.value_of("filename").expect("filename is required");
+    let render_markdown = matches.is_present("about-filter") && filename.ends_with(".md");
+
+    let stdin = io::stdin();
+    let mut in_handle = stdin.lock();
+    let stdout = io::stdout();
+    let mut out_handle = stdout.lock();
+
+    if render_markdown {
+        format_markdown(&mut in_handle, &mut out_handle);
+    } else {
+        let theme = &THEMES.themes["InspiredGitHub"];
+        format_code(theme, &mut in_handle, &mut out_handle, filename);
+    }
+}
diff --git a/tools/cheddar/src/lib.rs b/tools/cheddar/src/lib.rs
new file mode 100644
index 000000000000..e5d7aad1e62c
--- /dev/null
+++ b/tools/cheddar/src/lib.rs
@@ -0,0 +1,337 @@
+//! This file implements the rendering logic of cheddar with public
+//! functions for syntax-highlighting code and for turning Markdown
+//! into HTML with TVL extensions.
+use comrak::arena_tree::Node;
+use comrak::nodes::{Ast, AstNode, NodeCodeBlock, NodeHtmlBlock, NodeValue};
+use comrak::{format_html, parse_document, Arena, ComrakOptions};
+use lazy_static::lazy_static;
+use regex::Regex;
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::env;
+use std::ffi::OsStr;
+use std::io;
+use std::io::BufRead;
+use std::io::Write;
+use std::path::Path;
+use syntect::dumps::from_binary;
+use syntect::easy::HighlightLines;
+use syntect::highlighting::{Theme, ThemeSet};
+use syntect::parsing::{SyntaxReference, SyntaxSet};
+use syntect::util::LinesWithEndings;
+
+use syntect::html::{
+    append_highlighted_html_for_styled_line, start_highlighted_html_snippet, IncludeBackground,
+};
+
+#[cfg(test)]
+mod tests;
+
+lazy_static! {
+    // Syntaxes are loaded lazily, because initialisation might not
+    // be needed at all when rendering Markdown (if there are no code
+    // blocks within the document).
+    //
+    // Note that the syntax set is included from the path pointed to
+    // by the BAT_SYNTAXES environment variable at compile time. This
+    // variable is populated by Nix and points to TVL's syntax set.
+    static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES")));
+    pub static ref THEMES: ThemeSet = ThemeSet::load_defaults();
+
+    // Comrak options shared by all Markdown rendering, with all the
+    // bells & whistles listed below switched on.
+    static ref MD_OPTS: ComrakOptions = {
+        let mut options = ComrakOptions::default();
+
+        // Enable non-standard Markdown features:
+        options.extension.strikethrough = true;
+        options.extension.tagfilter = true;
+        options.extension.table = true;
+        options.extension.autolink = true;
+        options.extension.tasklist = true;
+        options.extension.header_ids = Some(String::new()); // yyeeesss!
+        options.extension.footnotes = true;
+        options.extension.description_lists = true;
+        options.extension.front_matter_delimiter = Some("---".to_owned());
+
+        // Required for tagfilter
+        options.render.unsafe_ = true;
+
+        options
+    };
+
+    // Maps specific filenames to syntax names, for cases where
+    // detection by extension or other heuristics fails.
+    static ref FILENAME_OVERRIDES: HashMap<&'static str, &'static str> = {
+        let mut map = HashMap::new();
+        // rules.pl is the canonical name of the submit rule file in
+        // Gerrit, which is written in Prolog.
+        map.insert("rules.pl", "Prolog");
+        map
+    };
+
+    // Default shortlink set used in cheddar (i.e. TVL's shortlinks)
+    static ref TVL_LINKS: Vec<Shortlink> = vec![
+        // TVL shortlinks for bugs and changelists (e.g. b/123,
+        // cl/123). Both share the same format, so a single pattern
+        // with a named `type` group covers the two of them.
+        Shortlink {
+            pattern: Regex::new(r#"\b(?P<type>b|cl)/(?P<dest>\d+)\b"#).unwrap(),
+            replacement: "[$type/$dest](https://$type.tvl.fyi/$dest)",
+        }
+    ];
+}
+
+/// Describes a single shortlink that should be linkified
+/// automatically. Linkification is performed as a regex replacement
+/// over the Markdown input, before that input is parsed.
+pub struct Shortlink {
+    /// Short link pattern to recognise. Make sure to anchor these
+    /// correctly.
+    pub pattern: Regex,
+
+    /// Replacement string, as per [`Regex::replace`]; it may refer
+    /// to capture groups of `pattern` (e.g. `$dest`).
+    pub replacement: &'static str,
+}
+
+// Opening tag written in front of highlighted code blocks in Markdown
+// documents. Emulates the GitHub style (subtle background hue, padding).
+const BLOCK_PRE: &str = "<pre style=\"background-color:#f6f8fa;padding:16px;\">\n";
+
+// Decides whether a read loop keeps going: only a successful read
+// of at least one byte counts. A read of zero bytes (`Ok(0)`) and
+// any read error both stop the loop.
+fn should_continue(res: &io::Result<usize>) -> bool {
+    if let Ok(n) = res { *n > 0 } else { false }
+}
+
+// Pre-order traversal: calls `f` on `node` first and then on all of
+// its descendants. Taken from the Comrak documentation.
+fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
+where
+    F: Fn(&'a AstNode<'a>),
+{
+    f(node);
+    // The children are only looked up after `f(node)` has returned.
+    node.children().for_each(|child| iter_nodes(child, f));
+}
+
+// The syntax list capitalises names inconsistently, so an exact
+// name match against the info string of a code block often fails.
+// Compare names case-insensitively instead (ASCII only), scanning
+// the list from the back so that the last matching syntax wins.
+fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> {
+    // TODO(tazjin): memoize this lookup
+    for syntax in SYNTAXES.syntaxes().iter().rev() {
+        if syntax.name.eq_ignore_ascii_case(info) {
+            return Some(syntax);
+        }
+    }
+    None
+}
+
+// Turns a Markdown code block into an HTML block that syntect has
+// already highlighted, so highlighting is static (no JavaScript)
+// and works even for complex languages.
+fn highlight_code_block(code_block: &NodeCodeBlock) -> NodeValue {
+    let info = String::from_utf8_lossy(&code_block.info);
+    let code = String::from_utf8_lossy(&code_block.literal);
+
+    // Resolve the syntax from the info string: first by name
+    // (ignoring ASCII case), then as a file extension, and finally
+    // fall back to plain text.
+    let syntax = match find_syntax_case_insensitive(&info) {
+        Some(by_name) => by_name,
+        None => SYNTAXES
+            .find_syntax_by_extension(&info)
+            .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()),
+    };
+
+    let theme = &THEMES.themes["InspiredGitHub"];
+    let mut highlighter = HighlightLines::new(syntax, theme);
+
+    // The preamble is written by hand to get exactly the desired
+    // layout around the highlighted lines.
+    let mut html = String::from(BLOCK_PRE);
+    for line in LinesWithEndings::from(&code) {
+        let regions = highlighter.highlight(line, &SYNTAXES);
+        append_highlighted_html_for_styled_line(&regions[..], IncludeBackground::No, &mut html);
+    }
+    html.push_str("</pre>");
+
+    let mut block = NodeHtmlBlock::default();
+    block.literal = html.into_bytes();
+    NodeValue::HtmlBlock(block)
+}
+
+// Supported kinds of callout; each kind is rendered with its own CSS class.
+enum Callout {
+    Todo,
+    Warning,
+    Question,
+    Tip,
+}
+
+// Checks whether the first child of `node` is a text node that
+// starts with a callout keyword and, if so, returns which callout.
+fn has_callout<'a>(node: &Node<'a, RefCell<Ast>>) -> Option<Callout> {
+    let first = node.first_child()?;
+    let ast = first.data.borrow();
+    // Only a plain text node can introduce a callout.
+    let text = match &ast.value {
+        NodeValue::Text(text) => text,
+        _ => return None,
+    };
+    // Keywords are matched as case-sensitive byte prefixes.
+    if text.starts_with(b"TODO") {
+        Some(Callout::Todo)
+    } else if text.starts_with(b"WARNING") {
+        Some(Callout::Warning)
+    } else if text.starts_with(b"QUESTION") {
+        Some(Callout::Question)
+    } else if text.starts_with(b"TIP") {
+        Some(Callout::Tip)
+    } else {
+        None
+    }
+}
+
+// Replaces every occurrence of a known shortlink in the input
+// document with Markdown syntax for a highlighted link. The
+// shortlinks are applied in slice order, each one to the output
+// of the previous replacement.
+fn linkify_shortlinks(text: String, shortlinks: &[Shortlink]) -> String {
+    // Thread the document through one replacement pass per
+    // shortlink; with no shortlinks the text is returned as-is.
+    shortlinks.iter().fold(text, |document, link| {
+        let replaced = link.pattern.replace_all(&document, link.replacement);
+        replaced.to_string()
+    })
+}
+
+// Builds the HTML block that opens a callout paragraph of this kind.
+fn format_callout_paragraph(callout: Callout) -> NodeValue {
+    let css_class = match callout {
+        Callout::Tip => "cheddar-tip",
+        Callout::Question => "cheddar-question",
+        Callout::Warning => "cheddar-warning",
+        Callout::Todo => "cheddar-todo",
+    };
+    let mut opening = NodeHtmlBlock::default();
+    opening.literal = format!("<p class=\"cheddar-callout {}\">", css_class).into_bytes();
+    NodeValue::HtmlBlock(opening)
+}
+
+/// Renders the Markdown document read from `reader` as HTML into
+/// `writer`, with cheddar's extensions applied:
+///
+/// * the given `shortlinks` are rewritten into Markdown links,
+/// * code blocks are replaced by syntax-highlighted HTML,
+/// * paragraphs starting with a callout keyword are wrapped in a
+///   `cheddar-callout` paragraph.
+///
+/// Panics if `reader` cannot be read or rendering to `writer` fails.
+pub fn format_markdown_with_shortlinks<R: BufRead, W: Write>(
+    reader: &mut R,
+    writer: &mut W,
+    shortlinks: &[Shortlink],
+) {
+    let mut document = String::new();
+    reader
+        .read_to_string(&mut document)
+        .expect("reading should work");
+
+    let arena = Arena::new();
+    let root = parse_document(&arena, &linkify_shortlinks(document, shortlinks), &MD_OPTS);
+
+    // Special features of Cheddar are implemented by traversing the
+    // arena and reacting on nodes that we might want to modify.
+    iter_nodes(root, &|node| {
+        let mut ast = node.data.borrow_mut();
+        let new = match &ast.value {
+            // Syntax highlighting is implemented by replacing the
+            // code block node with literal HTML.
+            NodeValue::CodeBlock(code) => Some(highlight_code_block(code)),
+
+            NodeValue::Paragraph => has_callout(node).map(|callout| {
+                // Every callout needs a closing node of its own:
+                // `insert_after` detaches a node from its previous
+                // position, so one shared node would only close
+                // the last callout of the document.
+                let mut p_close = NodeHtmlBlock::default();
+                p_close.literal = b"</p>".to_vec();
+                let p_close = Ast::new(NodeValue::HtmlBlock(p_close));
+                node.insert_after(arena.alloc(Node::new(RefCell::new(p_close))));
+                format_callout_paragraph(callout)
+            }),
+            _ => None,
+        };
+
+        if let Some(new_value) = new {
+            ast.value = new_value
+        }
+    });
+
+    format_html(root, &MD_OPTS, writer).expect("Markdown rendering failed");
+}
+
+pub fn format_markdown<R: BufRead, W: Write>(reader: &mut R, writer: &mut W) {
+    format_markdown_with_shortlinks(reader, writer, &TVL_LINKS) // default TVL shortlinks
+}
+
+// Picks the syntax for `filename`: a filename override wins, then
+// the file extension is tried, and plain text is the last resort.
+fn find_syntax_for_file(filename: &str) -> &'static SyntaxReference {
+    let by_override = FILENAME_OVERRIDES
+        .get(filename)
+        .and_then(|name| SYNTAXES.find_syntax_by_name(name));
+    let by_extension = || {
+        let extension = Path::new(filename).extension().and_then(OsStr::to_str)?;
+        SYNTAXES.find_syntax_by_extension(extension)
+    };
+    by_override.or_else(by_extension).unwrap_or_else(|| SYNTAXES.find_syntax_plain_text())
+}
+
+/// Syntax-highlights the code read from `reader` as HTML, styled
+/// with `theme`, and streams it line by line into `writer`. The
+/// syntax is picked based on `filename` (plain text if unknown).
+///
+/// Reading stops at end of input or at the first read error.
+/// Panics if writing to `writer` fails.
+pub fn format_code<R: BufRead, W: Write>(
+    theme: &Theme,
+    reader: &mut R,
+    writer: &mut W,
+    filename: &str,
+) {
+    let syntax = find_syntax_for_file(filename);
+    let mut hl = HighlightLines::new(syntax, theme);
+    let (mut outbuf, bg) = start_highlighted_html_snippet(theme);
+
+    // Emit the opening `<pre>` tag right away, so that it is paired
+    // with the closing tag below even if the input has no lines.
+    write!(writer, "{}", outbuf).expect("write should not fail");
+    outbuf.clear();
+
+    // Rather than using the `lines` iterator, read each line manually
+    // into a reused buffer: the syntax highlighter requires trailing
+    // newlines to be efficient, and the `lines` iterator strips them.
+    let mut linebuf = String::new();
+    while should_continue(&reader.read_line(&mut linebuf)) {
+        let regions = hl.highlight(&linebuf, &SYNTAXES);
+        append_highlighted_html_for_styled_line(
+            &regions[..],
+            IncludeBackground::IfDifferent(bg),
+            &mut outbuf,
+        );
+
+        // Output each line immediately to avoid keeping the whole
+        // document in memory, then reuse both buffers.
+        write!(writer, "{}", outbuf).expect("write should not fail");
+        linebuf.clear();
+        outbuf.clear();
+    }
+
+    writeln!(writer, "</pre>").expect("write should not fail");
+}
diff --git a/tools/cheddar/src/tests.rs b/tools/cheddar/src/tests.rs
new file mode 100644
index 000000000000..5b7b1cc52a95
--- /dev/null
+++ b/tools/cheddar/src/tests.rs
@@ -0,0 +1,97 @@
+use super::*;
+use std::io::BufReader;
+
+// Renders `input` with `format_markdown` and asserts that the output
+// equals `expected`; input and both compared strings are trimmed.
+fn expect_markdown(input: &str, expected: &str) {
+    let mut rendered: Vec<u8> = Vec::new();
+    let mut source = BufReader::new(input.trim().as_bytes());
+    format_markdown(&mut source, &mut rendered);
+
+    let html = String::from_utf8(rendered).expect("output should be UTF8");
+    assert_eq!(html.trim(), expected.trim());
+}
+
+#[test] // plain text is rendered as a single paragraph
+fn renders_simple_markdown() {
+    expect_markdown("hello", "<p>hello</p>\n");
+}
+
+#[test]
+fn renders_callouts() {
+    expect_markdown(
+        "TODO some task.",
+        r#"<p class="cheddar-callout cheddar-todo">
+TODO some task.
+</p>
+"#,
+    );
+    // A colon after the keyword is fine too; only the prefix counts.
+    expect_markdown(
+        "WARNING: be careful",
+        r#"<p class="cheddar-callout cheddar-warning">
+WARNING: be careful
+</p>
+"#,
+    );
+    // Each keyword selects its own `cheddar-*` class.
+    expect_markdown(
+        "TIP: note the thing",
+        r#"<p class="cheddar-callout cheddar-tip">
+TIP: note the thing
+</p>
+"#,
+    );
+}
+
+#[test] // a fenced `nix` block is rendered as a highlighted <pre> block
+fn renders_code_snippets() {
+    expect_markdown(
+        r#"
+Code:
+```nix
+toString 42
+```
+"#,
+        r#"
+<p>Code:</p>
+<pre style="background-color:#f6f8fa;padding:16px;">
+<span style="color:#62a35c;">toString </span><span style="color:#0086b3;">42
+</span></pre>
+"#,
+    );
+}
+
+// A bare bug reference (`b/<number>`) is turned into a link.
+#[test]
+fn highlights_bug_link() {
+    let input = "Please look at b/123.";
+    let html = "<p>Please look at <a href=\"https://b.tvl.fyi/123\">b/123</a>.</p>";
+    expect_markdown(input, html);
+}
+
+// A bare changelist reference (`cl/<number>`) is turned into a link.
+#[test]
+fn highlights_cl_link() {
+    let input = "Please look at cl/420.";
+    let html = "<p>Please look at <a href=\"https://cl.tvl.fyi/420\">cl/420</a>.</p>";
+    expect_markdown(input, html);
+}
+
+#[test]
+fn highlights_multiple_shortlinks() {
+    expect_markdown(
+        "Please look at cl/420, b/123.",
+        "<p>Please look at <a href=\"https://cl.tvl.fyi/420\">cl/420</a>, <a href=\"https://b.tvl.fyi/123\">b/123</a>.</p>",
+    );
+    // Shortlinks separated only by a slash are linked individually.
+    expect_markdown(
+        "b/213/cl/213 are different things",
+        "<p><a href=\"https://b.tvl.fyi/213\">b/213</a>/<a href=\"https://cl.tvl.fyi/213\">cl/213</a> are different things</p>",
+    );
+}
+
+#[test] // a non-numeric destination must not be linkified
+fn ignores_invalid_shortlinks() {
+    expect_markdown("b/abc is not a real bug", "<p>b/abc is not a real bug</p>");
+}