From d311af9bc059253a4b7adf41027a60b141c6fc53 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Fri, 30 Apr 2021 22:39:55 +0200 Subject: refactor(cheddar): Split out a library with rendering logic Splits `main.rs` into `lib.rs` and `bin/cheddar.rs`, which enables reuse of cheddar's rendering logic in other Rust applications. Change-Id: Ifd1a44a8d1620c595550a0a497a25b0563e917ca Reviewed-on: https://cl.tvl.fyi/c/depot/+/3060 Tested-by: BuildkiteCI Reviewed-by: flokli --- tools/cheddar/src/lib.rs | 291 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 tools/cheddar/src/lib.rs (limited to 'tools/cheddar/src/lib.rs') diff --git a/tools/cheddar/src/lib.rs b/tools/cheddar/src/lib.rs new file mode 100644 index 000000000000..5e88aaaf61f8 --- /dev/null +++ b/tools/cheddar/src/lib.rs @@ -0,0 +1,291 @@ +//! This file implements the rendering logic of cheddar with public +//! functions for syntax-highlighting code and for turning Markdown +//! into HTML with TVL extensions. +use comrak::arena_tree::Node; +use comrak::nodes::{Ast, AstNode, NodeCodeBlock, NodeHtmlBlock, NodeValue}; +use comrak::{format_html, parse_document, Arena, ComrakOptions}; +use lazy_static::lazy_static; +use std::cell::RefCell; +use std::collections::HashMap; +use std::env; +use std::ffi::OsStr; +use std::io; +use std::io::BufRead; +use std::io::Write; +use std::path::Path; +use syntect::dumps::from_binary; +use syntect::easy::HighlightLines; +use syntect::highlighting::{Theme, ThemeSet}; +use syntect::parsing::{SyntaxReference, SyntaxSet}; +use syntect::util::LinesWithEndings; + +use syntect::html::{ + append_highlighted_html_for_styled_line, start_highlighted_html_snippet, IncludeBackground, +}; + +#[cfg(test)] +mod tests; + +lazy_static! { + // Load syntaxes lazily. Initialisation might not be required in + // the case of Markdown rendering (if there's no code blocks + // within the document). + // + // Note that the syntax set is included from the path pointed to + // by the BAT_SYNTAXES environment variable at compile time. This + // variable is populated by Nix and points to TVL's syntax set. + static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES"))); + pub static ref THEMES: ThemeSet = ThemeSet::load_defaults(); + + // Configure Comrak's Markdown rendering with all the bells & + // whistles! + static ref MD_OPTS: ComrakOptions = { + let mut options = ComrakOptions::default(); + + // Enable non-standard Markdown features: + options.extension.strikethrough = true; + options.extension.tagfilter = true; + options.extension.table = true; + options.extension.autolink = true; + options.extension.tasklist = true; + options.extension.header_ids = Some(String::new()); // yyeeesss! + options.extension.footnotes = true; + options.extension.description_lists = true; + options.extension.front_matter_delimiter = Some("---".to_owned()); + + // Required for tagfilter + options.render.unsafe_ = true; + + options + }; + + // Configures a map of specific filenames to languages, for cases + // where the detection by extension or other heuristics fails. + static ref FILENAME_OVERRIDES: HashMap<&'static str, &'static str> = { + let mut map = HashMap::new(); + // rules.pl is the canonical name of the submit rule file in + // Gerrit, which is written in Prolog. + map.insert("rules.pl", "Prolog"); + map + }; +} + +// HTML fragment used when rendering inline blocks in Markdown documents. +// Emulates the GitHub style (subtle background hue and padding). +const BLOCK_PRE: &str = "
\n";
+
+fn should_continue(res: &io::Result) -> bool {
+    match *res {
+        Ok(n) => n > 0,
+        Err(_) => false,
+    }
+}
+
+// This function is taken from the Comrak documentation.
+fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
+where
+    F: Fn(&'a AstNode<'a>),
+{
+    f(node);
+    for c in node.children() {
+        iter_nodes(c, f);
+    }
+}
+
+// Many of the syntaxes in the syntax list have random capitalisations, which
+// means that name matching for the block info of a code block in HTML fails.
+//
+// Instead, try finding a syntax match by comparing case insensitively (for
+// ASCII characters, anyways).
+fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> {
+    // TODO(tazjin): memoize this lookup
+    SYNTAXES
+        .syntaxes()
+        .iter()
+        .rev()
+        .find(|&s| info.eq_ignore_ascii_case(&s.name))
+}
+
+// Replaces code-block inside of a Markdown AST with HTML blocks rendered by
+// syntect. This enables static (i.e. no JavaScript) syntax highlighting, even
+// of complex languages.
+fn highlight_code_block(code_block: &NodeCodeBlock) -> NodeValue {
+    let theme = &THEMES.themes["InspiredGitHub"];
+    let info = String::from_utf8_lossy(&code_block.info);
+
+    let syntax = find_syntax_case_insensitive(&info)
+        .or_else(|| SYNTAXES.find_syntax_by_extension(&info))
+        .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text());
+
+    let code = String::from_utf8_lossy(&code_block.literal);
+
+    let rendered = {
+        // Write the block preamble manually to get exactly the
+        // desired layout:
+        let mut hl = HighlightLines::new(syntax, theme);
+        let mut buf = BLOCK_PRE.to_string();
+
+        for line in LinesWithEndings::from(&code) {
+            let regions = hl.highlight(line, &SYNTAXES);
+            append_highlighted_html_for_styled_line(®ions[..], IncludeBackground::No, &mut buf);
+        }
+
+        buf.push_str("
"); + buf + }; + + let mut block = NodeHtmlBlock::default(); + block.literal = rendered.into_bytes(); + + NodeValue::HtmlBlock(block) +} + +// Supported callout elements (which each have their own distinct rendering): +enum Callout { + Todo, + Warning, + Question, + Tip, +} + +// Determine whether the first child of the supplied node contains a text that +// should cause a callout section to be rendered. +fn has_callout<'a>(node: &Node<'a, RefCell>) -> Option { + match node.first_child().map(|c| c.data.borrow()) { + Some(child) => match &child.value { + NodeValue::Text(text) => { + if text.starts_with(b"TODO") { + return Some(Callout::Todo); + } else if text.starts_with(b"WARNING") { + return Some(Callout::Warning); + } else if text.starts_with(b"QUESTION") { + return Some(Callout::Question); + } else if text.starts_with(b"TIP") { + return Some(Callout::Tip); + } + + None + } + _ => None, + }, + _ => None, + } +} + +fn format_callout_paragraph(callout: Callout) -> NodeValue { + let class = match callout { + Callout::Todo => "cheddar-todo", + Callout::Warning => "cheddar-warning", + Callout::Question => "cheddar-question", + Callout::Tip => "cheddar-tip", + }; + + let mut block = NodeHtmlBlock::default(); + block.literal = format!("

", class).into_bytes(); + NodeValue::HtmlBlock(block) +} + +pub fn format_markdown(reader: &mut R, writer: &mut W) { + let document = { + let mut buffer = String::new(); + reader + .read_to_string(&mut buffer) + .expect("reading should work"); + buffer + }; + + let arena = Arena::new(); + let root = parse_document(&arena, &document, &MD_OPTS); + + // This node must exist with a lifetime greater than that of the parsed AST + // in case that callouts are encountered (otherwise insertion into the tree + // is not possible). + let mut p_close_value = NodeHtmlBlock::default(); + p_close_value.literal = b"

".to_vec(); + + let p_close_node = Ast::new(NodeValue::HtmlBlock(p_close_value)); + let p_close = Node::new(RefCell::new(p_close_node)); + + // Special features of Cheddar are implemented by traversing the + // arena and reacting on nodes that we might want to modify. + iter_nodes(root, &|node| { + let mut ast = node.data.borrow_mut(); + let new = match &ast.value { + // Syntax highlighting is implemented by replacing the + // code block node with literal HTML. + NodeValue::CodeBlock(code) => Some(highlight_code_block(code)), + + NodeValue::Paragraph => { + if let Some(callout) = has_callout(node) { + node.insert_after(&p_close); + Some(format_callout_paragraph(callout)) + } else { + None + } + } + _ => None, + }; + + if let Some(new_value) = new { + ast.value = new_value + } + }); + + format_html(root, &MD_OPTS, writer).expect("Markdown rendering failed"); +} + +fn find_syntax_for_file(filename: &str) -> &'static SyntaxReference { + (*FILENAME_OVERRIDES) + .get(filename) + .and_then(|name| SYNTAXES.find_syntax_by_name(name)) + .or_else(|| { + Path::new(filename) + .extension() + .and_then(OsStr::to_str) + .and_then(|s| SYNTAXES.find_syntax_by_extension(s)) + }) + .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()) +} + +pub fn format_code( + theme: &Theme, + reader: &mut R, + writer: &mut W, + filename: &str, +) { + let mut linebuf = String::new(); + + // Get the first line, we might need it for syntax identification. + let mut read_result = reader.read_line(&mut linebuf); + let syntax = find_syntax_for_file(filename); + + let mut hl = HighlightLines::new(syntax, theme); + let (mut outbuf, bg) = start_highlighted_html_snippet(theme); + + // Rather than using the `lines` iterator, read each line manually + // and maintain buffer state. + // + // This is done because the syntax highlighter requires trailing + // newlines to be efficient, and those are stripped in the lines + // iterator. + while should_continue(&read_result) { + let regions = hl.highlight(&linebuf, &SYNTAXES); + + append_highlighted_html_for_styled_line( + ®ions[..], + IncludeBackground::IfDifferent(bg), + &mut outbuf, + ); + + // immediately output the current state to avoid keeping + // things in memory + write!(writer, "{}", outbuf).expect("write should not fail"); + + // merry go round again + linebuf.clear(); + outbuf.clear(); + read_result = reader.read_line(&mut linebuf); + } + + writeln!(writer, "").expect("write should not fail"); +} -- cgit 1.4.1