about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
author Vincent Ambo <tazjin@google.com> 2019-12-21T15·57+0000
committer Vincent Ambo <tazjin@google.com> 2019-12-21T15·57+0000
commit f90495138483698c954620564d93e2c42a114c9f (patch)
tree 980e201f78ed238a1a61053449aa3908b49a5b8b /tools
parent 671dbc7f72c2ccd1bbd1aa3ae66a0e627382eebd (diff)
feat(cheddar): Render code blocks in Markdown via syntect r/287
Implements fully static (i.e. no JavaScript!) highlighting of code
blocks when rendering Markdown.

This works by walking through the Comrak AST and replacing any code
blocks with pre-rendered HTML blocks.

Syntaxes are chosen based on the "block info", which is the string
users put after the block's opening fence. This can be either the
(case-insensitive) name of a syntax or a file extension associated
with the desired syntax.

The theme is set to one that imitates GitHub.
Diffstat (limited to 'tools')
-rw-r--r-- tools/cheddar/src/main.rs 108
1 files changed, 82 insertions, 26 deletions
diff --git a/tools/cheddar/src/main.rs b/tools/cheddar/src/main.rs
index 7affc3bc9c..67cd2f8bdf 100644
--- a/tools/cheddar/src/main.rs
+++ b/tools/cheddar/src/main.rs
@@ -1,4 +1,5 @@
-use comrak::{markdown_to_html, ComrakOptions};
+use comrak::nodes::{AstNode, NodeValue, NodeHtmlBlock};
+use comrak::{Arena, parse_document, format_html, ComrakOptions};
 use lazy_static::lazy_static;
 use std::env;
 use std::ffi::OsStr;
@@ -9,19 +10,36 @@ use std::path::Path;
 use syntect::dumps::from_binary;
 use syntect::easy::HighlightLines;
 use syntect::highlighting::ThemeSet;
-use syntect::parsing::SyntaxSet;
+use syntect::parsing::{SyntaxSet, SyntaxReference};
 
 use syntect::html::{
+    IncludeBackground,
     append_highlighted_html_for_styled_line,
+    highlighted_html_for_string,
     start_highlighted_html_snippet,
-    IncludeBackground,
 };
 
-// Set up syntaxes as a lazy_static. Initialisation might not be
-// required in the case of Markdown rendering (if there's no code
-// blocks within the document).
 lazy_static! {
+    // Load syntaxes & themes lazily. Initialisation might not be
+    // required in the case of Markdown rendering (if there's no code
+    // blocks within the document).
     static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES")));
+    static ref THEMES: ThemeSet = ThemeSet::load_defaults();
+
+    // Configure Comrak's Markdown rendering with all the bells &
+    // whistles!
+    static ref MD_OPTS: ComrakOptions = ComrakOptions{
+        ext_strikethrough: true,
+        ext_tagfilter: true,
+        ext_table: true,
+        ext_autolink: true,
+        ext_tasklist: true,
+        ext_header_ids: Some(String::new()), // yyeeesss!
+        ext_footnotes: true,
+        ext_description_lists: true,
+        unsafe_: true, // required for tagfilter
+        ..ComrakOptions::default()
+    };
 }
 
 fn args_extension() -> Option<String> {
@@ -44,27 +62,66 @@ fn should_continue(res: &io::Result<usize>) -> bool {
     }
 }
 
-fn format_markdown() {
-    let mut buffer = String::new();
-    let stdin = io::stdin();
-    let mut stdin = stdin.lock();
-    stdin.read_to_string(&mut buffer).expect("failed to read stdin");
+// This function is taken from the Comrak documentation.
+fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) where F : Fn(&'a AstNode<'a>) {
+    f(node);
+    for c in node.children() {
+        iter_nodes(c, f);
+    }
+}
 
-    // Markdown rendering is configurd with most of the bells &
-    // whistles here:
-    let opts = ComrakOptions{
-        ext_strikethrough: true,
-        ext_tagfilter: true,
-        ext_table: true,
-        ext_autolink: true,
-        ext_tasklist: true,
-        ext_header_ids: Some(String::new()), // yyeeesss!
-        ext_footnotes: true,
-        ext_description_lists: true,
-        ..ComrakOptions::default()
+// Many syntaxes in the syntax list have inconsistent capitalisation, which
+// means that exact name matching against a code block's info string fails.
+//
+// Instead, try finding a syntax match by comparing case-insensitively (for
+// ASCII characters, anyway).
+fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> {
+    SYNTAXES.syntaxes().iter().rev().find(|&s| info.eq_ignore_ascii_case(&s.name))
+}
+
+fn format_markdown() {
+    let document = {
+        let mut buffer = String::new();
+        let stdin = io::stdin();
+        let mut stdin = stdin.lock();
+        stdin.read_to_string(&mut buffer).expect("failed to read stdin");
+        buffer
     };
 
-    print!("{}", markdown_to_html(&buffer, &opts));
+    let arena = Arena::new();
+    let root = parse_document(&arena, &document, &MD_OPTS);
+
+    // Syntax highlighting is implemented by traversing the arena and
+    // replacing all code blocks with HTML blocks rendered by syntect.
+    iter_nodes(root, &|node| {
+        let mut ast = node.data.borrow_mut();
+        match &ast.value {
+            NodeValue::CodeBlock(code_block) => {
+                let theme = &THEMES.themes["InspiredGitHub"];
+                let info = String::from_utf8_lossy(&code_block.info);
+
+                let syntax = find_syntax_case_insensitive(&info)
+                    .or_else(|| SYNTAXES.find_syntax_by_extension(&info))
+                    .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text());
+
+                let code = String::from_utf8_lossy(&code_block.literal);
+                let rendered = highlighted_html_for_string(
+                    &code, &SYNTAXES, syntax, theme,
+                );
+
+                let block = NodeHtmlBlock {
+                    block_type: 1, // It's unclear what behaviour is toggled by this
+                    literal: rendered.into_bytes(),
+                };
+
+                ast.value = NodeValue::HtmlBlock(block);
+            },
+            _ => (),
+        };
+    });
+
+    format_html(root, &MD_OPTS, &mut io::stdout())
+        .expect("Markdown rendering failed");
 }
 
 fn format_code(extension: String) {
@@ -76,8 +133,7 @@ fn format_code(extension: String) {
     let mut read_result = stdin.read_line(&mut linebuf);
 
     // Set up the highlighter
-    let ts = ThemeSet::load_defaults();
-    let theme = &ts.themes["InspiredGitHub"];
+    let theme = &THEMES.themes["InspiredGitHub"];
 
     let syntax = SYNTAXES.find_syntax_by_extension(&extension)
         .or_else(|| SYNTAXES.find_syntax_by_first_line(&linebuf))