From f90495138483698c954620564d93e2c42a114c9f Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sat, 21 Dec 2019 15:57:02 +0000 Subject: feat(cheddar): Render code blocks in Markdown via syntect Implements fully static (i.e. no JavaScript!) highlighting of code blocks when rendering Markdown. This works by walking through the Comrak AST and replacing any code blocks with pre-rendered HTML blocks. Syntaxes are chosen based on the "block info", which is the string users put after the block's opening fence. This can either be a (case-insensitive) name of a syntax, or alternatively a file extension associated with the desired syntax. The theme is set to one that imitates GitHub. --- tools/cheddar/src/main.rs | 108 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 26 deletions(-) diff --git a/tools/cheddar/src/main.rs b/tools/cheddar/src/main.rs index 7affc3bc9c..67cd2f8bdf 100644 --- a/tools/cheddar/src/main.rs +++ b/tools/cheddar/src/main.rs @@ -1,4 +1,5 @@ -use comrak::{markdown_to_html, ComrakOptions}; +use comrak::nodes::{AstNode, NodeValue, NodeHtmlBlock}; +use comrak::{Arena, parse_document, format_html, ComrakOptions}; use lazy_static::lazy_static; use std::env; use std::ffi::OsStr; @@ -9,19 +10,36 @@ use std::path::Path; use syntect::dumps::from_binary; use syntect::easy::HighlightLines; use syntect::highlighting::ThemeSet; -use syntect::parsing::SyntaxSet; +use syntect::parsing::{SyntaxSet, SyntaxReference}; use syntect::html::{ + IncludeBackground, append_highlighted_html_for_styled_line, + highlighted_html_for_string, start_highlighted_html_snippet, - IncludeBackground, }; -// Set up syntaxes as a lazy_static. Initialisation might not be -// required in the case of Markdown rendering (if there's no code -// blocks within the document). lazy_static! { + // Load syntaxes & themes lazily. Initialisation might not be + // required in the case of Markdown rendering (if there are no code + // blocks within the document). 
static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES"))); + static ref THEMES: ThemeSet = ThemeSet::load_defaults(); + + // Configure Comrak's Markdown rendering with all the bells & + // whistles! + static ref MD_OPTS: ComrakOptions = ComrakOptions{ + ext_strikethrough: true, + ext_tagfilter: true, + ext_table: true, + ext_autolink: true, + ext_tasklist: true, + ext_header_ids: Some(String::new()), // yyeeesss! + ext_footnotes: true, + ext_description_lists: true, + unsafe_: true, // required for tagfilter + ..ComrakOptions::default() + }; } fn args_extension() -> Option { @@ -44,27 +62,66 @@ fn should_continue(res: &io::Result) -> bool { } } -fn format_markdown() { - let mut buffer = String::new(); - let stdin = io::stdin(); - let mut stdin = stdin.lock(); - stdin.read_to_string(&mut buffer).expect("failed to read stdin"); +// This function is taken from the Comrak documentation. +fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) where F : Fn(&'a AstNode<'a>) { + f(node); + for c in node.children() { + iter_nodes(c, f); + } +} - // Markdown rendering is configurd with most of the bells & - // whistles here: - let opts = ComrakOptions{ - ext_strikethrough: true, - ext_tagfilter: true, - ext_table: true, - ext_autolink: true, - ext_tasklist: true, - ext_header_ids: Some(String::new()), // yyeeesss! - ext_footnotes: true, - ext_description_lists: true, - ..ComrakOptions::default() +// Many of the syntaxes in the syntax list have random capitalisations, which +// means that name matching for the block info of a code block in HTML fails. +// +// Instead, try finding a syntax match by comparing case insensitively (for +// ASCII characters, anyways). 
+fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> { + SYNTAXES.syntaxes().iter().rev().find(|&s| info.eq_ignore_ascii_case(&s.name)) +} + +fn format_markdown() { + let document = { + let mut buffer = String::new(); + let stdin = io::stdin(); + let mut stdin = stdin.lock(); + stdin.read_to_string(&mut buffer).expect("failed to read stdin"); + buffer }; - print!("{}", markdown_to_html(&buffer, &opts)); + let arena = Arena::new(); + let root = parse_document(&arena, &document, &MD_OPTS); + + // Syntax highlighting is implemented by traversing the arena and + // replacing all code blocks with HTML blocks rendered by syntect. + iter_nodes(root, &|node| { + let mut ast = node.data.borrow_mut(); + match &ast.value { + NodeValue::CodeBlock(code_block) => { + let theme = &THEMES.themes["InspiredGitHub"]; + let info = String::from_utf8_lossy(&code_block.info); + + let syntax = find_syntax_case_insensitive(&info) + .or_else(|| SYNTAXES.find_syntax_by_extension(&info)) + .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()); + + let code = String::from_utf8_lossy(&code_block.literal); + let rendered = highlighted_html_for_string( + &code, &SYNTAXES, syntax, theme, + ); + + let block = NodeHtmlBlock { + block_type: 1, // It's unclear what behaviour is toggled by this + literal: rendered.into_bytes(), + }; + + ast.value = NodeValue::HtmlBlock(block); + }, + _ => (), + }; + }); + + format_html(root, &MD_OPTS, &mut io::stdout()) + .expect("Markdown rendering failed"); } fn format_code(extension: String) { @@ -76,8 +133,7 @@ fn format_code(extension: String) { let mut read_result = stdin.read_line(&mut linebuf); // Set up the highlighter - let ts = ThemeSet::load_defaults(); - let theme = &ts.themes["InspiredGitHub"]; + let theme = &THEMES.themes["InspiredGitHub"]; let syntax = SYNTAXES.find_syntax_by_extension(&extension) .or_else(|| SYNTAXES.find_syntax_by_first_line(&linebuf)) -- cgit 1.4.1