diff options
author | Vincent Ambo <tazjin@google.com> | 2019-12-21T15·57+0000 |
---|---|---|
committer | Vincent Ambo <tazjin@google.com> | 2019-12-21T15·57+0000 |
commit | f90495138483698c954620564d93e2c42a114c9f (patch) | |
tree | 980e201f78ed238a1a61053449aa3908b49a5b8b /tools | |
parent | 671dbc7f72c2ccd1bbd1aa3ae66a0e627382eebd (diff) |
feat(cheddar): Render code blocks in Markdown via syntect r/287
Implements fully static (i.e. no JavaScript!) highlighting of code blocks when rendering Markdown. This works by walking through the Comrak AST and replacing every code block with a pre-rendered HTML block. The syntax is chosen based on the "block info", which is the string users put after the block's opening fence. This can be either the (case-insensitive) name of a syntax or a file extension associated with the desired syntax; if neither matches, the block is rendered as plain text. The theme is set to "InspiredGitHub", which imitates GitHub.
Diffstat (limited to 'tools')
-rw-r--r-- | tools/cheddar/src/main.rs | 108 |
1 file changed, 82 insertions, 26 deletions
diff --git a/tools/cheddar/src/main.rs b/tools/cheddar/src/main.rs index 7affc3bc9cf8..67cd2f8bdfc0 100644 --- a/tools/cheddar/src/main.rs +++ b/tools/cheddar/src/main.rs @@ -1,4 +1,5 @@ -use comrak::{markdown_to_html, ComrakOptions}; +use comrak::nodes::{AstNode, NodeValue, NodeHtmlBlock}; +use comrak::{Arena, parse_document, format_html, ComrakOptions}; use lazy_static::lazy_static; use std::env; use std::ffi::OsStr; @@ -9,19 +10,36 @@ use std::path::Path; use syntect::dumps::from_binary; use syntect::easy::HighlightLines; use syntect::highlighting::ThemeSet; -use syntect::parsing::SyntaxSet; +use syntect::parsing::{SyntaxSet, SyntaxReference}; use syntect::html::{ + IncludeBackground, append_highlighted_html_for_styled_line, + highlighted_html_for_string, start_highlighted_html_snippet, - IncludeBackground, }; -// Set up syntaxes as a lazy_static. Initialisation might not be -// required in the case of Markdown rendering (if there's no code -// blocks within the document). lazy_static! { + // Load syntaxes & themes lazily. Initialisation might not be + // required in the case of Markdown rendering (if there's no code + // blocks within the document). static ref SYNTAXES: SyntaxSet = from_binary(include_bytes!(env!("BAT_SYNTAXES"))); + static ref THEMES: ThemeSet = ThemeSet::load_defaults(); + + // Configure Comrak's Markdown rendering with all the bells & + // whistles! + static ref MD_OPTS: ComrakOptions = ComrakOptions{ + ext_strikethrough: true, + ext_tagfilter: true, + ext_table: true, + ext_autolink: true, + ext_tasklist: true, + ext_header_ids: Some(String::new()), // yyeeesss! 
+ ext_footnotes: true, + ext_description_lists: true, + unsafe_: true, // required for tagfilter + ..ComrakOptions::default() + }; } fn args_extension() -> Option<String> { @@ -44,27 +62,66 @@ fn should_continue(res: &io::Result<usize>) -> bool { } } -fn format_markdown() { - let mut buffer = String::new(); - let stdin = io::stdin(); - let mut stdin = stdin.lock(); - stdin.read_to_string(&mut buffer).expect("failed to read stdin"); +// This function is taken from the Comrak documentation. +fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) where F : Fn(&'a AstNode<'a>) { + f(node); + for c in node.children() { + iter_nodes(c, f); + } +} - // Markdown rendering is configurd with most of the bells & - // whistles here: - let opts = ComrakOptions{ - ext_strikethrough: true, - ext_tagfilter: true, - ext_table: true, - ext_autolink: true, - ext_tasklist: true, - ext_header_ids: Some(String::new()), // yyeeesss! - ext_footnotes: true, - ext_description_lists: true, - ..ComrakOptions::default() +// Many of the syntaxes in the syntax list have random capitalisations, which +// means that name matching for the block info of a code block in HTML fails. +// +// Instead, try finding a syntax match by comparing case insensitively (for +// ASCII characters, anyways). +fn find_syntax_case_insensitive(info: &str) -> Option<&'static SyntaxReference> { + SYNTAXES.syntaxes().iter().rev().find(|&s| info.eq_ignore_ascii_case(&s.name)) +} + +fn format_markdown() { + let document = { + let mut buffer = String::new(); + let stdin = io::stdin(); + let mut stdin = stdin.lock(); + stdin.read_to_string(&mut buffer).expect("failed to read stdin"); + buffer }; - print!("{}", markdown_to_html(&buffer, &opts)); + let arena = Arena::new(); + let root = parse_document(&arena, &document, &MD_OPTS); + + // Syntax highlighting is implemented by traversing the arena and + // replacing all code blocks with HTML blocks rendered by syntect. 
+ iter_nodes(root, &|node| { + let mut ast = node.data.borrow_mut(); + match &ast.value { + NodeValue::CodeBlock(code_block) => { + let theme = &THEMES.themes["InspiredGitHub"]; + let info = String::from_utf8_lossy(&code_block.info); + + let syntax = find_syntax_case_insensitive(&info) + .or_else(|| SYNTAXES.find_syntax_by_extension(&info)) + .unwrap_or_else(|| SYNTAXES.find_syntax_plain_text()); + + let code = String::from_utf8_lossy(&code_block.literal); + let rendered = highlighted_html_for_string( + &code, &SYNTAXES, syntax, theme, + ); + + let block = NodeHtmlBlock { + block_type: 1, // It's unclear what behaviour is toggled by this + literal: rendered.into_bytes(), + }; + + ast.value = NodeValue::HtmlBlock(block); + }, + _ => (), + }; + }); + + format_html(root, &MD_OPTS, &mut io::stdout()) + .expect("Markdown rendering failed"); } fn format_code(extension: String) { @@ -76,8 +133,7 @@ fn format_code(extension: String) { let mut read_result = stdin.read_line(&mut linebuf); // Set up the highlighter - let ts = ThemeSet::load_defaults(); - let theme = &ts.themes["InspiredGitHub"]; + let theme = &THEMES.themes["InspiredGitHub"]; let syntax = SYNTAXES.find_syntax_by_extension(&extension) .or_else(|| SYNTAXES.find_syntax_by_first_line(&linebuf)) |