From 82e07fc046cff2151f9d18a003c6331202d58c8b Mon Sep 17 00:00:00 2001 From: sterni Date: Thu, 14 Jan 2021 02:07:55 +0100 Subject: feat(panettone): render a subset of markdown in issue subjects This is achieved by implementing a simple markdown renderer in CL which has the following limitations: * Only supports inline `code`, *emphasize 1*, _emphasize 2_ and ~~strikethrough~~. * Does not support nested markup. This allows for a relatively simple renderer which doesn't need to parse markdown into a in-memory data structure first. The rendered result is directly written to a stream to integrate well with cl-who which is also reused for rendering tags and xml-escaping strings. Fixes #90. Change-Id: Ice88ed770b1fab6365f3b93e8663e25077befa0b Reviewed-on: https://cl.tvl.fyi/c/depot/+/2389 Tested-by: BuildkiteCI Reviewed-by: glittershark Reviewed-by: tazjin --- web/panettone/default.nix | 2 + web/panettone/src/inline-markdown.lisp | 131 +++++++++++++++++++++++++++ web/panettone/src/packages.lisp | 8 +- web/panettone/src/panettone.lisp | 4 +- web/panettone/test/inline-markdown_test.lisp | 54 +++++++++++ web/panettone/test/package.lisp | 3 +- 6 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 web/panettone/src/inline-markdown.lisp create mode 100644 web/panettone/test/inline-markdown_test.lisp (limited to 'web/panettone') diff --git a/web/panettone/default.nix b/web/panettone/default.nix index 8d112901ec..4238af804a 100644 --- a/web/panettone/default.nix +++ b/web/panettone/default.nix @@ -24,6 +24,7 @@ depot.nix.buildLisp.program { ./src/packages.lisp ./src/util.lisp ./src/css.lisp + ./src/inline-markdown.lisp ./src/authentication.lisp ./src/model.lisp ./src/irc.lisp @@ -38,6 +39,7 @@ depot.nix.buildLisp.program { srcs = [ ./test/package.lisp ./test/model_test.lisp + ./test/inline-markdown_test.lisp ]; expression = "(fiveam:run!)"; diff --git a/web/panettone/src/inline-markdown.lisp b/web/panettone/src/inline-markdown.lisp new file mode 100644 index 0000000000..9948a629b3 --- /dev/null +++ b/web/panettone/src/inline-markdown.lisp @@ -0,0 +1,131 @@ +(in-package :panettone.inline-markdown) +(declaim (optimize (safety 3))) + +(define-constant +inline-markup-types+ + '(("~~" :del) + ("_" :em) + ("*" :em) + ("`" :code)) + :test #'equal) + +(defun next-token (mkdn &optional (escaped nil)) + "Parses and returns the next token from the beginning of + an inline markdown string which is not altered. The resulting + tokens are either :normal (normal text), :special (syntactically + significant) or :escaped (escaped using \\). If the string is + empty, a pseudo-token named :endofinput is returned. Return value + is a list where the first element is the token type, the second + the token content and optionally the third the markup type." + ; special tokens are syntactically significant characters + ; or strings for our inline markdown subset. “normal” tokens + ; the strings in between + (let* ((special-toks #.'(cons (list "\\" :escape) +inline-markup-types+)) + (toks (loop + for tok in special-toks + for pos = (search (car tok) mkdn) + when pos collect (cons tok pos))) + (next-tok + (unless (null toks) + (reduce (lambda (a b) (if (< (cdr a) (cdr b)) a b)) toks)))) + (cond + ; end of input + ((= (length mkdn) 0) (list :endofinput "")) + ; no special tokens, just return entire string + ((null next-tok) (list :normal mkdn)) + ; special token, but not at the beginning of the string + ; so we return everything until the special token as + ; a string + ((> (cdr next-tok) 0) (list :normal (subseq mkdn 0 (cdr next-tok)))) + ; \ at the beginning of the string: we get the next + ; token and mark it as escaped unless we are already + ; escaping in which case we just return the backslash + ; as a special token + ((eq (cadr (car next-tok)) :escape) + (if escaped + (list :special "\\") + (list :escaped + (next-token (subseq mkdn 1) t)))) + ; any other special token at the beginning of the string + ; here we also pass the markup type as a third list element + ; to prevent unnecessesary lookups + (t (list :special + (subseq mkdn 0 (length (car (car next-tok)))) + (cadr (car next-tok))))))) + +(defun token-length (tok-type tok-str) + "Returns the string length consumed by a call + to next-token returning the given token type and string." + (check-type tok-type symbol) + (if (eq tok-type :escaped) + ; backslash + length of escaped token + (progn + (check-type tok-str list) + (1+ (token-length (car tok-str) (cadr tok-str)))) + (progn + (check-type tok-str string) + (length tok-str)))) + +(defun write-tag (tag pos &optional (target *standard-output*)) + "Wrapper around who:convert-tag-to-string-list to + only output a single :opening or :closing tag." + (check-type tag symbol) + (check-type pos symbol) + (let + ((index + (cond + ((eq pos :opening) 0) + ((eq pos :closing) 3) + (t (error 'simple-type-error))))) + (dolist + (tag-part (subseq + (who:convert-tag-to-string-list tag nil nil nil) + index (+ index 3))) + (write-string tag-part target)))) + +(defun render-inline-markdown (s &optional (target *standard-output*) (in :normal)) + "Render inline markdown, a subset of markdown safe to render + inside inline elements. The resulting html is directly written + to a specified stream or *standard-output* to integrate well + with cl-who." + (check-type s string) + (check-type target stream) + (loop + for (tok-type tok-str tok-markup) = (next-token s) + do (setq s (subseq s (token-length tok-type tok-str))) + when (eq tok-type :endofinput) + return "" + when (eq tok-type :normal) + do (write-string (who:escape-string tok-str) target) + when (eq tok-type :escaped) + do (progn + ; if normal tokens are escaped we treat the \ as if it were \\ + ; + ; TODO(sterni): maybe also use the :normal behavior in :code except for #\`. + (when (eq (car tok-str) :normal) + (write-char #\\ target)) + (write-string (who:escape-string (cadr tok-str)) target)) + when (eq tok-type :special) + do (cond + ; we are on the outer level and encounter a special token: + ; render surrounding tags and call ourselves to render + ; inner content. + ((eq in :normal) + (progn + (write-tag tok-markup :opening target) + (setq s (render-inline-markdown s target tok-markup)) + (write-tag tok-markup :closing target))) + ; we are on the inner level and encounter the token that initiated + ; our markup again, meaning we need to return to the outer level. + ; we return the remaining string to be consumed. + ((eq in tok-markup) (return s)) + ; remaining case: we are on the inner level and encounter different markup. + + ; we don't support nested markup for simplicity reasons, so instead we + ; just render any nested markdown tokens as if they were escaped. This + ; only eliminates the slight use case for nesting :em inside :del, but + ; shouldn't be too bad. As a side effect this is the precise behavior + ; we want for :code. + ; + ; TODO(sterni): maybe bring back the restart-based system which allowed + ; to skip nested tokens if desired. + (t (write-string (who:escape-string tok-str) target))))) diff --git a/web/panettone/src/packages.lisp b/web/panettone/src/packages.lisp index 87285fa34d..c5fe79b7bc 100644 --- a/web/panettone/src/packages.lisp +++ b/web/panettone/src/packages.lisp @@ -7,6 +7,11 @@ (:use :cl :lass) (:export :styles)) +(defpackage panettone.inline-markdown + (:use :cl) + (:import-from :alexandria :define-constant) + (:export :render-inline-markdown)) + (defpackage panettone.irc (:use :cl :usocket) (:export :send-irc-notification)) @@ -42,7 +47,8 @@ (defpackage panettone (:use :cl :klatre :easy-routes :iterate :panettone.util - :panettone.authentication) + :panettone.authentication + :panettone.inline-markdown) (:import-from :defclass-std :defclass/std) (:import-from :alexandria :if-let :when-let :switch :alist-hash-table) (:import-from :cl-ppcre :split) diff --git a/web/panettone/src/panettone.lisp b/web/panettone/src/panettone.lisp index aaf58bd191..e090f11acf 100644 --- a/web/panettone/src/panettone.lisp +++ b/web/panettone/src/panettone.lisp @@ -192,7 +192,7 @@ (:a :href (format nil "/issues/~A" issue-id) (:p (:span :class "issue-subject" - (who:esc (subject issue)))) + (render-inline-markdown (subject issue)))) (:span :class "issue-number" (who:esc (format nil "#~A" issue-id))) " - " @@ -329,7 +329,7 @@ (issue-status (status issue))) (render () (:header - (:h1 (who:esc (subject issue))) + (:h1 (render-inline-markdown (subject issue))) (:div :class "issue-number" (who:esc (format nil "#~A" issue-id)))) (:main diff --git a/web/panettone/test/inline-markdown_test.lisp b/web/panettone/test/inline-markdown_test.lisp new file mode 100644 index 0000000000..2b6c3b8900 --- /dev/null +++ b/web/panettone/test/inline-markdown_test.lisp @@ -0,0 +1,54 @@ +(in-package :panettone.tests) +(declaim (optimize (safety 3))) + +(defmacro inline-markdown-unit-test (name input expected) + `(test ,name + (is (equal + ,expected + (with-output-to-string (*standard-output*) + (render-inline-markdown ,input)))))) + +(inline-markdown-unit-test + inline-markdown-typical-test + "hello _world_, here is ~~no~~ `code`!" + "hello world, here is no code!") + +(inline-markdown-unit-test + inline-markdown-two-emphasize-types-test + "_stress_ *this*" + "stress this") + +(inline-markdown-unit-test + inline-markdown-html-escaping-test + "öäü" + "<tag>öäü") + +(inline-markdown-unit-test + inline-markdown-nesting-test + "`inside code *anything* goes`, but also ~~*here*~~" + "inside code *anything* goes, but also *here*") + +(inline-markdown-unit-test + inline-markdown-escaping-test + "A backslash \\\\ shows: \\*, \\_, \\` and \\~~" + "A backslash \\ shows: *, _, ` and ~~") + +(inline-markdown-unit-test + inline-markdown-nested-escaping-test + "`prevent \\`code\\` from ending, but never stand alone \\\\`" + "prevent `code` from ending, but never stand alone \\") + +(inline-markdown-unit-test + inline-markdown-escape-normal-tokens-test + "\\Normal tokens \\escaped?" + "\\Normal tokens \\escaped?") + +(inline-markdown-unit-test + inline-markdown-no-unclosed-tags-test + "A tag, once opened, _must be closed" + "A tag, once opened, must be closed") + +(inline-markdown-unit-test + inline-markdown-unicode-safe + "Does Unicode 👨‍👨‍👧‍👦 break \\👩🏾‍🦰 tokenization?" + "Does Unicode 👨‍👨‍👧‍👦 break \\👩🏾‍🦰 tokenization?") diff --git a/web/panettone/test/package.lisp b/web/panettone/test/package.lisp index 77ba1b00bb..d2a2f97420 100644 --- a/web/panettone/test/package.lisp +++ b/web/panettone/test/package.lisp @@ -1,2 +1,3 @@ (defpackage :panettone.tests - (:use :cl :klatre :fiveam)) + (:use :cl :klatre :fiveam + :panettone.inline-markdown)) -- cgit 1.4.1