about summary refs log tree commit diff
path: root/web/panettone
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2021-01-14T01·07+0100
committersterni <sternenseemann@systemli.org>2021-01-25T21·02+0000
commit82e07fc046cff2151f9d18a003c6331202d58c8b (patch)
treec528dde5bed5899aa53fbc7bba761bf8abb23969 /web/panettone
parent7e408c874ac9b84f62bd48a3a6f2b57bae866d29 (diff)
feat(panettone): render a subset of markdown in issue subjects r/2144
This is achieved by implementing a simple markdown renderer in CL which
has the following limitations:

* Only supports inline `code`, *emphasize 1*, _emphasize 2_ and
  ~~strikethrough~~.
* Does not support nested markup.

This allows for a relatively simple renderer which doesn't need to parse
markdown into a in-memory data structure first. The rendered result is
directly written to a stream to integrate well with cl-who which is also
reused for rendering tags and xml-escaping strings.

Fixes #90.

Change-Id: Ice88ed770b1fab6365f3b93e8663e25077befa0b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2389
Tested-by: BuildkiteCI
Reviewed-by: glittershark <grfn@gws.fyi>
Reviewed-by: tazjin <mail@tazj.in>
Diffstat (limited to 'web/panettone')
-rw-r--r--web/panettone/default.nix2
-rw-r--r--web/panettone/src/inline-markdown.lisp131
-rw-r--r--web/panettone/src/packages.lisp8
-rw-r--r--web/panettone/src/panettone.lisp4
-rw-r--r--web/panettone/test/inline-markdown_test.lisp54
-rw-r--r--web/panettone/test/package.lisp3
6 files changed, 198 insertions, 4 deletions
diff --git a/web/panettone/default.nix b/web/panettone/default.nix
index 8d112901ec07..4238af804af3 100644
--- a/web/panettone/default.nix
+++ b/web/panettone/default.nix
@@ -24,6 +24,7 @@ depot.nix.buildLisp.program {
     ./src/packages.lisp
     ./src/util.lisp
     ./src/css.lisp
+    ./src/inline-markdown.lisp
     ./src/authentication.lisp
     ./src/model.lisp
     ./src/irc.lisp
@@ -38,6 +39,7 @@ depot.nix.buildLisp.program {
     srcs = [
       ./test/package.lisp
       ./test/model_test.lisp
+      ./test/inline-markdown_test.lisp
     ];
 
     expression = "(fiveam:run!)";
diff --git a/web/panettone/src/inline-markdown.lisp b/web/panettone/src/inline-markdown.lisp
new file mode 100644
index 000000000000..9948a629b378
--- /dev/null
+++ b/web/panettone/src/inline-markdown.lisp
@@ -0,0 +1,131 @@
+(in-package :panettone.inline-markdown)
+(declaim (optimize (safety 3)))
+
+(define-constant +inline-markup-types+
+  '(("~~" :del)
+    ("_"  :em)
+    ("*"  :em)
+    ("`"  :code))
+  :test #'equal)
+
+(defun next-token (mkdn &optional (escaped nil))
+  "Parses and returns the next token from the beginning of
+  an inline markdown string which is not altered. The resulting
+  tokens are either :normal (normal text), :special (syntactically
+  significant) or :escaped (escaped using \\). If the string is
+  empty, a pseudo-token named :endofinput is returned. Return value
+  is a list where the first element is the token type, the second
+  the token content and optionally the third the markup type."
+  ; special tokens are syntactically significant characters
+  ; or strings for our inline markdown subset. “normal” tokens
+  ; the strings in between
+  (let* ((special-toks #.'(cons (list "\\" :escape) +inline-markup-types+))
+         (toks (loop
+                 for tok in special-toks
+                 for pos = (search (car tok) mkdn)
+                 when pos collect (cons tok pos)))
+         (next-tok
+           (unless (null toks)
+             (reduce (lambda (a b) (if (< (cdr a) (cdr b)) a b)) toks))))
+    (cond
+      ; end of input
+      ((= (length mkdn) 0) (list :endofinput ""))
+      ; no special tokens, just return entire string
+      ((null next-tok) (list :normal mkdn))
+      ; special token, but not at the beginning of the string
+      ; so we return everything until the special token as
+      ; a string
+      ((> (cdr next-tok) 0) (list :normal (subseq mkdn 0 (cdr next-tok))))
+      ; \ at the beginning of the string: we get the next
+      ; token and mark it as escaped unless we are already
+      ; escaping in which case we just return the backslash
+      ; as a special token
+      ((eq (cadr (car next-tok)) :escape)
+       (if escaped
+         (list :special "\\")
+         (list :escaped
+               (next-token (subseq mkdn 1) t))))
+      ; any other special token at the beginning of the string
+      ; here we also pass the markup type as a third list element
+      ; to prevent unnecessesary lookups
+      (t (list :special
+               (subseq mkdn 0 (length (car (car next-tok))))
+               (cadr (car next-tok)))))))
+
+(defun token-length (tok-type tok-str)
+  "Returns the string length consumed by a call
+  to next-token returning the given token type and string."
+  (check-type tok-type symbol)
+  (if (eq tok-type :escaped)
+    ; backslash + length of escaped token
+    (progn
+      (check-type tok-str list)
+      (1+ (token-length (car tok-str) (cadr tok-str))))
+    (progn
+      (check-type tok-str string)
+      (length tok-str))))
+
+(defun write-tag (tag pos &optional (target *standard-output*))
+  "Wrapper around who:convert-tag-to-string-list to
+  only output a single :opening or :closing tag."
+  (check-type tag symbol)
+  (check-type pos symbol)
+  (let
+    ((index
+       (cond
+         ((eq pos :opening) 0)
+         ((eq pos :closing) 3)
+         (t (error 'simple-type-error)))))
+    (dolist
+      (tag-part (subseq
+                  (who:convert-tag-to-string-list tag nil nil nil)
+                  index (+ index 3)))
+      (write-string tag-part target))))
+
+(defun render-inline-markdown (s &optional (target *standard-output*) (in :normal))
+  "Render inline markdown, a subset of markdown safe to render
+  inside inline elements. The resulting html is directly written
+  to a specified stream or *standard-output* to integrate well
+  with cl-who."
+  (check-type s string)
+  (check-type target stream)
+  (loop
+    for (tok-type tok-str tok-markup) = (next-token s)
+    do (setq s (subseq s (token-length tok-type tok-str)))
+    when (eq tok-type :endofinput)
+    return ""
+    when (eq tok-type :normal)
+    do (write-string (who:escape-string tok-str) target)
+    when (eq tok-type :escaped)
+    do (progn
+         ; if normal tokens are escaped we treat the \ as if it were \\
+         ;
+         ; TODO(sterni): maybe also use the :normal behavior in :code except for #\`.
+         (when (eq (car tok-str) :normal)
+           (write-char #\\ target))
+         (write-string (who:escape-string (cadr tok-str)) target))
+    when (eq tok-type :special)
+    do (cond
+         ; we are on the outer level and encounter a special token:
+         ; render surrounding tags and call ourselves to render
+         ; inner content.
+         ((eq in :normal)
+          (progn
+            (write-tag tok-markup :opening target)
+            (setq s (render-inline-markdown s target tok-markup))
+            (write-tag tok-markup :closing target)))
+         ; we are on the inner level and encounter the token that initiated
+         ; our markup again, meaning we need to return to the outer level.
+         ; we return the remaining string to be consumed.
+         ((eq in tok-markup) (return s))
+         ; remaining case: we are on the inner level and encounter different markup.
+
+         ; we don't support nested markup for simplicity reasons, so instead we
+         ; just render any nested markdown tokens as if they were escaped. This
+         ; only eliminates the slight use case for nesting :em inside :del, but
+         ; shouldn't be too bad. As a side effect this is the precise behavior
+         ; we want for :code.
+         ;
+         ; TODO(sterni): maybe bring back the restart-based system which allowed
+         ;               to skip nested tokens if desired.
+         (t (write-string (who:escape-string tok-str) target)))))
diff --git a/web/panettone/src/packages.lisp b/web/panettone/src/packages.lisp
index 87285fa34d16..c5fe79b7bc81 100644
--- a/web/panettone/src/packages.lisp
+++ b/web/panettone/src/packages.lisp
@@ -7,6 +7,11 @@
   (:use :cl :lass)
   (:export :styles))
 
+(defpackage panettone.inline-markdown
+  (:use :cl)
+  (:import-from :alexandria :define-constant)
+  (:export :render-inline-markdown))
+
 (defpackage panettone.irc
   (:use :cl :usocket)
   (:export :send-irc-notification))
@@ -42,7 +47,8 @@
 (defpackage panettone
   (:use :cl :klatre :easy-routes :iterate
         :panettone.util
-        :panettone.authentication)
+        :panettone.authentication
+        :panettone.inline-markdown)
   (:import-from :defclass-std :defclass/std)
   (:import-from :alexandria :if-let :when-let :switch :alist-hash-table)
   (:import-from :cl-ppcre :split)
diff --git a/web/panettone/src/panettone.lisp b/web/panettone/src/panettone.lisp
index aaf58bd1915e..e090f11acfac 100644
--- a/web/panettone/src/panettone.lisp
+++ b/web/panettone/src/panettone.lisp
@@ -192,7 +192,7 @@
            (:a :href (format nil "/issues/~A" issue-id)
                (:p
                 (:span :class "issue-subject"
-                       (who:esc (subject issue))))
+                       (render-inline-markdown (subject issue))))
                (:span :class "issue-number"
                       (who:esc (format nil "#~A" issue-id)))
                " - "
@@ -329,7 +329,7 @@
         (issue-status (status issue)))
     (render ()
       (:header
-       (:h1 (who:esc (subject issue)))
+       (:h1 (render-inline-markdown (subject issue)))
        (:div :class "issue-number"
              (who:esc (format nil "#~A" issue-id))))
       (:main
diff --git a/web/panettone/test/inline-markdown_test.lisp b/web/panettone/test/inline-markdown_test.lisp
new file mode 100644
index 000000000000..2b6c3b89002c
--- /dev/null
+++ b/web/panettone/test/inline-markdown_test.lisp
@@ -0,0 +1,54 @@
+(in-package :panettone.tests)
+(declaim (optimize (safety 3)))
+
+(defmacro inline-markdown-unit-test (name input expected)
+  `(test ,name
+     (is (equal
+           ,expected
+           (with-output-to-string (*standard-output*)
+             (render-inline-markdown ,input))))))
+
+(inline-markdown-unit-test
+  inline-markdown-typical-test
+  "hello _world_, here is ~~no~~ `code`!"
+  "hello <em>world</em>, here is <del>no</del> <code>code</code>!")
+
+(inline-markdown-unit-test
+  inline-markdown-two-emphasize-types-test
+  "_stress_ *this*"
+  "<em>stress</em> <em>this</em>")
+
+(inline-markdown-unit-test
+  inline-markdown-html-escaping-test
+  "<tag>öäü"
+  "&lt;tag&gt;&#246;&#228;&#252;")
+
+(inline-markdown-unit-test
+  inline-markdown-nesting-test
+  "`inside code *anything* goes`, but also ~~*here*~~"
+  "<code>inside code *anything* goes</code>, but also <del>*here*</del>")
+
+(inline-markdown-unit-test
+  inline-markdown-escaping-test
+  "A backslash \\\\ shows: \\*, \\_, \\` and \\~~"
+  "A backslash \\ shows: *, _, ` and ~~")
+
+(inline-markdown-unit-test
+  inline-markdown-nested-escaping-test
+  "`prevent \\`code\\` from ending, but never stand alone \\\\`"
+  "<code>prevent `code` from ending, but never stand alone \\</code>")
+
+(inline-markdown-unit-test
+  inline-markdown-escape-normal-tokens-test
+  "\\Normal tokens \\escaped?"
+  "\\Normal tokens \\escaped?")
+
+(inline-markdown-unit-test
+  inline-markdown-no-unclosed-tags-test
+  "A tag, once opened, _must be closed"
+  "A tag, once opened, <em>must be closed</em>")
+
+(inline-markdown-unit-test
+  inline-markdown-unicode-safe
+  "Does Unicode 👨‍👨‍👧‍👦 break \\👩🏾‍🦰 tokenization?"
+  "Does Unicode &#128104;&#8205;&#128104;&#8205;&#128103;&#8205;&#128102; break \\&#128105;&#127998;&#8205;&#129456; tokenization?")
diff --git a/web/panettone/test/package.lisp b/web/panettone/test/package.lisp
index 77ba1b00bb78..d2a2f974208e 100644
--- a/web/panettone/test/package.lisp
+++ b/web/panettone/test/package.lisp
@@ -1,2 +1,3 @@
 (defpackage :panettone.tests
-  (:use :cl :klatre :fiveam))
+  (:use :cl :klatre :fiveam
+        :panettone.inline-markdown))