about summary refs log tree commit diff
path: root/users/sterni
diff options
context:
space:
mode:
Diffstat (limited to 'users/sterni')
-rw-r--r--users/sterni/OWNERS3
-rw-r--r--users/sterni/clhs-lookup/README.md13
-rw-r--r--users/sterni/clhs-lookup/clhs-lookup.lisp46
-rw-r--r--users/sterni/clhs-lookup/default.nix36
-rw-r--r--users/sterni/clhs-lookup/packages.lisp10
-rw-r--r--users/sterni/htmlman/README.md36
-rw-r--r--users/sterni/htmlman/default.nix234
-rw-r--r--users/sterni/htmlman/defaultStyle.nix49
-rw-r--r--users/sterni/keys.nix7
-rw-r--r--users/sterni/nix/char/all-chars.bin2
-rw-r--r--users/sterni/nix/char/default.nix90
-rw-r--r--users/sterni/nix/char/tests/default.nix31
-rw-r--r--users/sterni/nix/flow/default.nix82
-rw-r--r--users/sterni/nix/flow/tests/default.nix39
-rw-r--r--users/sterni/nix/fun/default.nix45
-rw-r--r--users/sterni/nix/int/default.nix124
-rw-r--r--users/sterni/nix/int/tests/default.nix203
-rw-r--r--users/sterni/nix/string/default.nix76
-rw-r--r--users/sterni/nix/string/tests/default.nix65
-rw-r--r--users/sterni/nix/url/default.nix42
-rw-r--r--users/sterni/nix/url/tests/default.nix49
-rw-r--r--users/sterni/nix/utf8/default.nix208
-rw-r--r--users/sterni/nix/utf8/tests/default.nix121
23 files changed, 1611 insertions, 0 deletions
diff --git a/users/sterni/OWNERS b/users/sterni/OWNERS
new file mode 100644
index 000000000000..cace4d0f3759
--- /dev/null
+++ b/users/sterni/OWNERS
@@ -0,0 +1,3 @@
+inherited: false
+owners:
+  - sterni
diff --git a/users/sterni/clhs-lookup/README.md b/users/sterni/clhs-lookup/README.md
new file mode 100644
index 000000000000..1f42ff43a210
--- /dev/null
+++ b/users/sterni/clhs-lookup/README.md
@@ -0,0 +1,13 @@
+# clhs-lookup
+
+Simple CLI to look up symbols' documentation in a local copy of the
+Common Lisp HyperSpec.
+
+## usage
+
+```
+clhs-lookup [--print] symbol [symbol [...]]
+
+  --print  Print documentation paths to stdout instead of
+           opening them with $BROWSER (defaults to xdg-open).
+```
diff --git a/users/sterni/clhs-lookup/clhs-lookup.lisp b/users/sterni/clhs-lookup/clhs-lookup.lisp
new file mode 100644
index 000000000000..0e61dd901f93
--- /dev/null
+++ b/users/sterni/clhs-lookup/clhs-lookup.lisp
@@ -0,0 +1,46 @@
+(in-package :clhs-lookup)
+(declaim (optimize (safety 3)))
+
+(defun find-symbols-paths (syms clhs)
+  "Find pathnames to HyperSpec files describing the listed
+  symbol names (as strings). Paths are returned in the order
+  of the symbols given with missing entries removed."
+  (check-type syms list)
+  (check-type clhs pathname)
+  (let* ((data-dir (merge-pathnames "HyperSpec/Data/" clhs))
+         (data (merge-pathnames "Map_Sym.txt" data-dir))
+         (found (make-hash-table :test #'equal))
+         (syms (mapcar #'string-upcase syms)))
+  (with-open-file (s data :direction :input)
+    (loop
+      with missing    = syms
+      for symbol-line = (read-line s nil :eof)
+      for path-line   = (read-line s nil :eof)
+      until (or (eq symbol-line :eof)
+                (eq path-line   :eof)
+                (null missing))
+      for pos = (position symbol-line missing :test #'equal)
+      when pos
+      do (progn ; REMOVE, not DELETE: MISSING shares structure with SYMS
+           (setf missing (remove symbol-line missing :test #'equal))
+           (setf (gethash symbol-line found) path-line)))
+    ; TODO(sterni): get rid of Data/../ in path
+    (mapcar
+      (lambda (x) (merge-pathnames x data-dir))
+      (remove nil
+        (mapcar (lambda (x) (gethash x found)) syms))))))
+
+(defun main ()
+  (let* ((browser (or (uiop:getenvp "BROWSER") "xdg-open"))
+         (args    (uiop:command-line-arguments))
+         (prin    (member "--print" args :test #'equal))
+         (syms    (remove-if (lambda (x) (uiop:string-prefix-p "-" x)) args)) ; safe on ""
+         (paths (find-symbols-paths syms *clhs-path*)))
+      (if (null paths)
+        (uiop:quit 1)
+        (dolist (p paths)
+          (if prin
+            (format t "~A~%" p)
+            (uiop:launch-program
+              (format nil "~A ~A" browser p)
+              :force-shell t))))))
diff --git a/users/sterni/clhs-lookup/default.nix b/users/sterni/clhs-lookup/default.nix
new file mode 100644
index 000000000000..951b94d72f19
--- /dev/null
+++ b/users/sterni/clhs-lookup/default.nix
@@ -0,0 +1,36 @@
+{ pkgs, depot, ... }:
+
+let
+  inherit (pkgs) fetchzip writeText;
+  inherit (depot.nix) buildLisp;
+  inherit (builtins) replaceStrings;
+
+  clhsVersion = "7-0";
+
+  clhs = fetchzip {
+    name = "HyperSpec-${replaceStrings [ "-" ] [ "." ] clhsVersion}";
+    url = "ftp://ftp.lispworks.com/pub/software_tools/reference/HyperSpec-${clhsVersion}.tar.gz";
+    sha256 = "1zsi35245m5sfb862ibzy0pzlph48wvlggnqanymhgqkpa1v20ak";
+    stripRoot = false;
+  };
+
+  clhs-path = writeText "clhs-path.lisp" ''
+    (in-package :clhs-lookup.clhs-path)
+    (defparameter *clhs-path* (pathname "${clhs}/"))
+  '';
+
+  clhs-lookup = buildLisp.program {
+    name = "clhs-lookup";
+
+    deps = [
+      (buildLisp.bundled "uiop")
+    ];
+
+    srcs = [
+      ./packages.lisp
+      clhs-path
+      ./clhs-lookup.lisp
+    ];
+  };
+in
+  clhs-lookup
diff --git a/users/sterni/clhs-lookup/packages.lisp b/users/sterni/clhs-lookup/packages.lisp
new file mode 100644
index 000000000000..d059b96ce9f0
--- /dev/null
+++ b/users/sterni/clhs-lookup/packages.lisp
@@ -0,0 +1,10 @@
+(defpackage :clhs-lookup.clhs-path
+  (:use :cl)
+  (:export :*clhs-path*))
+
+(defpackage clhs-lookup
+  (:use :cl :uiop)
+  (:import-from :clhs-lookup.clhs-path :*clhs-path*)
+  (:export :main
+           :find-symbols-paths))
+
diff --git a/users/sterni/htmlman/README.md b/users/sterni/htmlman/README.md
new file mode 100644
index 000000000000..258233d4c4d2
--- /dev/null
+++ b/users/sterni/htmlman/README.md
@@ -0,0 +1,36 @@
+# htmlman
+
+static site generator for man pages intended for
+rendering man page documentation viewable using
+a web browser.
+
+## usage
+
+If you have a nix expression, `doc.nix`, like this:
+
+```nix
+{ depot, ... }:
+
+depot.users.sterni.htmlman {
+  title = "foo project";
+  pages = [
+    {
+      name = "foo";
+      section = 1;
+    }
+    {
+      name = "foo";
+      section = 3;
+      path = ../devman/foo.3;
+    }
+  ];
+  manDir = ../man;
+}
+```
+
+You can run the following to directly deploy the resulting
+documentation output to a specific target directory:
+
+```sh
+nix-build -A deploy doc.nix && ./result target_directory
+```
diff --git a/users/sterni/htmlman/default.nix b/users/sterni/htmlman/default.nix
new file mode 100644
index 000000000000..b88bc264103b
--- /dev/null
+++ b/users/sterni/htmlman/default.nix
@@ -0,0 +1,234 @@
+{ depot, lib, pkgs, ... }:
+
+let
+  inherit (depot.nix)
+    getBins
+    runExecline
+    yants
+    ;
+
+  inherit (depot.tools)
+    cheddar
+    ;
+
+  inherit (pkgs)
+    mandoc
+    coreutils
+    fetchurl
+    writers
+    ;
+
+  bins = getBins cheddar [ "cheddar" ]
+      // getBins mandoc [ "mandoc" ]
+      // getBins coreutils [ "cat" "mv" "mkdir" ]
+      ;
+
+  normalizeDrv = fetchurl {
+    url = "https://necolas.github.io/normalize.css/8.0.1/normalize.css";
+    sha256 = "04jmvybwh2ks4dlnfa70sb3a3z3ig4cv0ya9rizjvm140xq1h22q";
+  };
+
+  execlineStdoutInto = target: line: [
+    "redirfd" "-w" "1" target
+  ] ++ line;
+
+  # I will not write a pure nix markdown renderer
+  # I will not write a pure nix markdown renderer
+  # I will not write a pure nix markdown renderer
+  # I will not write a pure nix markdown renderer
+  # I will not write a pure nix markdown renderer
+  markdown = md:
+    let
+      html = runExecline.local "rendered-markdown" {
+        stdin = md;
+      } ([
+        "importas" "-iu" "out" "out"
+      ] ++ execlineStdoutInto "$out" [
+        bins.cheddar "--about-filter" "description.md"
+      ]);
+    in builtins.readFile html;
+
+  indexTemplate = { title, description, pages ? [] }: ''
+    <!doctype html>
+    <html>
+      <head>
+        <meta charset="utf-8">
+        <title>${title}</title>
+        <link rel="stylesheet" type="text/css" href="style.css"/>
+      </head>
+      <body>
+        <div class="index-text">
+          <h1>${title}</h1>
+          ${markdown description}
+          <h2>man pages</h2>
+          <ul>
+            ${lib.concatMapStrings ({ name, section, ... }: ''
+              <li><a href="${name}.${toString section}.html">${name}(${toString section})</a></li>
+            '') pages}
+          </ul>
+        </div>
+      </body>
+    </html>
+  '';
+
+  defaultStyle = import ./defaultStyle.nix { };
+
+  # This deploy script automatically copies the build result into
+  # a TARGET directory and marks it as writeable optionally.
+  # It is exposed as the deploy attribute of the result of
+  # htmlman, so an htmlman expression can be used like this:
+  # nix-build -A deploy htmlman.nix && ./result target_dir
+  deployScript = title: drv: writers.writeDash "deploy-${title}" ''
+    usage() {
+      printf 'Usage: %s [-w] TARGET\n\n' "$0"
+      printf 'Deploy htmlman documentation to TARGET directory.\n\n'
+      printf '  -h    Display this help message\n'
+      printf '  -w    Make TARGET directory writeable\n'
+    }
+
+    if test "$#" -lt 1; then
+      usage
+      exit 100
+    fi
+
+    writeable=false
+    target=""  # reset explicitly so an inherited $target env var is ignored
+    while test "$#" -gt 0; do
+      case "$1" in
+        -h)
+          usage
+          exit 0
+          ;;
+        -w)
+          writeable=true
+          ;;
+        -*)
+          usage
+          exit 100
+          ;;
+        *)
+          if test -z "$target"; then
+            target="$1"
+          else
+            echo "Too many arguments"
+            exit 100
+          fi
+          ;;
+      esac
+
+      shift
+    done
+
+    if test -z "$target"; then
+      echo "Missing TARGET"
+      usage
+      exit 100
+    fi
+
+    set -ex
+
+    mkdir -p "$target"
+    cp -RTL --reflink=auto "${drv}" "$target"
+
+    if $writeable; then
+      chmod -R +w "$target"
+    fi
+  '';
+
+  htmlman =
+    { title
+    # title of the index page
+    , description ? ""
+    # description which is displayed after
+    # the main heading on the index page
+    , pages ? []
+    # man pages of the following structure:
+    # {
+    #   name : string;
+    #   section : int;
+    #   path : either path string;
+    # }
+    # path is optional, if it is not given,
+    # the man page source must be located at
+    # "${manDir}/${name}.${toString section}"
+    , manDir ? null
+    # directory in which man page sources are located
+    , style ? defaultStyle
+    # CSS to use as a string
+    , normalizeCss ? true
+    # whether to include normalize.css before the custom CSS
+    , linkXr ? "all"
+    # How to handle cross references in the html output:
+    #
+    # * none:     don't convert cross references into hyperlinks
+    # * all:      link all cross references as if they were
+    #             rendered into $out by htmlman
+    # * inManDir: link to all man pages which have their source
+    #             in `manDir` and use the format string defined
+    #             in linkXrFallback for all other cross references.
+    , linkXrFallback ? "https://manpages.debian.org/unstable/%N.%S.en.html"
+    # fallback link to use if linkXr == "inManDir" and the man
+    # page is not in ${manDir}. Placeholders %N (name of page)
+    # and %S (section of page) can be used. See mandoc(1) for
+    # more information.
+    }:
+
+    let
+      linkXrEnum = yants.enum "linkXr" [ "all" "inManDir" "none" ];
+
+      index = indexTemplate {
+        inherit title description pages;
+      };
+
+      resolvePath = { path ? null, name, section }:
+        if path != null
+        then path
+        else "${manDir}/${name}.${toString section}";
+
+      mandocOpts = lib.concatStringsSep "," ([
+        "style=style.css"
+      ] ++ linkXrEnum.match linkXr {
+        all      = [ "man=./%N.%S.html" ];
+        inManDir = [ "man=./%N.%S.html;${linkXrFallback}" ];
+        none     = [ ];
+      });
+
+      html =
+        runExecline.local "htmlman-${title}" {
+          derivationArgs = {
+            inherit index style;
+            passAsFile = [ "index" "style" ];
+          };
+        } ([
+          "multisubstitute" [
+            "importas" "-iu" "out" "out"
+            "importas" "-iu" "index" "indexPath"
+            "importas" "-iu" "style" "stylePath"
+          ]
+          "if" [ bins.mkdir "-p" "$out" ]
+          "if" [ bins.mv "$index" "\${out}/index.html" ]
+          "if" (execlineStdoutInto "\${out}/style.css" [
+            "if" ([
+              bins.cat
+            ] ++ lib.optional normalizeCss normalizeDrv
+              ++ [
+              "$style"
+            ])
+          ])
+          # let mandoc check for available man pages; manDir may be null
+        ] ++ lib.optionals (manDir != null) [ "execline-cd" "${manDir}" ]
+          ++ lib.concatMap ({ name, section, ... }@p:
+          execlineStdoutInto "\${out}/${name}.${toString section}.html" [
+          "if" [
+            bins.mandoc
+            "-mdoc"
+            "-T" "html"
+            "-O" mandocOpts
+            (resolvePath p)
+          ]
+        ]) pages);
+    in html // {
+      deploy = deployScript title html;
+    };
+in
+  htmlman
diff --git a/users/sterni/htmlman/defaultStyle.nix b/users/sterni/htmlman/defaultStyle.nix
new file mode 100644
index 000000000000..a44b5ef06934
--- /dev/null
+++ b/users/sterni/htmlman/defaultStyle.nix
@@ -0,0 +1,49 @@
+{ ... }:
+
+''
+  body {
+    font-size: 1em;
+    line-height: 1.5;
+    font-family: serif;
+    background-color: #efefef;
+  }
+
+  h1, h2, h3, h4, h5, h6 {
+    font-family: sans-serif;
+    font-size: 1em;
+    margin: 5px 0;
+  }
+
+  h1 {
+    margin-top: 0;
+  }
+
+  a:link, a:visited {
+    color: #3e7eff;
+  }
+
+  h1 a, h2 a, h3 a, h4 a, h5 a, h6 a {
+    text-decoration: none;
+  }
+
+  .manual-text, .index-text {
+    padding: 20px;
+    max-width: 800px;
+    background-color: white;
+    margin: 0 auto;
+  }
+
+  table.head, table.foot {
+    display: none;
+  }
+
+  .Nd {
+    display: inline;
+  }
+
+  /* use same as cheddar for man pages */
+  pre {
+    padding: 16px;
+    background-color: #f6f8fa;
+  }
+''
diff --git a/users/sterni/keys.nix b/users/sterni/keys.nix
new file mode 100644
index 000000000000..815f62ee080e
--- /dev/null
+++ b/users/sterni/keys.nix
@@ -0,0 +1,7 @@
+{ ... }:
+
+{
+  all = [
+    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJk+KvgvI2oJTppMASNUfMcMkA2G5ZNt+HnWDzaXKLlo lukas@wolfgang"
+  ];
+}
diff --git a/users/sterni/nix/char/all-chars.bin b/users/sterni/nix/char/all-chars.bin
new file mode 100644
index 000000000000..017b909e8e8e
--- /dev/null
+++ b/users/sterni/nix/char/all-chars.bin
@@ -0,0 +1,2 @@
+	
+
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
\ No newline at end of file
diff --git a/users/sterni/nix/char/default.nix b/users/sterni/nix/char/default.nix
new file mode 100644
index 000000000000..e6b8d6d7f168
--- /dev/null
+++ b/users/sterni/nix/char/default.nix
@@ -0,0 +1,90 @@
+{ depot, lib, pkgs, ... }:
+
+let
+
+  inherit (depot.users.sterni.nix.flow)
+    cond
+    ;
+
+  inherit (depot.nix)
+    yants
+    ;
+
+  inherit (depot.users.sterni.nix)
+    string
+    ;
+
+  # A char is the atomic element of a nix string
+  # which is essentially an array of arbitrary bytes
+  # as long as they are not a NUL byte.
+  #
+  # A char is neither a byte nor a unicode codepoint!
+  char = yants.restrict "char" (s: builtins.stringLength s == 1) yants.string;
+
+  # integer representation of char
+  charval = yants.restrict "charval" (i: i >= 1 && i < 256) yants.int;
+
+  allChars = builtins.readFile ./all-chars.bin;
+
+  # Originally I searched a list for this, but came to the
+  # conclusion that this can never be fast enough in Nix.
+  # We therefore use a solution similar to infinisil's.
+  ordMap = builtins.listToAttrs
+    (lib.imap1 (i: v: { name = v; value = i; })
+      (string.toChars allChars));
+
+  # Note on performance:
+  # chr and ord have been benchmarked using the following cases:
+  #
+  #  builtins.map ord (lib.stringToCharacters allChars)
+  #  builtins.map chr (builtins.genList (int.add 1) 255)
+  #
+  # The findings are as follows:
+  # 1. Searching through strings or lists using recursion is
+  #    unbearably slow in Nix, leading to evaluation times
+  #    of up to 3s for the following very small test case.
+  #    This is why we use the trusty attribute set for ord.
+  # 2. String indexing is much faster than list indexing which
+  #    is why we use the former for chr.
+  ord = c: ordMap."${c}";
+
+  chr = i: string.charAt (i - 1) allChars;
+
+  asciiAlpha = c:
+    let
+      v = ord c;
+    in (v >= 65 && v <= 90)
+    || (v >= 97 && v <= 122);
+
+  asciiNum = c:
+    let
+      v = ord c;
+    in v >= 48 && v <= 57;
+
+  asciiAlphaNum = c: asciiAlpha c || asciiNum c;
+
+in {
+  inherit
+    allChars
+    char
+    charval
+    ord
+    chr
+    asciiAlpha
+    asciiNum
+    asciiAlphaNum
+    ;
+
+  # originally I generated a nix file containing a list of
+  # characters, but infinisil uses a better way which I adapt
+  # which is using builtins.readFile instead of import.
+  __generateAllChars = pkgs.writers.writeC "generate-all-chars" {} ''
+    #include <stdio.h>
+
+    int main(void) {
+      for(int i = 1; i <= 0xff; i++) {
+        putchar(i);
+      }
+    }
+  '';
+}
diff --git a/users/sterni/nix/char/tests/default.nix b/users/sterni/nix/char/tests/default.nix
new file mode 100644
index 000000000000..49b439adbb84
--- /dev/null
+++ b/users/sterni/nix/char/tests/default.nix
@@ -0,0 +1,31 @@
+{ depot, ... }:
+
+let
+  inherit (depot.nix.runTestsuite)
+    it
+    assertEq
+    runTestsuite
+    ;
+
+  inherit (depot.users.sterni.nix)
+    char
+    string
+    int
+    fun
+    ;
+
+  charList = string.toChars char.allChars;
+
+  testAllCharConversion = it "tests conversion of all chars" [
+    (assertEq "char.chr converts to char.allChars"
+      (builtins.genList (fun.rl char.chr (int.add 1)) 255)
+      charList)
+    (assertEq "char.ord converts from char.allChars"
+      (builtins.genList (int.add 1) 255)
+      (builtins.map char.ord charList))
+  ];
+
+in
+  runTestsuite "char" [
+    testAllCharConversion
+  ]
diff --git a/users/sterni/nix/flow/default.nix b/users/sterni/nix/flow/default.nix
new file mode 100644
index 000000000000..b5783bd86deb
--- /dev/null
+++ b/users/sterni/nix/flow/default.nix
@@ -0,0 +1,82 @@
+{ depot, ... }:
+
+let
+
+  inherit (depot.nix)
+    yants
+    ;
+
+  inherit (depot.users.sterni.nix)
+    fun
+    ;
+
+  # we must avoid evaluating any of the sublists
+  # as they may contain conditions that throw
+  condition = yants.restrict "condition"
+    (ls: builtins.length ls == 2)
+    (yants.list yants.any);
+
+  /* Like the common lisp macro: takes a list
+     of two elemented lists whose first element
+     is a boolean. The second element of the
+     first list that has true as its first
+     element is returned.
+
+     Type: [ [ bool a ] ] -> a
+
+     Example:
+
+     cond [
+       [ (builtins.isString true) 12 ]
+       [ (3 == 2) 13 ]
+       [ true 42 ]
+     ]
+
+     => 42
+   */
+  cond = conds: switch true conds;
+
+  /* Generic pattern match-ish construct for nix.
+     Takes a bunch of lists which are of length
+     two and checks the first element for either
+     a predicate or a value. Returns the second element
+     of the first list whose first element is either a
+     value equal to the given value or a function that
+     evaluates to true for it.
+
+     Type: a -> [ [ (function | a) b ] ] -> b
+
+     Example:
+
+     switch "foo" [
+       [ "smol" "SMOL!!!" ]
+       [ (x: builtins.stringLength x <= 3) "smol-ish" ]
+       [ (fun.const true) "not smol" ]
+      ]
+
+      => "smol-ish"
+  */
+  switch = x: conds:
+    if builtins.length conds == 0
+    then builtins.throw "exhausted all conditions"
+    else
+      let
+        c = condition (builtins.head conds);
+        s = builtins.head c;
+        b =
+          if builtins.isFunction s
+          then s x
+          else x == s;
+      in
+        if b
+        then builtins.elemAt c 1
+        else switch x (builtins.tail conds);
+
+
+
+in {
+  inherit
+    cond
+    switch
+    ;
+}
diff --git a/users/sterni/nix/flow/tests/default.nix b/users/sterni/nix/flow/tests/default.nix
new file mode 100644
index 000000000000..54cea01858e7
--- /dev/null
+++ b/users/sterni/nix/flow/tests/default.nix
@@ -0,0 +1,39 @@
+{ depot, ... }:
+
+let
+
+  inherit (depot.nix.runTestsuite)
+    runTestsuite
+    it
+    assertEq
+    assertThrows
+    ;
+
+  inherit (depot.users.sterni.nix.flow)
+    cond
+    switch
+    ;
+
+  dontEval = builtins.throw "this should not get evaluated";
+
+  testCond = it "tests cond" [
+    (assertThrows "malformed cond list"
+      (cond [ [ true 1 2 ] [ false 1 ] ]))
+    (assertEq "last is true" "last"
+      (cond [
+        [ false dontEval]
+        [ false dontEval ]
+        [ true "last" ]
+      ]))
+    (assertEq "first is true" 1
+      (cond [
+        [ true 1 ]
+        [ true dontEval ]
+        [ true dontEval ]
+      ]))
+  ];
+
+in
+  runTestsuite "nix.flow" [
+    testCond
+  ]
diff --git a/users/sterni/nix/fun/default.nix b/users/sterni/nix/fun/default.nix
new file mode 100644
index 000000000000..a32b70a62b79
--- /dev/null
+++ b/users/sterni/nix/fun/default.nix
@@ -0,0 +1,45 @@
+{ depot, lib, ... }:
+
+let
+
+  inherit (lib)
+    id
+    ;
+
+  # Simple function composition,
+  # application is right to left.
+  rl = f1: f2:
+    (x: f1 (f2 x));
+
+  # Compose a list of functions,
+  # application is right to left.
+  rls = fs:
+    builtins.foldl' (fOut: f: lr f fOut) id fs;
+
+  # Simple function composition,
+  # application is left to right.
+  lr = f1: f2:
+    (x: f2 (f1 x));
+
+  # Compose a list of functions,
+  # application is left to right
+  lrs = x: fs:
+    builtins.foldl' (v: f: f v) x fs;
+
+in
+
+{
+  inherit (lib)
+    fix
+    flip
+    const
+    ;
+
+  inherit
+    id
+    rl
+    rls
+    lr
+    lrs
+    ;
+}
diff --git a/users/sterni/nix/int/default.nix b/users/sterni/nix/int/default.nix
new file mode 100644
index 000000000000..b3157571272f
--- /dev/null
+++ b/users/sterni/nix/int/default.nix
@@ -0,0 +1,124 @@
+{ depot, lib, ... }:
+
+let
+
+  # TODO(sterni): implement nix.float and figure out which of these
+  #               functions can be split out into a common nix.num
+  #               library.
+
+  inherit (depot.users.sterni.nix)
+    string
+    ;
+
+  inherit (builtins)
+    bitOr
+    bitAnd
+    bitXor
+    mul
+    div
+    add
+    sub
+    ;
+
+  abs = i: if i < 0 then -i else i;
+
+  exp = base: pow:
+    if pow > 0
+    then base * (exp base (pow - 1))
+    else if pow < 0
+    then 1.0 / exp base (abs pow)
+    else 1;
+
+  bitShiftR = bit: count:
+    if count == 0
+    then bit
+    else div (bitShiftR bit (count - 1)) 2;
+
+  bitShiftL = bit: count:
+    if count == 0
+    then bit
+    else 2 * (bitShiftL bit (count - 1));
+
+  hexdigits = "0123456789ABCDEF";
+
+  toHex = int:
+    let
+      go = i:
+        if i == 0
+        then ""
+        else go (bitShiftR i 4)
+           + string.charAt (bitAnd i 15) hexdigits;
+      sign = lib.optionalString (int < 0) "-";
+    in
+      if int == 0
+      then "0"
+      else "${sign}${go (abs int)}";
+
+  fromHexMap = builtins.listToAttrs
+    (lib.imap0 (i: c: { name = c; value = i; })
+      (lib.stringToCharacters hexdigits));
+
+  fromHex = literal:
+    let
+      negative = string.charAt 0 literal == "-";
+      start = if negative then 1 else 0;
+      len = builtins.stringLength literal;
+      # reversed list of all digits
+      digits = builtins.genList
+        (i: string.charAt (len - 1 - i) literal)
+        (len - start);
+      parsed = builtins.foldl'
+        (v: d: {
+          val = v.val + (fromHexMap."${d}" * v.mul);
+          mul = v.mul * 16;
+        })
+        { val = 0; mul = 1; } digits;
+    in
+      if negative
+      then -parsed.val
+      else parsed.val;
+
+  # A nix integer is a 64bit signed integer
+  maxBound = 9223372036854775807;
+
+  # fun fact: -9223372036854775808 is the lower bound
+  # for a nix integer (as you would expect), but you can't
+  # use it as an integer literal or you'll be greeted with:
+  # error: invalid integer '9223372036854775808'
+  # This is because all int literals when parsing are
+  # positive, negative "literals" are positive literals
+  # which are preceded by the arithmetic negation operator.
+  minBound = -9223372036854775807 - 1;
+
+  odd = x: bitAnd x 1 == 1;
+  even = x: bitAnd x 1 == 0;
+
+  # div and mod behave like quot and rem in Haskell,
+  # i. e. they truncate towards 0
+  mod = a: b: let res = a / b; in a - (res * b);
+
+  inRange = a: b: x: x >= a && x <= b;
+
+in {
+  inherit
+    maxBound
+    minBound
+    abs
+    exp
+    odd
+    even
+    add
+    sub
+    mul
+    div
+    mod
+    bitShiftR
+    bitShiftL
+    bitOr
+    bitAnd
+    bitXor
+    toHex
+    fromHex
+    inRange
+    ;
+}
diff --git a/users/sterni/nix/int/tests/default.nix b/users/sterni/nix/int/tests/default.nix
new file mode 100644
index 000000000000..fac45dd251e1
--- /dev/null
+++ b/users/sterni/nix/int/tests/default.nix
@@ -0,0 +1,203 @@
+{ depot, lib, ... }:
+
+let
+
+  inherit (depot.nix.runTestsuite)
+    runTestsuite
+    it
+    assertEq
+    ;
+
+  inherit (depot.users.sterni.nix)
+    int
+    string
+    fun
+    ;
+
+  testBounds = it "checks minBound and maxBound" [
+    # this is gonna blow up in my face because
+    # integer overflow is undefined behavior in
+    # C++, so most likely anything could happen?
+    (assertEq "maxBound is the maxBound" true
+      (int.maxBound + 1 < int.maxBound))
+    (assertEq "minBound is the minBound" true
+      (int.minBound - 1 > int.minBound))
+    (assertEq "maxBound overflows to minBound"
+      (int.maxBound + 1)
+      int.minBound)
+    (assertEq "minBound overflows to maxBound"
+      (int.minBound - 1)
+      int.maxBound)
+  ];
+
+  expectedBytes = [
+    "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" "0A" "0B" "0C" "0D" "0E" "0F"
+    "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "1A" "1B" "1C" "1D" "1E" "1F"
+    "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "2A" "2B" "2C" "2D" "2E" "2F"
+    "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "3A" "3B" "3C" "3D" "3E" "3F"
+    "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" "4A" "4B" "4C" "4D" "4E" "4F"
+    "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "5A" "5B" "5C" "5D" "5E" "5F"
+    "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" "6A" "6B" "6C" "6D" "6E" "6F"
+    "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" "7A" "7B" "7C" "7D" "7E" "7F"
+    "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "8A" "8B" "8C" "8D" "8E" "8F"
+    "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" "9A" "9B" "9C" "9D" "9E" "9F"
+    "A0" "A1" "A2" "A3" "A4" "A5" "A6" "A7" "A8" "A9" "AA" "AB" "AC" "AD" "AE" "AF"
+    "B0" "B1" "B2" "B3" "B4" "B5" "B6" "B7" "B8" "B9" "BA" "BB" "BC" "BD" "BE" "BF"
+    "C0" "C1" "C2" "C3" "C4" "C5" "C6" "C7" "C8" "C9" "CA" "CB" "CC" "CD" "CE" "CF"
+    "D0" "D1" "D2" "D3" "D4" "D5" "D6" "D7" "D8" "D9" "DA" "DB" "DC" "DD" "DE" "DF"
+    "E0" "E1" "E2" "E3" "E4" "E5" "E6" "E7" "E8" "E9" "EA" "EB" "EC" "ED" "EE" "EF"
+    "F0" "F1" "F2" "F3" "F4" "F5" "F6" "F7" "F8" "F9" "FA" "FB" "FC" "FD" "FE" "FF"
+  ];
+
+  hexByte = i: string.fit { width = 2; char = "0"; } (int.toHex i);
+
+  hexInts = [
+    { left = 0; right = "0"; }
+    { left = 1; right = "1"; }
+    { left = 11; right = "B"; }
+    { left = 123; right = "7B"; }
+    { left = 9000; right = "2328"; }
+    { left = 2323; right = "913"; }
+    { left = 4096; right = "1000"; }
+    { left = int.maxBound; right = "7FFFFFFFFFFFFFFF"; }
+    { left = int.minBound; right = "-8000000000000000"; }
+  ];
+
+  testHex = it "checks conversion to hex" (lib.flatten [
+    (lib.imap0 (i: hex: [
+      (assertEq "hexByte ${toString i} == ${hex}" (hexByte i) hex)
+      (assertEq "${toString i} == fromHex ${hex}" i (int.fromHex hex))
+    ]) expectedBytes)
+    (builtins.map ({ left, right }: [
+      (assertEq "toHex ${toString left} == ${right}" (int.toHex left) right)
+      (assertEq "${toString left} == fromHex ${right}" left (int.fromHex right))
+    ]) hexInts)
+  ]);
+
+  testBasic = it "checks basic int operations" [
+    (assertEq "122 is even" (int.even 122 && !(int.odd 122)) true)
+    (assertEq "123 is odd" (int.odd 123 && !(int.even 123)) true)
+    (assertEq "abs -4959" (int.abs (-4959)) 4959)
+  ];
+
+  expNumbers = [
+    { left = -3; right = 0.125; }
+    { left = -2; right = 0.25; }
+    { left = -1; right = 0.5; }
+    { left = 0; right = 1; }
+    { left = 1; right = 2; }
+    { left = 2; right = 4; }
+    { left = 3; right = 8; }
+    { left = 4; right = 16; }
+    { left = 5; right = 32; }
+    { left = 16; right = 65536; }
+  ];
+
+  testExp = it "checks exponentiation"
+    (builtins.map ({ left, right }:
+      assertEq
+        "2 ^ ${toString left} == ${toString right}"
+        (int.exp 2 left) right) expNumbers);
+
+  shifts = [
+    { a =   2; b = 5; c =   64; op = "<<"; }
+    { a =  -2; b = 5; c =  -64; op = "<<"; }
+    { a = 123; b = 4; c = 1968; op = "<<"; }
+    { a =   1; b = 8; c =  256; op = "<<"; }
+    { a = 256; b = 8; c =    1; op = ">>"; }
+    { a = 374; b = 2; c =   93; op = ">>"; }
+    { a =   2; b = 2; c =    0; op = ">>"; }
+    { a =  99; b = 9; c =    0; op = ">>"; }
+  ];
+
+  checkShift = { a, b, c, op }@args:
+    let
+      f = string.match op {
+        "<<" = int.bitShiftL;
+        ">>" = int.bitShiftR;
+      };
+    in assertEq "${toString a} ${op} ${toString b} == ${toString c}" (f a b) c;
+
+  checkShiftRDivExp = n:
+    assertEq "${toString n} >> 5 == ${toString n} / 2 ^ 5"
+      (int.bitShiftR n 5) (int.div n (int.exp 2 5));
+
+  checkShiftLMulExp = n:
+    assertEq "${toString n} << 5 == ${toString n} * 2 ^ 5"
+      (int.bitShiftL n 5) (int.mul n (int.exp 2 5));
+
+  testBit = it "checks bitwise operations" (lib.flatten [
+    (builtins.map checkShift shifts)
+    (builtins.map checkShiftRDivExp [
+      1
+      2
+      3
+      5
+      7
+      23
+      1623
+      238
+      34
+      348
+      2834
+      834
+      348
+    ])
+    (builtins.map checkShiftLMulExp [
+      1
+      2
+      3
+      5
+      7
+      23
+      384
+      3
+      2
+      5991
+      85109
+      38
+    ])
+  ]);
+
+  divisions = [
+    { a =  2; b =  1; c = 2; mod = 0;}
+    { a =  2; b =  2; c = 1; mod = 0;}
+    { a = 20; b = 10; c = 2; mod = 0;}
+    { a = 12; b =  5; c = 2; mod = 2;}
+    { a = 23; b =  4; c = 5; mod = 3;}
+  ];
+
+  checkDiv = n: { a, b, c, mod }: [
+    (assertEq "${n}: div result" (int.div a b) c)
+    (assertEq "${n}: mod result" (int.mod a b) mod)
+    (assertEq "${n}: divMod law" ((int.div a b) * b + (int.mod a b)) a)
+  ];
+
+  testDivMod = it "checks integer division and modulo"
+    (lib.flatten [
+      (builtins.map (checkDiv "+a / +b") divisions)
+      (builtins.map (fun.rl (checkDiv "-a / +b") (x: x // {
+        a = -x.a;
+        c = -x.c;
+        mod = -x.mod;
+      })) divisions)
+      (builtins.map (fun.rl (checkDiv "+a / -b") (x: x // {
+        b = -x.b;
+        c = -x.c;
+      })) divisions)
+      (builtins.map (fun.rl (checkDiv "-a / -b") (x: x // {
+        a = -x.a;
+        b = -x.b;
+        mod = -x.mod;
+      })) divisions)
+    ]);
+
+in
+  runTestsuite "nix.int" [
+    testBounds
+    testHex
+    testBasic
+    testExp
+    testBit
+    testDivMod
+  ]
diff --git a/users/sterni/nix/string/default.nix b/users/sterni/nix/string/default.nix
new file mode 100644
index 000000000000..3fe7c04618c3
--- /dev/null
+++ b/users/sterni/nix/string/default.nix
@@ -0,0 +1,76 @@
+{ depot, lib, ... }:
+
+let
+
+  inherit (depot.users.sterni.nix.char)
+    chr
+    ord
+    ;
+
+  inherit (depot.users.sterni.nix)
+    int
+    flow
+    ;
+
+  # take n s: the first n bytes of s (all of s if it is shorter than n)
+  take = n: s: builtins.substring 0 n s;
+
+  # drop n s: s without its first n bytes ("" if n is past the end of s)
+  drop = n: s: builtins.substring n int.maxBound s;
+
+  # charAt i s: the one byte string at index i of s, or null if i is past the end
+  charAt = i: s:
+    let found = builtins.substring i 1 s;
+    in if found != "" then found else null;
+
+  # charIndex char s: index of the first element of s equal to char, or null
+  charIndex = char: s:
+    let
+      end = builtins.stringLength s;
+      search = idx: flow.cond [
+        [ (idx >= end) null ]
+        [ (charAt idx s == char) idx ]
+        [ true (search (idx + 1)) ]
+      ];
+    in search 0;
+
+  toChars = lib.stringToCharacters; # string -> list of its one-element strings
+  fromChars = lib.concatStrings; # list of strings -> their concatenation
+
+  # toBytes str: the `ord` value of every element of toChars str, in order
+  toBytes = str: builtins.map ord (toChars str);
+
+  fromBytes = bytes: lib.concatMapStrings chr bytes; # list of integers -> string, via chr
+
+  # pad { left, right, char } s: s with `left` copies of char in front of it and
+  # `right` copies behind it (both counts default to 0, char defaults to " ")
+  pad = { left ? 0, right ? 0, char ? " " }: s:
+    let repeat = count: fromChars (builtins.genList (_: char) count);
+    in "${repeat left}${s}${repeat right}";
+
+  # fit { char, width, side } s: pad s with char up to `width` bytes; `side`
+  # names the pad argument to set. Strings already that long are returned as is.
+  fit = { char ? " ", width, side ? "left" }: s:
+    let missing = width - builtins.stringLength s;
+    in if missing > 0
+      then pad { inherit char; "${side}" = missing; } s
+      else s;
+
+  # pattern matching for strings only: picks the attribute named val from cases
+  match = val: cases: cases."${val}";
+
+in { # everything bound in the let block above except the inherited helpers is exported
+  inherit
+    take
+    drop
+    charAt
+    charIndex
+    toBytes
+    fromBytes
+    toChars
+    fromChars
+    pad
+    fit
+    match
+    ;
+}
diff --git a/users/sterni/nix/string/tests/default.nix b/users/sterni/nix/string/tests/default.nix
new file mode 100644
index 000000000000..2caecbfa7b3f
--- /dev/null
+++ b/users/sterni/nix/string/tests/default.nix
@@ -0,0 +1,65 @@
+{ depot, ... }:
+
+let
+
+  inherit (depot.users.sterni.nix)
+    string
+    ;
+
+  inherit (depot.nix.runTestsuite)
+    it
+    assertEq
+    runTestsuite
+    ;
+
+  testTakeDrop = it "tests take and drop" [
+    (assertEq "take"
+      (string.take 5 "five and more")
+      "five ")
+    (assertEq "drop"
+      (string.drop 2 "coin")
+      "in")
+    (assertEq "take out of bounds" # n larger than the string: the whole string comes back
+      (string.take 100 "foo")
+      "foo")
+    (assertEq "drop out of bounds" # n larger than the string: nothing is left
+      (string.drop 42 "lol")
+      "")
+  ];
+
+  testIndexing = it "tests string indexing" [
+    (assertEq "normal charAt" # indices start at 0, so 3 is the last byte of "helo"
+      (string.charAt 3 "helo")
+      "o")
+    (assertEq "out of bounds charAt" # an index past the end yields null, not ""
+      (string.charAt 5 "helo")
+      null)
+  ];
+
+  testFinding = it "tests finding in strings" [
+    (assertEq "normal charIndex" # the result is a 0-based index
+      (string.charIndex "d" "abcdefghijkl")
+      3)
+    (assertEq "charIndex no match" # a character that does not occur yields null
+      (string.charIndex "w" "zZzZzzzZZZ")
+      null)
+  ];
+
+  dontEval = builtins.throw "this should not get evaluated"; # throws as soon as it is forced
+
+  testMatch = it "tests match" [
+    (assertEq "basic match usage" 42
+      (string.match "answer" {
+        "answer" = 42;
+        "banana" = dontEval; # unselected arms must stay unevaluated
+        "maleur" = dontEval;
+      }))
+  ];
+
+in
+  runTestsuite "nix.string" [ # suite name, then the test groups bound in the let block above
+    testTakeDrop
+    testIndexing
+    testFinding
+    testMatch
+  ]
diff --git a/users/sterni/nix/url/default.nix b/users/sterni/nix/url/default.nix
new file mode 100644
index 000000000000..ce7ed9b83437
--- /dev/null
+++ b/users/sterni/nix/url/default.nix
@@ -0,0 +1,42 @@
+{ depot, lib, ... }:
+
+let
+
+  inherit (depot.users.sterni.nix)
+    char
+    int
+    string
+    ;
+
+  # reserved c: whether c is one of the reserved URI characters (RFC 3986, 2.2)
+  reserved = c: builtins.elem c [
+    ":" "/" "?" "#" "[" "]" "@"                 # gen-delims
+    "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" # sub-delims
+  ];
+
+  # unreserved c: true if char.asciiAlphaNum accepts c or c is one of - _ . ~
+  unreserved = c: char.asciiAlphaNum c || builtins.elem c [ "-" "_" "." "~" ];
+
+  # percentEncode c: c itself if it is unreserved, otherwise "%" followed by
+  # the hex form of char.ord c, padded on the left with "0" to two digits
+  percentEncode = c:
+    let
+      hex = int.toHex (char.ord c);
+      twoDigits = string.fit { width = 2; char = "0"; side = "left"; } hex;
+    in
+      if unreserved c then c else "%" + twoDigits;
+
+  # encode { leaveReserved } s: percent-encode s element by element; with
+  # leaveReserved = true the reserved characters are passed through unchanged
+  encode = { leaveReserved ? false }: s:
+    let
+      keep = c: leaveReserved && reserved c;
+      translate = c: if keep c then c else percentEncode c;
+    in
+      lib.concatStrings (builtins.map translate (lib.stringToCharacters s));
+
+in { # only encode is exported; reserved, unreserved and percentEncode stay private
+  inherit
+    encode
+    ;
+}
diff --git a/users/sterni/nix/url/tests/default.nix b/users/sterni/nix/url/tests/default.nix
new file mode 100644
index 000000000000..f58cf12a02b2
--- /dev/null
+++ b/users/sterni/nix/url/tests/default.nix
@@ -0,0 +1,49 @@
+{ depot, ... }:
+
+let
+
+  inherit (depot.nix.runTestsuite)
+    it
+    assertEq
+    runTestsuite
+    ;
+
+  inherit (depot.users.sterni.nix)
+    url
+    ;
+
+  checkEncoding = args: { left, right }: # asserts that url.encode args left equals right
+    assertEq "encode ${builtins.toJSON left} == ${builtins.toJSON right}"
+      (url.encode args left) right;
+
+  unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_~";
+
+  encodeExpected = [ # left: input, right: expected result of url.encode {}
+    { left = "Laguna Beach"; right = "Laguna%20Beach"; }
+    { left = "👾 Exterminate!"; right = "%F0%9F%91%BE%20Exterminate%21"; }
+    { left = unreserved; right = unreserved; } # unreserved characters must come back unchanged
+    {
+      left = "`!@#$%^&*()+={}[]:;'\\|<>,?/ \"";
+      right = "%60%21%40%23%24%25%5E%26%2A%28%29%2B%3D%7B%7D%5B%5D%3A%3B%27%5C%7C%3C%3E%2C%3F%2F%20%22";
+    }
+  ];
+
+  testEncode = it "checks url.encode" # {} as arguments, so nothing overrides encode's defaults
+    (builtins.map (checkEncoding {}) encodeExpected);
+
+  testLeaveReserved = it "checks that leaveReserved is like id for valid URLs"
+    (builtins.map (x: checkEncoding { leaveReserved = true; } { left = x; right = x; }) [ # left == right: input must come back unchanged
+      "ftp://ftp.is.co.za/rfc/rfc1808.txt"
+      "http://www.ietf.org/rfc/rfc2396.txt"
+      "ldap://[2001:db8::7]/c=GB?objectClass?one"
+      "mailto:John.Doe@example.com"
+      "news:comp.infosystems.www.servers.unix"
+      "tel:+1-816-555-1212"
+      "telnet://192.0.2.16:80/"
+      "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"
+    ]);
+in
+  runTestsuite "nix.url" [ # suite name, then the test groups bound in the let block above
+    testEncode
+    testLeaveReserved
+  ]
diff --git a/users/sterni/nix/utf8/default.nix b/users/sterni/nix/utf8/default.nix
new file mode 100644
index 000000000000..713f1f57cbe6
--- /dev/null
+++ b/users/sterni/nix/utf8/default.nix
@@ -0,0 +1,208 @@
+{ depot, lib, ... }:
+
+let
+
+  # TODO(sterni): encode
+
+  inherit (depot.users.sterni.nix)
+    char
+    flow
+    fun
+    int
+    string
+    util
+    ;
+
+  /* (Internal) function to determine the total number
+     of bytes of a UTF-8 byte sequence from its first
+     byte (1 for an ASCII byte).
+
+     This function will throw if the given first
+     byte is ill-formed, but it does not detect all
+     cases of ill-formed-ness.
+
+     Based on table 3-6. from The Unicode Standard,
+     Version 13.0, section 3.9.
+
+     Type: integer -> integer
+  */
+  byteCount = first: flow.cond [
+    [ (int.bitAnd first 128 == 0)   1 ] # 0xxxxxxx
+    [ (int.bitAnd first 224 == 192) 2 ] # 110xxxxx
+    [ (int.bitAnd first 240 == 224) 3 ] # 1110xxxx
+    [ (int.bitAnd first 248 == 240) 4 ] # 11110xxx
+    [ true (builtins.throw "Ill-formed first byte ${int.toHex first}") ]
+  ];
+
+  /* (Internal) function returning the check which the byte at
+     a given position of a UTF-8 byte sequence has to pass.
+
+     Based on table 3-7. from The Unicode Standard,
+     Version 13.0, section 3.9.
+
+     Throws for position 1 if the first byte matches no table row.
+
+     Type: integer -> integer -> (integer -> bool)
+  */
+  wellFormedByte =
+    # first byte's integer value
+    first:
+    # byte position as an index starting with 0
+    pos:
+      let
+        defaultRange = int.inRange 128 191; # 80..BF
+      in
+        # The first byte is either ASCII, which requires no checks,
+        # or it is checked implicitly when the second byte is
+        # checked. The downside is that this may generate bad error
+        # messages in very rare cases.
+        if pos == 0
+        then lib.const true
+        else if pos > 1 # 3rd and 4th byte have only one validity rule
+        then defaultRange
+        else assert pos == 1; flow.switch first [
+          [ (int.inRange 194 223) defaultRange          ] # C2..DF
+          [ 224                   (int.inRange 160 191) ] # E0
+          [ (int.inRange 225 236) defaultRange          ] # E1..EC
+          [ 237                   (int.inRange 128 159) ] # ED
+          [ (int.inRange 238 239) defaultRange          ] # EE..EF
+          [ 240                   (int.inRange 144 191) ] # F0
+          [ (int.inRange 241 243) defaultRange          ] # F1..F3
+          [ 244                   (int.inRange 128 143) ] # F4
+          [
+            (fun.const true)
+            (builtins.throw "Invalid first byte ${int.toHex first}")
+          ]
+        ];
+
+  /* Iteration step for decoding a UTF-8 byte sequence.
+     It decodes incrementally, i.e. it has to be fed
+     one byte at a time and then returns either a
+     new state or a final result.
+
+     If the resulting attribute set contains the attribute
+     result, it is finished and the decoded codepoint is
+     contained in that attribute. In all other cases,
+     pass the returned set to step again along with
+     a new byte. The initial state to pass is the empty
+     set.
+
+     Extra attributes are always passed through, so you
+     can pass extra state. Be sure not to use result,
+     pos, code, first or count.
+
+     This function will throw with a fairly detailed
+     message if it encounters ill-formed bytes.
+
+     The implementation is based on The Unicode Standard,
+     Version 13.0, section 3.9, especially table 3-6.
+
+     Type: { ... } -> string -> ({ result :: integer, ... } | { ... })
+
+     Example: utf8.step {} "f"
+     => { result = 102; }
+  */
+  step = { pos ? 0, code ? 0, ... }@args: byte:
+    let
+      value = char.ord byte;
+      # first byte is context for well-formed-ness
+      first = args.first or value;
+      count = args.count or (byteCount first); # only computed for the first byte of a sequence
+      newCode =
+        if count == 1
+        then int.bitAnd 127 first # ascii character
+        else # multi byte UTF-8 sequence
+          let
+            # Calculate the bitmask for extracting the
+            # codepoint data in the current byte.
+            # If the codepoint is not ASCII, the bits
+            # used for codepoint data differ depending
+            # on the byte position and overall byte
+            # count. The first byte always ignores
+            # the (count + 1) most significant bits.
+            # For all subsequent bytes, the 2 most
+            # significant bits need to be ignored.
+            # See also table 3-6.
+            mask =
+              if pos == 0
+              then int.exp 2 (8 - (count + 1)) - 1
+              else 63;
+            # UTF-8 uses the 6 least significant bits in all
+            # subsequent bytes after the first one. Therefore
+            # we can determine the amount we need to shift
+            # the current value by from the number of bytes left.
+            offset = (count - (pos + 1)) * 6;
+          in
+            code + (int.bitShiftL (int.bitAnd mask value) offset);
+      illFormedMsg =
+        "Ill-formed byte ${int.toHex value} at position ${toString pos} in ${toString count} byte UTF-8 sequence";
+    in
+      if !(wellFormedByte first pos value) then builtins.throw illFormedMsg
+      else if pos + 1 == count # this was the last byte of the sequence
+      then (builtins.removeAttrs args [ # allow extra state being passed through
+        "count"
+        "code"
+        "pos"
+        "first"
+      ]) // { result = newCode; }
+      else (builtins.removeAttrs args [ "result" ]) // { # drop the result of the previous sequence
+        inherit count first;
+        code = newCode;
+        pos  = pos + 1;
+      };
+
+  /* Decode a UTF-8 string into a list of codepoints.
+
+     Throws if the string is ill-formed UTF-8 or empty.
+
+     Type: string -> [ integer ]
+  */
+  # TODO(sterni): option to fallback to replacement char instead of failure
+  decode = s:
+    let
+      consume = { codes ? [], ... }@state: byte:
+        let
+          next = step state byte;
+        in
+          # foldl' forces the accumulated value only at the end,
+          # which lets the thunk grow large enough to cause a stack
+          # overflow for sufficiently large strings. To avoid this
+          # we always deepSeq the step result, which also keeps the
+          # memory usage of decode reasonable.
+          builtins.deepSeq next
+            (if !(next ? result)
+            then next
+            else next // {
+              codes = codes ++ [ next.result ];
+            });
+      final =
+        builtins.foldl' consume {} (string.toChars s);
+      truncatedMsg =
+        if final ? count && final ? pos
+        then "Missing ${toString (final.count - final.pos)} bytes at end of input"
+        else "Unexpected end of input";
+    in
+      if final ? result
+      then final.codes
+      else builtins.throw truncatedMsg;
+
+  /* Decodes a UTF-8 string like decode, but returns
+     null instead of throwing if decode fails.
+
+     Type: string -> ( [ integer ] | null)
+  */
+  decodeSafe = s:
+    let
+      attempt = builtins.tryEval (decode s);
+    in
+      if !attempt.success
+      then null
+      else attempt.value;
+
+in { # byteCount and wellFormedByte are internal and deliberately not exported
+  inherit
+    decode
+    decodeSafe
+    step
+    ;
+}
diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix
new file mode 100644
index 000000000000..d9d8ae77105d
--- /dev/null
+++ b/users/sterni/nix/utf8/tests/default.nix
@@ -0,0 +1,121 @@
+{ depot, lib, ... }:
+
+let
+
+  inherit (depot.third_party)
+    runCommandLocal
+    ;
+
+  inherit (depot.nix.runTestsuite)
+    runTestsuite
+    it
+    assertEq
+    assertThrows
+    assertDoesNotThrow
+    ;
+
+  inherit (depot.users.Profpatsch.writers)
+    rustSimple
+    ;
+
+  inherit (depot.users.sterni.nix)
+    int
+    utf8
+    string
+    char
+    ;
+
+  rustDecoder = rustSimple { # reference decoder: prints the chars read from stdin as "[ n1 n2 ... ]"
+    name = "utf8-decode";
+  } ''
+    use std::io::{self, Read};
+    fn main() -> std::io::Result<()> {
+      let mut buffer = String::new();
+      io::stdin().read_to_string(&mut buffer)?;
+
+      print!("[ ");
+
+      for c in buffer.chars() {
+        print!("{} ", u32::from(c));
+      }
+
+      print!("]");
+
+      Ok(())
+    }
+  '';
+
+  rustDecode = s: # pipes s through rustDecoder in a derivation and imports what it printed
+    let
+      expr = runCommandLocal "${s}-decoded" {} ''
+        printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out
+      '';
+    in import expr; # the output is Nix list syntax, so importing it yields a list of integers
+
+  # hexDecode l: utf8.decode applied to the bytes given as a list of hex strings
+  hexDecode = l: utf8.decode (string.fromBytes (builtins.map int.fromHex l));
+
+  testFailures = it "checks UTF-8 decoding failures" [
+    (assertThrows "empty bytestring throws" (utf8.decode ""))
+    (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ])) # 2 of 4 bytes
+    # examples from The Unicode Standard
+    (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ]))
+    (assertThrows "ill-formed: E0 9F 80" (hexDecode [ "E0" "9F" "80" ]))
+    (assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ])
+  ];
+
+  testAscii = it "checks decoding of ascii strings"
+    (builtins.map (s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\""
+      (string.toBytes s) (utf8.decode s)) [ # the byte values must equal the decoded codepoints
+        "foo bar"
+        "hello\nworld"
+        "carriage\r\nreturn"
+        "1238398494829304 []<><>({})[]!!)"
+        (string.take 127 char.allChars) # NOTE(review): presumably the 7-bit ASCII range — confirm against char.allChars
+      ]);
+
+  randomUnicode = [ # multi-byte samples; compared against the Rust decoder in testDecoding
+    "🥰👨‍👨‍👧‍👦🐈‍⬛👩🏽‍🦰"
+    # https://kermitproject.org/utf8.html
+    "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"
+    "An preost wes on leoden, Laȝamon was ihoten"
+    "Sîne klâwen durh die wolken sint geslagen,"
+    "Τὴ γλῶσσα μοῦ ἔδωσαν ἑλληνικὴ"
+    "На берегу пустынных волн"
+    "ვეპხის ტყაოსანი შოთა რუსთაველი"
+    "யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம், "
+    "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸು "
+  ];
+
+  # https://kermitproject.org/utf8.html
+  glassSentences = [ # one sample sentence per language/script; used by testDecoding
+    "Euro Symbol: €."
+    "Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα."
+    "Íslenska / Icelandic: Ég get etið gler án þess að meiða mig."
+    "Polish: Mogę jeść szkło, i mi nie szkodzi."
+    "Romanian: Pot să mănânc sticlă și ea nu mă rănește."
+    "Ukrainian: Я можу їсти шкло, й воно мені не пошкодить."
+    "Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։"
+    "Georgian: მინას ვჭამ და არა მტკივა."
+    "Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती."
+    "Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי."
+    "Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ."
+    "Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني."
+    "Japanese: 私はガラスを食べられます。それは私を傷つけません。"
+    "Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ "
+  ];
+
+  # Every sample string has to decode to the same codepoints as the Rust
+  # reference decoder (rustDecode) reports for it.
+  testDecoding = it "checks decoding of UTF-8 strings against Rust's String"
+    (builtins.map
+      (sample: assertEq "Decoding of “${sample}” is correct"
+        (utf8.decode sample) (rustDecode sample))
+      (glassSentences ++ randomUnicode));
+
+in
+  runTestsuite "nix.utf8" [ # suite name, then the test groups bound in the let block above
+    testFailures
+    testAscii
+    testDecoding
+  ]