diff options
Diffstat (limited to 'users/sterni')
23 files changed, 1611 insertions, 0 deletions
diff --git a/users/sterni/OWNERS b/users/sterni/OWNERS new file mode 100644 index 000000000000..cace4d0f3759 --- /dev/null +++ b/users/sterni/OWNERS @@ -0,0 +1,3 @@ +inherited: false +owners: + - sterni diff --git a/users/sterni/clhs-lookup/README.md b/users/sterni/clhs-lookup/README.md new file mode 100644 index 000000000000..1f42ff43a210 --- /dev/null +++ b/users/sterni/clhs-lookup/README.md @@ -0,0 +1,13 @@ +# clhs-lookup + +Simple cli to lookup symbols' documentation in a local copy of the +Common Lisp HyperSpec. + +## usage + +``` +clhs-lookup [--print] symbol [symbol [...]] + + --print Print documentation paths to stdout instead of + opening them with $BROWSER (defaults to xdg-open). +``` diff --git a/users/sterni/clhs-lookup/clhs-lookup.lisp b/users/sterni/clhs-lookup/clhs-lookup.lisp new file mode 100644 index 000000000000..0e61dd901f93 --- /dev/null +++ b/users/sterni/clhs-lookup/clhs-lookup.lisp @@ -0,0 +1,46 @@ +(in-package :clhs-lookup) +(declaim (optimize (safety 3))) + +(defun find-symbols-paths (syms clhs) + "Find pathnames to HyperSpec files describing the listed + symbol names (as strings). Paths are returned in the order + of the symbols given with missing entries removed." + (check-type syms list) + (check-type clhs pathname) + (let* ((data-dir (merge-pathnames "HyperSpec/Data/" clhs)) + (data (merge-pathnames "Map_Sym.txt" data-dir)) + (found (make-hash-table :test #'equal)) + (syms (mapcar #'string-upcase syms))) + (with-open-file (s data :direction :input) + (loop + with missing = syms + for symbol-line = (read-line s nil :eof) + for path-line = (read-line s nil :eof) + until (or (eq symbol-line :eof) + (eq path-line :eof) + (null missing)) + for pos = (position symbol-line missing :test #'equal) + when pos + do (progn + (delete symbol-line missing) + (setf (gethash symbol-line found) path-line))) + ; TODO(sterni): get rid of Data/../ in path + (mapcar + (lambda (x) (merge-pathnames x data-dir)) + (remove nil + (mapcar (lambda (x) (gethash x found)) syms)))))) + +(defun main () + (let* ((browser (or (uiop:getenvp "BROWSER") "xdg-open")) + (args (uiop:command-line-arguments)) + (prin (member "--print" args :test #'equal)) + (syms (remove-if (lambda (x) (eq (char x 0) #\-)) args)) + (paths (find-symbols-paths syms *clhs-path*))) + (if (null paths) + (uiop:quit 1) + (dolist (p paths) + (if prin + (format t "~A~%" p) + (uiop:launch-program + (format nil "~A ~A" browser p) + :force-shell t)))))) diff --git a/users/sterni/clhs-lookup/default.nix b/users/sterni/clhs-lookup/default.nix new file mode 100644 index 000000000000..951b94d72f19 --- /dev/null +++ b/users/sterni/clhs-lookup/default.nix @@ -0,0 +1,36 @@ +{ pkgs, depot, ... }: + +let + inherit (pkgs) fetchzip writeText; + inherit (depot.nix) buildLisp; + inherit (builtins) replaceStrings; + + clhsVersion = "7-0"; + + clhs = fetchzip { + name = "HyperSpec-${replaceStrings [ "-" ] [ "." ] clhsVersion}"; + url = "ftp://ftp.lispworks.com/pub/software_tools/reference/HyperSpec-${clhsVersion}.tar.gz"; + sha256 = "1zsi35245m5sfb862ibzy0pzlph48wvlggnqanymhgqkpa1v20ak"; + stripRoot = false; + }; + + clhs-path = writeText "clhs-path.lisp" '' + (in-package :clhs-lookup.clhs-path) + (defparameter *clhs-path* (pathname "${clhs}/")) + ''; + + clhs-lookup = buildLisp.program { + name = "clhs-lookup"; + + deps = [ + (buildLisp.bundled "uiop") + ]; + + srcs = [ + ./packages.lisp + clhs-path + ./clhs-lookup.lisp + ]; + }; +in + clhs-lookup diff --git a/users/sterni/clhs-lookup/packages.lisp b/users/sterni/clhs-lookup/packages.lisp new file mode 100644 index 000000000000..d059b96ce9f0 --- /dev/null +++ b/users/sterni/clhs-lookup/packages.lisp @@ -0,0 +1,10 @@ +(defpackage :clhs-lookup.clhs-path + (:use :cl) + (:export :*clhs-path*)) + +(defpackage clhs-lookup + (:use :cl :uiop) + (:import-from :clhs-lookup.clhs-path :*clhs-path*) + (:export :main + :find-symbols-paths)) + diff --git a/users/sterni/htmlman/README.md b/users/sterni/htmlman/README.md new file mode 100644 index 000000000000..258233d4c4d2 --- /dev/null +++ b/users/sterni/htmlman/README.md @@ -0,0 +1,36 @@ +# htmlman + +static site generator for man pages intended for +rendering man page documentation viewable using +a web browser. + +## usage + +If you have a nix expression, `doc.nix`, like this: + +```nix +{ depot, ... }: + +depot.users.sterni.htmlman { + title = "foo project"; + pages = [ + { + name = "foo"; + section = 1; + } + { + name = "foo"; + section = 3; + path = ../devman/foo.3; + } + ]; + manDir = ../man; +} +``` + +You can run the following to directly deploy the resulting +documentation output to a specific target directory: + +```sh +nix-build -A deploy doc.nix && ./result target_directory +``` diff --git a/users/sterni/htmlman/default.nix b/users/sterni/htmlman/default.nix new file mode 100644 index 000000000000..b88bc264103b --- /dev/null +++ b/users/sterni/htmlman/default.nix @@ -0,0 +1,234 @@ +{ depot, lib, pkgs, ... }: + +let + inherit (depot.nix) + getBins + runExecline + yants + ; + + inherit (depot.tools) + cheddar + ; + + inherit (pkgs) + mandoc + coreutils + fetchurl + writers + ; + + bins = getBins cheddar [ "cheddar" ] + // getBins mandoc [ "mandoc" ] + // getBins coreutils [ "cat" "mv" "mkdir" ] + ; + + normalizeDrv = fetchurl { + url = "https://necolas.github.io/normalize.css/8.0.1/normalize.css"; + sha256 = "04jmvybwh2ks4dlnfa70sb3a3z3ig4cv0ya9rizjvm140xq1h22q"; + }; + + execlineStdoutInto = target: line: [ + "redirfd" "-w" "1" target + ] ++ line; + + # I will not write a pure nix markdown renderer + # I will not write a pure nix markdown renderer + # I will not write a pure nix markdown renderer + # I will not write a pure nix markdown renderer + # I will not write a pure nix markdown renderer + markdown = md: + let + html = runExecline.local "rendered-markdown" { + stdin = md; + } ([ + "importas" "-iu" "out" "out" + ] ++ execlineStdoutInto "$out" [ + bins.cheddar "--about-filter" "description.md" + ]); + in builtins.readFile html; + + indexTemplate = { title, description, pages ? [] }: '' + <!doctype html> + <html> + <head> + <meta charset="utf-8"> + <title>${title}</title> + <link rel="stylesheet" type="text/css" href="style.css"/> + </head> + <body> + <div class="index-text"> + <h1>${title}</h1> + ${markdown description} + <h2>man pages</h2> + <ul> + ${lib.concatMapStrings ({ name, section, ... }: '' + <li><a href="${name}.${toString section}.html">${name}(${toString section})</a></li> + '') pages} + </ul> + </div> + </body> + </html> + ''; + + defaultStyle = import ./defaultStyle.nix { }; + + # This deploy script automatically copies the build result into + # a TARGET directory and marks it as writeable optionally. + # It is exposed as the deploy attribute of the result of + # htmlman, so an htmlman expression can be used like this: + # nix-build -A deploy htmlman.nix && ./result target_dir + deployScript = title: drv: writers.writeDash "deploy-${title}" '' + usage() { + printf 'Usage: %s [-w] TARGET\n\n' "$0" + printf 'Deploy htmlman documentation to TARGET directory.\n\n' + printf ' -h Display this help message\n' + printf ' -w Make TARGET directory writeable\n' + } + + if test "$#" -lt 1; then + usage + exit 100 + fi + + writeable=false + + while test "$#" -gt 0; do + case "$1" in + -h) + usage + exit 0 + ;; + -w) + writeable=true + ;; + -*) + usage + exit 100 + ;; + *) + if test -z "$target"; then + target="$1" + else + echo "Too many arguments" + exit 100 + fi + ;; + esac + + shift + done + + if test -z "$target"; then + echo "Missing TARGET" + usage + exit 100 + fi + + set -ex + + mkdir -p "$target" + cp -RTL --reflink=auto "${drv}" "$target" + + if $writeable; then + chmod -R +w "$target" + fi + ''; + + htmlman = + { title + # title of the index page + , description ? "" + # description which is displayed after + # the main heading on the index page + , pages ? [] + # man pages of the following structure: + # { + # name : string; + # section : int; + # path : either path string; + # } + # path is optional, if it is not given, + # the man page source must be located at + # "${manDir}/${name}.${toString section}" + , manDir ? null + # directory in which man page sources are located + , style ? defaultStyle + # CSS to use as a string + , normalizeCss ? true + # whether to include normalize.css before the custom CSS + , linkXr ? "all" + # How to handle cross references in the html output: + # + # * none: don't convert cross references into hyperlinks + # * all: link all cross references as if they were + # rendered into $out by htmlman + # * inManDir: link to all man pages which have their source + # in `manDir` and use the format string defined + # in linkXrFallback for all other cross references. + , linkXrFallback ? "https://manpages.debian.org/unstable/%N.%S.en.html" + # fallback link to use if linkXr == "inManDir" and the man + # page is not in ${manDir}. Placeholders %N (name of page) + # and %S (section of page) can be used. See mandoc(1) for + # more information. + }: + + let + linkXrEnum = yants.enum "linkXr" [ "all" "inManDir" "none" ]; + + index = indexTemplate { + inherit title description pages; + }; + + resolvePath = { path ? null, name, section }: + if path != null + then path + else "${manDir}/${name}.${toString section}"; + + mandocOpts = lib.concatStringsSep "," ([ + "style=style.css" + ] ++ linkXrEnum.match linkXr { + all = [ "man=./%N.%S.html" ]; + inManDir = [ "man=./%N.%S.html;${linkXrFallback}" ]; + none = [ ]; + }); + + html = + runExecline.local "htmlman-${title}" { + derivationArgs = { + inherit index style; + passAsFile = [ "index" "style" ]; + }; + } ([ + "multisubstitute" [ + "importas" "-iu" "out" "out" + "importas" "-iu" "index" "indexPath" + "importas" "-iu" "style" "stylePath" + ] + "if" [ bins.mkdir "-p" "$out" ] + "if" [ bins.mv "$index" "\${out}/index.html" ] + "if" (execlineStdoutInto "\${out}/style.css" [ + "if" ([ + bins.cat + ] ++ lib.optional normalizeCss normalizeDrv + ++ [ + "$style" + ]) + ]) + # let mandoc check for available man pages + "execline-cd" "${manDir}" + ] ++ lib.concatMap ({ name, section, ... }@p: + execlineStdoutInto "\${out}/${name}.${toString section}.html" [ + "if" [ + bins.mandoc + "-mdoc" + "-T" "html" + "-O" mandocOpts + (resolvePath p) + ] + ]) pages); + in html // { + deploy = deployScript title html; + }; +in + htmlman diff --git a/users/sterni/htmlman/defaultStyle.nix b/users/sterni/htmlman/defaultStyle.nix new file mode 100644 index 000000000000..a44b5ef06934 --- /dev/null +++ b/users/sterni/htmlman/defaultStyle.nix @@ -0,0 +1,49 @@ +{ ... }: + +'' + body { + font-size: 1em; + line-height: 1.5; + font-family: serif; + background-color: #efefef; + } + + h1, h2, h3, h4, h5, h6 { + font-family: sans-serif; + font-size: 1em; + margin: 5px 0; + } + + h1 { + margin-top: 0; + } + + a:link, a:visited { + color: #3e7eff; + } + + h1 a, h2 a, h3 a, h4 a, h5 a, h6 a { + text-decoration: none; + } + + .manual-text, .index-text { + padding: 20px; + max-width: 800px; + background-color: white; + margin: 0 auto; + } + + table.head, table.foot { + display: none; + } + + .Nd { + display: inline; + } + + /* use same as cheddar for man pages */ + pre { + padding: 16px; + background-color: #f6f8fa; + } +'' diff --git a/users/sterni/keys.nix b/users/sterni/keys.nix new file mode 100644 index 000000000000..815f62ee080e --- /dev/null +++ b/users/sterni/keys.nix @@ -0,0 +1,7 @@ +{ ... }: + +{ + all = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJk+KvgvI2oJTppMASNUfMcMkA2G5ZNt+HnWDzaXKLlo lukas@wolfgang" + ]; +} diff --git a/users/sterni/nix/char/all-chars.bin b/users/sterni/nix/char/all-chars.bin new file mode 100644 index 000000000000..017b909e8e8e --- /dev/null +++ b/users/sterni/nix/char/all-chars.bin @@ -0,0 +1,2 @@ + + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ \ No newline at end of file diff --git a/users/sterni/nix/char/default.nix b/users/sterni/nix/char/default.nix new file mode 100644 index 000000000000..e6b8d6d7f168 --- /dev/null +++ b/users/sterni/nix/char/default.nix @@ -0,0 +1,90 @@ +{ depot, lib, pkgs, ... }: + +let + + inherit (depot.users.sterni.nix.flow) + cond + ; + + inherit (depot.nix) + yants + ; + + inherit (depot.users.sterni.nix) + string + ; + + # A char is the atomic element of a nix string + # which is essentially an array of arbitrary bytes + # as long as they are not a NUL byte. + # + # A char is neither a byte nor a unicode codepoint! + char = yants.restrict "char" (s: builtins.stringLength s == 1) yants.string; + + # integer representation of char + charval = yants.restrict "charval" (i: i >= 1 && i < 256) yants.int; + + allChars = builtins.readFile ./all-chars.bin; + + # Originally I searched a list for this, but came to the + # conclusion that this can never be fast enough in Nix. + # We therefore use a solution similar to infinisil's. + ordMap = builtins.listToAttrs + (lib.imap1 (i: v: { name = v; value = i; }) + (string.toChars allChars)); + + # Note on performance: + # chr and ord have been benchmarked using the following cases: + # + # builtins.map ord (lib.stringToCharacters allChars) + # builtins.map chr (builtins.genList (int.add 1) 255 + # + # The findings are as follows: + # 1. Searching through either strings using recursion is + # unbearably slow in Nix, leading to evaluation times + # of up to 3s for the following very small test case. + # This is why we use the trusty attribute set for ord. + # 2. String indexing is much faster than list indexing which + # is why we use the former for chr. + ord = c: ordMap."${c}"; + + chr = i: string.charAt (i - 1) allChars; + + asciiAlpha = c: + let + v = ord c; + in (v >= 65 && v <= 90) + || (v >= 97 && v <= 122); + + asciiNum = c: + let + v = ord c; + in v >= 48 && v <= 57; + + asciiAlphaNum = c: asciiAlpha c || asciiNum c; + +in { + inherit + allChars + char + charval + ord + chr + asciiAlpha + asciiNum + asciiAlphaNum + ; + + # originally I generated a nix file containing a list of + # characters, but infinisil uses a better way which I adapt + # which is using builtins.readFile instead of import. + __generateAllChars = pkgs.writers.writeC "generate-all-chars" {} '' + #include <stdio.h> + + int main(void) { + for(int i = 1; i <= 0xff; i++) { + putchar(i); + } + } + ''; +} diff --git a/users/sterni/nix/char/tests/default.nix b/users/sterni/nix/char/tests/default.nix new file mode 100644 index 000000000000..49b439adbb84 --- /dev/null +++ b/users/sterni/nix/char/tests/default.nix @@ -0,0 +1,31 @@ +{ depot, ... }: + +let + inherit (depot.nix.runTestsuite) + it + assertEq + runTestsuite + ; + + inherit (depot.users.sterni.nix) + char + string + int + fun + ; + + charList = string.toChars char.allChars; + + testAllCharConversion = it "tests conversion of all chars" [ + (assertEq "char.chr converts to char.allChars" + (builtins.genList (fun.rl char.chr (int.add 1)) 255) + charList) + (assertEq "char.ord converts from char.allChars" + (builtins.genList (int.add 1) 255) + (builtins.map char.ord charList)) + ]; + +in + runTestsuite "char" [ + testAllCharConversion + ] diff --git a/users/sterni/nix/flow/default.nix b/users/sterni/nix/flow/default.nix new file mode 100644 index 000000000000..b5783bd86deb --- /dev/null +++ b/users/sterni/nix/flow/default.nix @@ -0,0 +1,82 @@ +{ depot, ... }: + +let + + inherit (depot.nix) + yants + ; + + inherit (depot.users.sterni.nix) + fun + ; + + # we must avoid evaluating any of the sublists + # as they may contain conditions that throw + condition = yants.restrict "condition" + (ls: builtins.length ls == 2) + (yants.list yants.any); + + /* Like the common lisp macro: takes a list + of two elemented lists whose first element + is a boolean. The second element of the + first list that has true as its first + element is returned. + + Type: [ [ bool a ] ] -> a + + Example: + + cond [ + [ (builtins.isString true) 12 ] + [ (3 == 2) 13 ] + [ true 42 ] + ] + + => 42 + */ + cond = conds: switch true conds; + + /* Generic pattern match-ish construct for nix. + Takes a bunch of lists which are of length + two and checks the first element for either + a predicate or a value. The second value of + the first list which either has a value equal + to or a function that evaluates to true for + the given value. + + Type: a -> [ [ (function | a) b ] ] -> b + + Example: + + switch "foo" [ + [ "smol" "SMOL!!!" ] + [ (x: builtins.stringLength x <= 3) "smol-ish" ] + [ (fun.const true) "not smol" ] + ] + + => "smol-ish" + */ + switch = x: conds: + if builtins.length conds == 0 + then builtins.throw "exhausted all conditions" + else + let + c = condition (builtins.head conds); + s = builtins.head c; + b = + if builtins.isFunction s + then s x + else x == s; + in + if b + then builtins.elemAt c 1 + else switch x (builtins.tail conds); + + + +in { + inherit + cond + switch + ; +} diff --git a/users/sterni/nix/flow/tests/default.nix b/users/sterni/nix/flow/tests/default.nix new file mode 100644 index 000000000000..54cea01858e7 --- /dev/null +++ b/users/sterni/nix/flow/tests/default.nix @@ -0,0 +1,39 @@ +{ depot, ... }: + +let + + inherit (depot.nix.runTestsuite) + runTestsuite + it + assertEq + assertThrows + ; + + inherit (depot.users.sterni.nix.flow) + cond + match + ; + + dontEval = builtins.throw "this should not get evaluated"; + + testCond = it "tests cond" [ + (assertThrows "malformed cond list" + (cond [ [ true 1 2 ] [ false 1 ] ])) + (assertEq "last is true" "last" + (cond [ + [ false dontEval] + [ false dontEval ] + [ true "last" ] + ])) + (assertEq "first is true" 1 + (cond [ + [ true 1 ] + [ true dontEval ] + [ true dontEval ] + ])) + ]; + +in + runTestsuite "nix.flow" [ + testCond + ] diff --git a/users/sterni/nix/fun/default.nix b/users/sterni/nix/fun/default.nix new file mode 100644 index 000000000000..a32b70a62b79 --- /dev/null +++ b/users/sterni/nix/fun/default.nix @@ -0,0 +1,45 @@ +{ depot, lib, ... }: + +let + + inherit (lib) + id + ; + + # Simple function composition, + # application is right to left. + rl = f1: f2: + (x: f1 (f2 x)); + + # Compose a list of functions, + # application is right to left. + rls = fs: + builtins.foldl' (fOut: f: lr f fOut) id fs; + + # Simple function composition, + # application is left to right. + lr = f1: f2: + (x: f2 (f1 x)); + + # Compose a list of functions, + # application is left to right + lrs = x: fs: + builtins.foldl' (v: f: f v) x fs; + +in + +{ + inherit (lib) + fix + flip + const + ; + + inherit + id + rl + rls + lr + lrs + ; +} diff --git a/users/sterni/nix/int/default.nix b/users/sterni/nix/int/default.nix new file mode 100644 index 000000000000..b3157571272f --- /dev/null +++ b/users/sterni/nix/int/default.nix @@ -0,0 +1,124 @@ +{ depot, lib, ... }: + +let + + # TODO(sterni): implement nix.float and figure out which of these + # functions can be split out into a common nix.num + # library. + + inherit (depot.users.sterni.nix) + string + ; + + inherit (builtins) + bitOr + bitAnd + bitXor + mul + div + add + sub + ; + + abs = i: if i < 0 then -i else i; + + exp = base: pow: + if pow > 0 + then base * (exp base (pow - 1)) + else if pow < 0 + then 1.0 / exp base (abs pow) + else 1; + + bitShiftR = bit: count: + if count == 0 + then bit + else div (bitShiftR bit (count - 1)) 2; + + bitShiftL = bit: count: + if count == 0 + then bit + else 2 * (bitShiftL bit (count - 1)); + + hexdigits = "0123456789ABCDEF"; + + toHex = int: + let + go = i: + if i == 0 + then "" + else go (bitShiftR i 4) + + string.charAt (bitAnd i 15) hexdigits; + sign = lib.optionalString (int < 0) "-"; + in + if int == 0 + then "0" + else "${sign}${go (abs int)}"; + + fromHexMap = builtins.listToAttrs + (lib.imap0 (i: c: { name = c; value = i; }) + (lib.stringToCharacters hexdigits)); + + fromHex = literal: + let + negative = string.charAt 0 literal == "-"; + start = if negative then 1 else 0; + len = builtins.stringLength literal; + # reversed list of all digits + digits = builtins.genList + (i: string.charAt (len - 1 - i) literal) + (len - start); + parsed = builtins.foldl' + (v: d: { + val = v.val + (fromHexMap."${d}" * v.mul); + mul = v.mul * 16; + }) + { val = 0; mul = 1; } digits; + in + if negative + then -parsed.val + else parsed.val; + + # A nix integer is a 64bit signed integer + maxBound = 9223372036854775807; + + # fun fact: -9223372036854775808 is the lower bound + # for a nix integer (as you would expect), but you can't + # use it as an integer literal or you'll be greeted with: + # error: invalid integer '9223372036854775808' + # This is because all int literals when parsing are + # positive, negative "literals" are positive literals + # which are preceded by the arithmetric negation operator. + minBound = -9223372036854775807 - 1; + + odd = x: bitAnd x 1 == 1; + even = x: bitAnd x 1 == 0; + + # div and mod behave like quot and rem in Haskell, + # i. e. they truncate towards 0 + mod = a: b: let res = a / b; in a - (res * b); + + inRange = a: b: x: x >= a && x <= b; + +in { + inherit + maxBound + minBound + abs + exp + odd + even + add + sub + mul + div + mod + bitShiftR + bitShiftL + bitOr + bitAnd + bitXor + toHex + fromHex + inRange + ; +} diff --git a/users/sterni/nix/int/tests/default.nix b/users/sterni/nix/int/tests/default.nix new file mode 100644 index 000000000000..fac45dd251e1 --- /dev/null +++ b/users/sterni/nix/int/tests/default.nix @@ -0,0 +1,203 @@ +{ depot, lib, ... }: + +let + + inherit (depot.nix.runTestsuite) + runTestsuite + it + assertEq + ; + + inherit (depot.users.sterni.nix) + int + string + fun + ; + + testBounds = it "checks minBound and maxBound" [ + # this is gonna blow up in my face because + # integer overflow is undefined behavior in + # C++, so most likely anything could happen? + (assertEq "maxBound is the maxBound" true + (int.maxBound + 1 < int.maxBound)) + (assertEq "minBound is the minBound" true + (int.minBound - 1 > int.minBound)) + (assertEq "maxBound overflows to minBound" + (int.maxBound + 1) + int.minBound) + (assertEq "minBound overflows to maxBound" + (int.minBound - 1) + int.maxBound) + ]; + + expectedBytes = [ + "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" "0A" "0B" "0C" "0D" "0E" "0F" + "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "1A" "1B" "1C" "1D" "1E" "1F" + "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "2A" "2B" "2C" "2D" "2E" "2F" + "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "3A" "3B" "3C" "3D" "3E" "3F" + "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" "4A" "4B" "4C" "4D" "4E" "4F" + "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "5A" "5B" "5C" "5D" "5E" "5F" + "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" "6A" "6B" "6C" "6D" "6E" "6F" + "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" "7A" "7B" "7C" "7D" "7E" "7F" + "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "8A" "8B" "8C" "8D" "8E" "8F" + "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" "9A" "9B" "9C" "9D" "9E" "9F" + "A0" "A1" "A2" "A3" "A4" "A5" "A6" "A7" "A8" "A9" "AA" "AB" "AC" "AD" "AE" "AF" + "B0" "B1" "B2" "B3" "B4" "B5" "B6" "B7" "B8" "B9" "BA" "BB" "BC" "BD" "BE" "BF" + "C0" "C1" "C2" "C3" "C4" "C5" "C6" "C7" "C8" "C9" "CA" "CB" "CC" "CD" "CE" "CF" + "D0" "D1" "D2" "D3" "D4" "D5" "D6" "D7" "D8" "D9" "DA" "DB" "DC" "DD" "DE" "DF" + "E0" "E1" "E2" "E3" "E4" "E5" "E6" "E7" "E8" "E9" "EA" "EB" "EC" "ED" "EE" "EF" + "F0" "F1" "F2" "F3" "F4" "F5" "F6" "F7" "F8" "F9" "FA" "FB" "FC" "FD" "FE" "FF" + ]; + + hexByte = i: string.fit { width = 2; char = "0"; } (int.toHex i); + + hexInts = [ + { left = 0; right = "0"; } + { left = 1; right = "1"; } + { left = 11; right = "B"; } + { left = 123; right = "7B"; } + { left = 9000; right = "2328"; } + { left = 2323; right = "913"; } + { left = 4096; right = "1000"; } + { left = int.maxBound; right = "7FFFFFFFFFFFFFFF"; } + { left = int.minBound; right = "-8000000000000000"; } + ]; + + testHex = it "checks conversion to hex" (lib.flatten [ + (lib.imap0 (i: hex: [ + (assertEq "hexByte ${toString i} == ${hex}" (hexByte i) hex) + (assertEq "${toString i} == fromHex ${hex}" i (int.fromHex hex)) + ]) expectedBytes) + (builtins.map ({ left, right }: [ + (assertEq "toHex ${toString left} == ${right}" (int.toHex left) right) + (assertEq "${toString left} == fromHex ${right}" left (int.fromHex right)) + ]) hexInts) + ]); + + testBasic = it "checks basic int operations" [ + (assertEq "122 is even" (int.even 122 && !(int.odd 122)) true) + (assertEq "123 is odd" (int.odd 123 && !(int.even 123)) true) + (assertEq "abs -4959" (int.abs (-4959)) 4959) + ]; + + expNumbers = [ + { left = -3; right = 0.125; } + { left = -2; right = 0.25; } + { left = -1; right = 0.5; } + { left = 0; right = 1; } + { left = 1; right = 2; } + { left = 2; right = 4; } + { left = 3; right = 8; } + { left = 4; right = 16; } + { left = 5; right = 32; } + { left = 16; right = 65536; } + ]; + + testExp = it "checks exponentiation" + (builtins.map ({ left, right }: + assertEq + "2 ^ ${toString left} == ${toString right}" + (int.exp 2 left) right) expNumbers); + + shifts = [ + { a = 2; b = 5; c = 64; op = "<<"; } + { a = -2; b = 5; c = -64; op = "<<"; } + { a = 123; b = 4; c = 1968; op = "<<"; } + { a = 1; b = 8; c = 256; op = "<<"; } + { a = 256; b = 8; c = 1; op = ">>"; } + { a = 374; b = 2; c = 93; op = ">>"; } + { a = 2; b = 2; c = 0; op = ">>"; } + { a = 99; b = 9; c = 0; op = ">>"; } + ]; + + checkShift = { a, b, c, op }@args: + let + f = string.match op { + "<<" = int.bitShiftL; + ">>" = int.bitShiftR; + }; + in assertEq "${toString a} ${op} ${toString b} == ${toString c}" (f a b) c; + + checkShiftRDivExp = n: + assertEq "${toString n} >> 5 == ${toString n} / 2 ^ 5" + (int.bitShiftR n 5) (int.div n (int.exp 2 5)); + + checkShiftLMulExp = n: + assertEq "${toString n} >> 6 == ${toString n} * 2 ^ 6" + (int.bitShiftL n 5) (int.mul n (int.exp 2 5)); + + testBit = it "checks bitwise operations" (lib.flatten [ + (builtins.map checkShift shifts) + (builtins.map checkShiftRDivExp [ + 1 + 2 + 3 + 5 + 7 + 23 + 1623 + 238 + 34 + 348 + 2834 + 834 + 348 + ]) + (builtins.map checkShiftLMulExp [ + 1 + 2 + 3 + 5 + 7 + 23 + 384 + 3 + 2 + 5991 + 85109 + 38 + ]) + ]); + + divisions = [ + { a = 2; b = 1; c = 2; mod = 0;} + { a = 2; b = 2; c = 1; mod = 0;} + { a = 20; b = 10; c = 2; mod = 0;} + { a = 12; b = 5; c = 2; mod = 2;} + { a = 23; b = 4; c = 5; mod = 3;} + ]; + + checkDiv = n: { a, b, c, mod }: [ + (assertEq "${n}: div result" (int.div a b) c) + (assertEq "${n}: mod result" (int.mod a b) mod) + (assertEq "${n}: divMod law" ((int.div a b) * b + (int.mod a b)) a) + ]; + + testDivMod = it "checks integer division and modulo" + (lib.flatten [ + (builtins.map (checkDiv "+a / +b") divisions) + (builtins.map (fun.rl (checkDiv "-a / +b") (x: x // { + a = -x.a; + c = -x.c; + mod = -x.mod; + })) divisions) + (builtins.map (fun.rl (checkDiv "+a / -b") (x: x // { + b = -x.b; + c = -x.c; + })) divisions) + (builtins.map (fun.rl (checkDiv "-a / -b") (x: x // { + a = -x.a; + b = -x.b; + mod = -x.mod; + })) divisions) + ]); + +in + runTestsuite "nix.int" [ + testBounds + testHex + testBasic + testExp + testBit + testDivMod + ] diff --git a/users/sterni/nix/string/default.nix b/users/sterni/nix/string/default.nix new file mode 100644 index 000000000000..3fe7c04618c3 --- /dev/null +++ b/users/sterni/nix/string/default.nix @@ -0,0 +1,76 @@ +{ depot, lib, ... }: + +let + + inherit (depot.users.sterni.nix.char) + chr + ord + ; + + inherit (depot.users.sterni.nix) + int + flow + ; + + take = n: s: + builtins.substring 0 n s; + + drop = n: s: + builtins.substring n int.maxBound s; + + charAt = i: s: + let + r = builtins.substring i 1 s; + in if r == "" then null else r; + + charIndex = char: s: + let + len = builtins.stringLength s; + go = i: + flow.cond [ + [ (i >= len) null ] + [ (charAt i s == char) i ] + [ true (go (i + 1)) ] + ]; + in go 0; + + toChars = lib.stringToCharacters; + fromChars = lib.concatStrings; + + toBytes = str: + builtins.map ord (toChars str); + + fromBytes = is: lib.concatMapStrings chr is; + + pad = { left ? 0, right ? 0, char ? " " }: s: + let + leftS = fromChars (builtins.genList (_: char) left); + rightS = fromChars (builtins.genList (_: char) right); + in "${leftS}${s}${rightS}"; + + fit = { char ? " ", width, side ? "left" }: s: + let + diff = width - builtins.stringLength s; + in + if diff <= 0 + then s + else pad { inherit char; "${side}" = diff; } s; + + # pattern matching for strings only + match = val: matcher: matcher."${val}"; + +in { + inherit + take + drop + charAt + charIndex + toBytes + fromBytes + toChars + fromChars + pad + fit + match + ; +} diff --git a/users/sterni/nix/string/tests/default.nix b/users/sterni/nix/string/tests/default.nix new file mode 100644 index 000000000000..2caecbfa7b3f --- /dev/null +++ b/users/sterni/nix/string/tests/default.nix @@ -0,0 +1,65 @@ +{ depot, ... }: + +let + + inherit (depot.users.sterni.nix) + string + ; + + inherit (depot.nix.runTestsuite) + it + assertEq + runTestsuite + ; + + testTakeDrop = it "tests take and drop" [ + (assertEq "take" + (string.take 5 "five and more") + "five ") + (assertEq "drop" + (string.drop 2 "coin") + "in") + (assertEq "take out of bounds" + (string.take 100 "foo") + "foo") + (assertEq "drop out of bounds" + (string.drop 42 "lol") + "") + ]; + + testIndexing = it "tests string indexing" [ + (assertEq "normal charAt" + (string.charAt 3 "helo") + "o") + (assertEq "out of bounds charAt" + (string.charAt 5 "helo") + null) + ]; + + testFinding = it "tests finding in strings" [ + (assertEq "normal charIndex" + (string.charIndex "d" "abcdefghijkl") + 3) + (assertEq "charIndex no match" + (string.charIndex "w" "zZzZzzzZZZ") + null) + ]; + + dontEval = builtins.throw "this should not get evaluated"; + + testMatch = it "tests match" [ + (assertEq "basic match usage" 42 + (string.match "answer" { + "answer" = 42; + "banana" = dontEval; + "maleur" = dontEval; + })) + ]; + +in + runTestsuite "nix.string" [ + testTakeDrop + testIndexing + testFinding + testMatch + ] diff --git a/users/sterni/nix/url/default.nix b/users/sterni/nix/url/default.nix new file mode 100644 index 000000000000..ce7ed9b83437 --- /dev/null +++ b/users/sterni/nix/url/default.nix @@ -0,0 +1,42 @@ +{ depot, lib, ... }: + +let + + inherit (depot.users.sterni.nix) + char + int + string + ; + + reserved = c: builtins.elem c [ + "!" "#" "$" "&" "'" "(" ")" + "*" "+" "," "/" ":" ";" "=" + "?" "@" "[" "]" + ]; + + unreserved = c: char.asciiAlphaNum c + || builtins.elem c [ "-" "_" "." "~" ]; + + percentEncode = c: + if unreserved c + then c + else "%" + (string.fit { + width = 2; + char = "0"; + side = "left"; + } (int.toHex (char.ord c))); + + encode = { leaveReserved ? false }: s: + let + chars = lib.stringToCharacters s; + tr = c: + if leaveReserved && reserved c + then c + else percentEncode c; + in lib.concatStrings (builtins.map tr chars); + +in { + inherit + encode + ; +} diff --git a/users/sterni/nix/url/tests/default.nix b/users/sterni/nix/url/tests/default.nix new file mode 100644 index 000000000000..f58cf12a02b2 --- /dev/null +++ b/users/sterni/nix/url/tests/default.nix @@ -0,0 +1,49 @@ +{ depot, ... }: + +let + + inherit (depot.nix.runTestsuite) + it + assertEq + runTestsuite + ; + + inherit (depot.users.sterni.nix) + url + ; + + checkEncoding = args: { left, right }: + assertEq "encode ${builtins.toJSON left} == ${builtins.toJSON right}" + (url.encode args left) right; + + unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_~"; + + encodeExpected = [ + { left = "Laguna Beach"; right = "Laguna%20Beach"; } + { left = "👾 Exterminate!"; right = "%F0%9F%91%BE%20Exterminate%21"; } + { left = unreserved; right = unreserved; } + { + left = "`!@#$%^&*()+={}[]:;'\\|<>,?/ \""; + right = "%60%21%40%23%24%25%5E%26%2A%28%29%2B%3D%7B%7D%5B%5D%3A%3B%27%5C%7C%3C%3E%2C%3F%2F%20%22"; + } + ]; + + testEncode = it "checks url.encode" + (builtins.map (checkEncoding {}) encodeExpected); + + testLeaveReserved = it "checks that leaveReserved is like id for valid URLs" + (builtins.map (x: checkEncoding { leaveReserved = true; } { left = x; right = x; }) [ + "ftp://ftp.is.co.za/rfc/rfc1808.txt" + "http://www.ietf.org/rfc/rfc2396.txt" + "ldap://[2001:db8::7]/c=GB?objectClass?one" + "mailto:John.Doe@example.com" + "news:comp.infosystems.www.servers.unix" + "tel:+1-816-555-1212" + "telnet://192.0.2.16:80/" + "urn:oasis:names:specification:docbook:dtd:xml:4.1.2" + ]); +in + runTestsuite "nix.url" [ + testEncode + testLeaveReserved + ] diff --git a/users/sterni/nix/utf8/default.nix b/users/sterni/nix/utf8/default.nix new file mode 100644 index 000000000000..713f1f57cbe6 --- /dev/null +++ b/users/sterni/nix/utf8/default.nix @@ -0,0 +1,208 @@ +{ depot, lib, ... }: + +let + + # TODO(sterni): encode + + inherit (depot.users.sterni.nix) + char + flow + fun + int + string + util + ; + + /* (Internal) function to determine the amount + bytes left in a UTF-8 byte sequence from the + first byte. + + This function will throw if the given first + byte is ill-formed, but will not detect all + cases of ill-formed-ness. + + Based on table 3-6. from The Unicode Standard, + Version 13.0, section 3.9. + + Type: integer -> integer + */ + byteCount = i: flow.cond [ + [ (int.bitAnd i 128 == 0) 1 ] + [ (int.bitAnd i 224 == 192) 2 ] + [ (int.bitAnd i 240 == 224) 3 ] + [ (int.bitAnd i 248 == 240) 4 ] + [ true (builtins.throw "Ill-formed first byte ${int.toHex i}") ] + ]; + + /* (Internal) function to check if a given byte in + an UTF-8 byte sequence is well-formed. + + Based on table 3-7. from The Unicode Standard, + Version 13.0, section 3.9. + + Throws if the first byte is invalid. + + Type: integer -> integer -> (integer -> bool) + */ + wellFormedByte = + # first byte's integer value + first: + # byte position as an index starting with 0 + pos: + let + defaultRange = int.inRange 128 191; + in + # The first byte is either ASCII which requires no checks + # or we automatically check it when we check the subsequent + # bytes. The downside is that this may generate bad error + # messages in very rare cases. + if pos == 0 + then lib.const true + else if pos > 1 # 3rd and 4th byte have only one validity rule + then defaultRange + else assert pos == 1; flow.switch first [ + [ (int.inRange 194 223) defaultRange ] # C2..DF + [ 224 (int.inRange 160 191) ] # E0 + [ (int.inRange 225 236) defaultRange ] # E1..EC + [ 237 (int.inRange 128 159) ] # ED + [ (int.inRange 238 239) defaultRange ] # EE..EF + [ 240 (int.inRange 144 191) ] # F0 + [ (int.inRange 241 243) defaultRange ] # F1..F3 + [ 244 (int.inRange 128 143) ] # F4 + [ + (fun.const true) + (builtins.throw "Invalid first byte ${int.toHex first}") + ] + ]; + + /* Iteration step for decoding an UTF-8 byte sequence. + It decodes incrementally, i. e. it has to be fed + one byte at a time and then returns either a + new state or a final result. + + If the resulting attribute set contains the attribute + result, it is finished and the decoded codepoint is + contained in that attribute. In all other cases, + pass the returned set to step again along with + a new byte. The initial state to pass is the empty + set. + + Extra attributes are always passed through, so you + can pass extra state. Be sure not to use result, + pos, code, first or count. + + This function will throw with a fairly detailed + message if it encounters ill-formed bytes. + + The implementation is based on The Unicode Standard, + Version 13.0, section 3.9, especially table 3-6. + + Type: { ... } -> string -> ({ result :: integer, ... } | { ... }) + + Example: utf8.step {} "f" + => { result = 102; } + */ + step = { pos ? 0, code ? 0, ... }@args: byte: + let + value = char.ord byte; + # first byte is context for well-formed-ness + first = args.first or value; + count = args.count or (byteCount first); + newCode = + if count == 1 + then int.bitAnd 127 first # ascii character + else # multi byte UTF-8 sequence + let + # Calculate the bitmask for extracting the + # codepoint data in the current byte. + # If the codepoint is not ASCII, the bits + # used for codepoint data differ depending + # on the byte position and overall byte + # count. The first byte always ignores + # the (count + 1) most significant bits. + # For all subsequent bytes, the 2 most + # significant bits need to be ignored. + # See also table 3-6. + mask = + if pos == 0 + then int.exp 2 (8 - (count + 1)) - 1 + else 63; + # UTF-8 uses the 6 least significant bits in all + # subsequent bytes after the first one. Therefore + # We can determine the amount we need to shift + # the current value by the amount of bytes left. + offset = (count - (pos + 1)) * 6; + in + code + (int.bitShiftL (int.bitAnd mask value) offset); + illFormedMsg = + "Ill-formed byte ${int.toHex value} at position ${toString pos} in ${toString count} byte UTF-8 sequence"; + in + if !(wellFormedByte first pos value) then builtins.throw illFormedMsg + else if pos + 1 == count + then (builtins.removeAttrs args [ # allow extra state being passed through + "count" + "code" + "pos" + "first" + ]) // { result = newCode; } + else (builtins.removeAttrs args [ "result" ]) // { + inherit count first; + code = newCode; + pos = pos + 1; + }; + + /* Decode an UTF-8 string into a list of codepoints. + + Throws if the string is ill-formed UTF-8. + + Type: string -> [ integer ] + */ + # TODO(sterni): option to fallback to replacement char instead of failure + decode = s: + let + iter = { codes ? [], ... }@args: byte: + let + res = step args byte; + in + # foldl' forceValues the calculate value only at the end + # this makes the thunk grow large enough to cause a stack + # overflow with sufficiently large strings. To avoid this + # we always deepSeq the result which also keeps memory + # usage of decode reasonable. + builtins.deepSeq res + (if res ? result + then res // { + codes = codes ++ [ res.result ]; + } + else res); + iterResult = + builtins.foldl' iter {} (string.toChars s); + earlyEndMsg = + if iterResult ? count && iterResult ? pos + then "Missing ${toString (with iterResult; count - pos)} bytes at end of input" + else "Unexpected end of input"; + in + if iterResult ? result + then iterResult.codes + else builtins.throw earlyEndMsg; + + /* Decodes an UTF-8 string, but doesn't throw on error. + Instead it returns null. + + Type: string -> ( [ integer ] | null) + */ + decodeSafe = s: + let + res = builtins.tryEval (decode s); + in + if res.success + then res.value + else null; + +in { + inherit + decode + decodeSafe + step + ; +} diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix new file mode 100644 index 000000000000..d9d8ae77105d --- /dev/null +++ b/users/sterni/nix/utf8/tests/default.nix @@ -0,0 +1,121 @@ +{ depot, lib, ... }: + +let + + inherit (depot.third_party) + runCommandLocal + ; + + inherit (depot.nix.runTestsuite) + runTestsuite + it + assertEq + assertThrows + assertDoesNotThrow + ; + + inherit (depot.users.Profpatsch.writers) + rustSimple + ; + + inherit (depot.users.sterni.nix) + int + utf8 + string + char + ; + + rustDecoder = rustSimple { + name = "utf8-decode"; + } '' + use std::io::{self, Read}; + fn main() -> std::io::Result<()> { + let mut buffer = String::new(); + io::stdin().read_to_string(&mut buffer)?; + + print!("[ "); + + for c in buffer.chars() { + print!("{} ", u32::from(c)); + } + + print!("]"); + + Ok(()) + } + ''; + + rustDecode = s: + let + expr = runCommandLocal "${s}-decoded" {} '' + printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out + ''; + in import expr; + + hexDecode = l: + utf8.decode (string.fromBytes (builtins.map int.fromHex l)); + + testFailures = it "checks UTF-8 decoding failures" [ + (assertThrows "emtpy bytestring throws" (utf8.decode "")) + (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ])) + # examples from The Unicode Standard + (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ])) + (assertThrows "ill-formed: E0 9F 80" (hexDecode [ "E0" "9F" "80" ])) + (assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ]) + ]; + + testAscii = it "checks decoding of ascii strings" + (builtins.map (s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\"" + (string.toBytes s) (utf8.decode s)) [ + "foo bar" + "hello\nworld" + "carriage\r\nreturn" + "1238398494829304 []<><>({})[]!!)" + (string.take 127 char.allChars) + ]); + + randomUnicode = [ + "🥰👨👨👧👦🐈⬛👩🏽🦰" + # https://kermitproject.org/utf8.html + "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ" + "An preost wes on leoden, Laȝamon was ihoten" + "Sîne klâwen durh die wolken sint geslagen," + "Τὴ γλῶσσα μοῦ ἔδωσαν ἑλληνικὴ" + "На берегу пустынных волн" + "ვეპხის ტყაოსანი შოთა რუსთაველი" + "யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம், " + "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸು " + ]; + + # https://kermitproject.org/utf8.html + glassSentences = [ + "Euro Symbol: €." + "Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα." + "Íslenska / Icelandic: Ég get etið gler án þess að meiða mig." + "Polish: Mogę jeść szkło, i mi nie szkodzi." + "Romanian: Pot să mănânc sticlă și ea nu mă rănește." + "Ukrainian: Я можу їсти шкло, й воно мені не пошкодить." + "Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։" + "Georgian: მინას ვჭამ და არა მტკივა." + "Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती." + "Hebrew(2): אני יכול לאכול זכוכית וזה לא מזיק לי." + "Yiddish(2): איך קען עסן גלאָז און עס טוט מיר נישט װײ." + "Arabic(2): أنا قادر على أكل الزجاج و هذا لا يؤلمني." + "Japanese: 私はガラスを食べられます。それは私を傷つけません。" + "Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ " + ]; + + testDecoding = it "checks decoding of UTF-8 strings against Rust's String" + (builtins.map + (s: assertEq "Decoding of “${s}” is correct" (utf8.decode s) (rustDecode s)) + (lib.flatten [ + glassSentences + randomUnicode + ])); + +in + runTestsuite "nix.utf8" [ + testFailures + testAscii + testDecoding + ] |