diff options
Diffstat (limited to 'users/sterni/nix')
24 files changed, 1275 insertions, 321 deletions
diff --git a/users/sterni/nix/build/buildGopherHole/default.nix b/users/sterni/nix/build/buildGopherHole/default.nix new file mode 100644 index 000000000000..eec13a865421 --- /dev/null +++ b/users/sterni/nix/build/buildGopherHole/default.nix @@ -0,0 +1,109 @@ +{ depot, pkgs, lib, ... }: + +let + inherit (pkgs) + runCommand + writeText + ; + + inherit (depot.users.sterni.nix.build) + buildGopherHole + ; + + fileTypes = { + # RFC1436 + text = "0"; + menu = "1"; + cso = "2"; + error = "3"; + binhex = "4"; + dos = "5"; + uuencoded = "6"; + index-server = "7"; + telnet = "8"; + binary = "9"; + mirror = "+"; + gif = "g"; + image = "I"; + tn3270 = "T"; + # non-standard + info = "i"; + html = "h"; + }; + + buildFile = { file, name, fileType ? fileTypes.text }: + runCommand name + { + passthru = { + # respect the file type the file derivation passes + # through. otherwise use explicitly set type or + # default value. + fileType = file.fileType or fileType; + }; + } '' + ln -s ${file} "$out" + ''; + + buildGopherMap = dir: + let + /* strings constitute an info line or an empty line + if their length is zero. sets that contain a menu + value have that added to the gophermap as-is. + + all other entries should be a set which can be built using + buildGopherHole and is linked by their name. The resulting + derivation is expected to passthru a fileType containing the + gopher file type char of themselves. + */ + gopherMapLine = e: + if builtins.isString e + then e + else if e ? menu + then e.menu + else + let + drv = buildGopherHole e; + title = e.title or e.name; + in + "${drv.fileType}${title}\t${drv.name}"; + in + writeText ".gophermap" (lib.concatMapStringsSep "\n" gopherMapLine dir); + + buildDir = + { dir, name, ... }: + + let + # filter all entries out that have to be symlinked: + # sets with the file or dir attribute + drvOnly = builtins.map buildGopherHole (builtins.filter + (x: !(builtins.isString x) && (x ? dir || x ? 
file)) + dir); + gopherMap = buildGopherMap dir; + in + runCommand name + { + passthru = { + fileType = fileTypes.dir; + }; + } + ('' + mkdir -p "$out" + ln -s "${gopherMap}" "$out/.gophermap" + '' + lib.concatMapStrings + (drv: '' + ln -s "${drv}" "$out/${drv.name}" + '') + drvOnly); +in + +{ + # Dispatch into different file / dir handling code + # which is mutually recursive with this function. + __functor = _: args: + if args ? file then buildFile args + else if args ? dir then buildDir args + else builtins.throw "Unrecognized gopher hole item type: " + + lib.generators.toPretty { } args; + + inherit fileTypes; +} diff --git a/users/sterni/nix/char/default.nix b/users/sterni/nix/char/default.nix index aacfc9dcbe4d..9c6ce2fb250b 100644 --- a/users/sterni/nix/char/default.nix +++ b/users/sterni/nix/char/default.nix @@ -53,17 +53,20 @@ let asciiAlpha = c: let v = ord c; - in (v >= 65 && v <= 90) + in + (v >= 65 && v <= 90) || (v >= 97 && v <= 122); asciiNum = c: let v = ord c; - in v >= 48 && v <= 57; + in + v >= 48 && v <= 57; asciiAlphaNum = c: asciiAlpha c || asciiNum c; -in { +in +{ inherit allChars char @@ -78,18 +81,19 @@ in { # originally I generated a nix file containing a list of # characters, but infinisil uses a better way which I adapt # which is using builtins.readFile instead of import. 
- __generateAllChars = pkgs.runCommandCC "generate-all-chars" { - source = '' - #include <stdio.h> - - int main(void) { - for(int i = 1; i <= 0xff; i++) { - putchar(i); + __generateAllChars = pkgs.runCommandCC "generate-all-chars" + { + source = '' + #include <stdio.h> + + int main(void) { + for(int i = 1; i <= 0xff; i++) { + putchar(i); + } } - } - ''; - passAsFile = [ "source" ]; - } '' + ''; + passAsFile = [ "source" ]; + } '' $CC -o "$out" -x c "$sourcePath" ''; } diff --git a/users/sterni/nix/char/tests/default.nix b/users/sterni/nix/char/tests/default.nix index 49b439adbb84..cb17b74c578f 100644 --- a/users/sterni/nix/char/tests/default.nix +++ b/users/sterni/nix/char/tests/default.nix @@ -10,7 +10,7 @@ let inherit (depot.users.sterni.nix) char string - int + num fun ; @@ -18,14 +18,14 @@ let testAllCharConversion = it "tests conversion of all chars" [ (assertEq "char.chr converts to char.allChars" - (builtins.genList (fun.rl char.chr (int.add 1)) 255) + (builtins.genList (fun.rl char.chr (num.add 1)) 255) charList) (assertEq "char.ord converts from char.allChars" - (builtins.genList (int.add 1) 255) + (builtins.genList (num.add 1) 255) (builtins.map char.ord charList)) ]; in - runTestsuite "char" [ - testAllCharConversion - ] +runTestsuite "char" [ + testAllCharConversion +] diff --git a/users/sterni/nix/float/default.nix b/users/sterni/nix/float/default.nix new file mode 100644 index 000000000000..ecb6465c8842 --- /dev/null +++ b/users/sterni/nix/float/default.nix @@ -0,0 +1,23 @@ +{ depot, ... 
}: + +let + inherit (depot.users.sterni.nix) + num + ; +in + +rec { + # In C++ Nix, the required builtins have been added in version 2.4 + ceil = builtins.ceil or (throw "Nix implementation is missing builtins.ceil"); + floor = builtins.floor or (throw "Nix implementation is missing builtins.floor"); + + truncate = f: if f >= 0 then floor f else ceil f; + round = f: + let + s = num.sign f; + a = s * f; + in + s * (if a >= floor a + 0.5 then ceil a else floor a); + + intToFloat = i: i * 1.0; +} diff --git a/users/sterni/nix/float/tests/default.nix b/users/sterni/nix/float/tests/default.nix new file mode 100644 index 000000000000..75e2a1bfa091 --- /dev/null +++ b/users/sterni/nix/float/tests/default.nix @@ -0,0 +1,49 @@ +{ depot, lib, ... }: + +let + + inherit (depot.nix.runTestsuite) + runTestsuite + it + assertEq + ; + + inherit (depot.users.sterni.nix) + float + ; + + testsBuiltins = it "tests builtin operations" [ + (assertEq "ceil pos" (float.ceil 1.5) 2) + (assertEq "ceil neg" (float.ceil (-1.5)) (-1)) + (assertEq "floor pos" (float.floor 1.5) 1) + (assertEq "floor neg" (float.floor (-1.5)) (-2)) + ]; + + testsConversionFrom = it "tests integer to float conversion" [ + (assertEq "float.intToFloat is identity for floats" (float.intToFloat 1.3) 1.3) + (assertEq "float.intToFloat converts ints" + (builtins.all + (val: builtins.isFloat val) + (builtins.map float.intToFloat (builtins.genList (i: i - 500) 1000))) + true) + ]; + + exampleFloats = [ 0.5 0.45 0.3 0.1 200 203.457847 204.65547 (-1.5) (-2) (-1.3) (-0.45) ]; + testsConversionTo = it "tests float to integer conversion" [ + (assertEq "round" + (builtins.map float.round exampleFloats) + [ 1 0 0 0 200 203 205 (-2) (-2) (-1) 0 ]) + (assertEq "truncate towards zero" + (builtins.map float.truncate exampleFloats) + [ 0 0 0 0 200 203 204 (-1) (-2) (-1) 0 ]) + ]; +in + +runTestsuite "nix.num" ([ + testsConversionFrom +] + # Skip for e.g. C++ Nix < 2.4 +++ lib.optionals (builtins ? ceil && builtins ? 
floor) [ + testsConversionTo + testsBuiltins +]) diff --git a/users/sterni/nix/flow/default.nix b/users/sterni/nix/flow/default.nix index b5783bd86deb..4bef0abb91e9 100644 --- a/users/sterni/nix/flow/default.nix +++ b/users/sterni/nix/flow/default.nix @@ -68,13 +68,14 @@ let then s x else x == s; in - if b - then builtins.elemAt c 1 - else switch x (builtins.tail conds); + if b + then builtins.elemAt c 1 + else switch x (builtins.tail conds); -in { +in +{ inherit cond switch diff --git a/users/sterni/nix/flow/tests/default.nix b/users/sterni/nix/flow/tests/default.nix index 54cea01858e7..9f974a61c7b2 100644 --- a/users/sterni/nix/flow/tests/default.nix +++ b/users/sterni/nix/flow/tests/default.nix @@ -21,7 +21,7 @@ let (cond [ [ true 1 2 ] [ false 1 ] ])) (assertEq "last is true" "last" (cond [ - [ false dontEval] + [ false dontEval ] [ false dontEval ] [ true "last" ] ])) @@ -34,6 +34,6 @@ let ]; in - runTestsuite "nix.flow" [ - testCond - ] +runTestsuite "nix.flow" [ + testCond +] diff --git a/users/sterni/nix/fun/default.nix b/users/sterni/nix/fun/default.nix index 6b3541ed4c65..824cebfed244 100644 --- a/users/sterni/nix/fun/default.nix +++ b/users/sterni/nix/fun/default.nix @@ -39,6 +39,198 @@ let builtins.match ".*<attrspat ellipsis=\"1\">.*" (builtins.toXML f) != null; + /* Return the number of arguments the given function accepts or 0 if the value + is not a function. + + Example: + + argCount argCount + => 1 + + argCount builtins.add + => 2 + + argCount pkgs.stdenv.mkDerivation + => 1 + */ + argCount = f: + let + # N.B. since we are only interested if the result of calling is a function + # as opposed to a normal value or evaluation failure, we never need to + # check success, as value will be false (i.e. not a function) in the + # failure case. 
+ called = builtins.tryEval ( + f (builtins.throw "You should never see this error message") + ); + in + if !(builtins.isFunction f || builtins.isFunction (f.__functor or null)) + then 0 + else 1 + argCount called.value; + + /* Call a given function with a given list of arguments. + + Example: + + apply builtins.sub [ 20 10 ] + => 10 + */ + apply = f: args: + builtins.foldl' (f: x: f x) f args; + + # TODO(sterni): think of a better name for unapply + /* Collect n arguments into a list and pass them to the given function. + Allows calling a function that expects a list by feeding it the list + elements individually as function arguments - the limitation is + that the list must be of constant length. + + This is mainly useful for functions that wrap other, arbitrary functions + in conjunction with argCount and apply, since lists of arguments are + easier to deal with usually. + + Example: + + (unapply 3 lib.id) 1 2 3 + => [ 1 2 3 ] + + (unapply 5 lib.reverse) 1 2 null 4 5 + => [ 5 4 null 2 1 ] + + # unapply and apply compose the identity relation together + + unapply (argCount f) (apply f) + # is equivalent to f (if the function has a constant number of arguments) + + (unapply 2 (apply builtins.sub)) 20 10 + => 10 + */ + unapply = + let + unapply' = acc: n: f: x: + if n == 1 + then f (acc ++ [ x ]) + else unapply' (acc ++ [ x ]) (n - 1) f; + in + unapply' [ ]; + + /* Optimize a tail recursive Nix function by intercepting the recursive + function application and expressing it in terms of builtins.genericClosure + instead. The main benefit of this optimization is that even a naively + written recursive algorithm won't overflow the stack. + + For this to work the following things prerequisites are necessary: + + - The passed function needs to be a fix point for its self reference, + i. e. the argument to tailCallOpt needs to be of the form + `self: # function body that uses self to call itself`. 
+ This is because tailCallOpt needs to manipulate the call to self + which otherwise wouldn't be possible due to Nix's lexical scoping. + + - The passed function may only call itself as a tail call, all other + forms of recursions will fail evaluation. + + This function was mainly written to prove that builtins.genericClosure + can be used to express any (tail) recursive algorithm. It can be used + to avoid stack overflows for deeply recursive, but naively written + functions (in the context of Nix this mainly means using recursion + instead of (ab)using more performant and less limited builtins). + A better alternative to using this function is probably translating + the algorithm to builtins.genericClosure manually. Also note that + using tailCallOpt doesn't mean that the stack won't ever overflow: + Data structures, especially lazy ones, can still cause all the + available stack space to be consumed. + + The optimization also only concerns avoiding stack overflows, + tailCallOpt will make functions slower if anything. + + Type: (F -> F) -> F where F is any tail recursive function. + + Example: + + let + label' = self: acc: n: + if n == 0 + then "This is " + acc + "cursed." + else self (acc + "very ") (n - 1); + + # Equivalent to a naive recursive implementation in Nix + label = (lib.fix label') ""; + + labelOpt = (tailCallOpt label') ""; + in + + label 5 + => "This is very very very very very cursed." + + labelOpt 5 + => "This is very very very very very cursed." + + label 10000 + => error: stack overflow (possible infinite recursion) + + labelOpt 10000 + => "This is very very very very very very very very very… + */ + tailCallOpt = f: + let + argc = argCount (lib.fix f); + + # This function simulates being f for f's self reference. Instead of + # recursing, it will just return the arguments received as a specially + # tagged set, so the recursion step can be performed later. 
+ fakef = unapply argc (args: { + __tailCall = true; + inherit args; + }); + # Pass fakef to f so that it'll be called instead of recursing, ensuring + # only one recursion step is performed at a time. + encodedf = f fakef; + + opt = args: + let + steps = builtins.genericClosure { + # This is how we encode a (tail) call: A set with final == false + # and the list of arguments to pass to be found in args. + startSet = [ + { + key = 0; + final = false; + inherit args; + } + ]; + + operator = + { key, final, ... }@state: + let + # Plumbing to make genericClosure happy + newId = { + key = key + 1; + }; + + # Perform recursion step + call = apply encodedf state.args; + + # If call encodes a new call, return the new encoded call, + # otherwise signal that we're done. + newState = + if builtins.isAttrs call && call.__tailCall or false + then newId // { + final = false; + inherit (call) args; + } else newId // { + final = true; + value = call; + }; + in + + if final + then [ ] # end condition for genericClosure + else [ newState ]; + }; + in + # The returned list contains intermediate steps we ignore. + (builtins.head (builtins.filter (x: x.final) steps)).value; + in + unapply argc opt; in { @@ -55,5 +247,9 @@ in lr lrs hasEllipsis + argCount + tailCallOpt + apply + unapply ; } diff --git a/users/sterni/nix/fun/tests/default.nix b/users/sterni/nix/fun/tests/default.nix index 6492554306e1..6b1e6fcc7b0b 100644 --- a/users/sterni/nix/fun/tests/default.nix +++ b/users/sterni/nix/fun/tests/default.nix @@ -7,6 +7,8 @@ let assertEq ; + inherit (depot.nix) escapeExecline; + inherit (depot.users.sterni.nix) fun ; @@ -23,7 +25,58 @@ let (assertEq "Ellipsis" true (fun.hasEllipsis ({ depot, pkgs, ... 
}: 42))) ]; + + argCountTests = it "checks fun.argCount" [ + (assertEq "builtins.sub has two arguments" 2 + (fun.argCount builtins.sub)) + (assertEq "fun.argCount has one argument" 1 + (fun.argCount fun.argCount)) + (assertEq "runTestsuite has two arguments" 2 + (fun.argCount runTestsuite)) + ]; + + applyTests = it "checks that fun.apply is equivalent to calling" [ + (assertEq "fun.apply builtins.sub" (builtins.sub 23 42) + (fun.apply builtins.sub [ 23 42 ])) + (assertEq "fun.apply escapeExecline" (escapeExecline [ "foo" [ "bar" ] ]) + (fun.apply escapeExecline [ [ "foo" [ "bar" ] ] ])) + ]; + + unapplyTests = it "checks fun.unapply" [ + (assertEq "fun.unapply 3 accepts 3 args" 3 + (fun.argCount (fun.unapply 3 fun.id))) + (assertEq "fun.unapply 73 accepts 73 args" 73 + (fun.argCount (fun.unapply 73 fun.id))) + (assertEq "fun.unapply 1 accepts 73 args" 1 + (fun.argCount (fun.unapply 1 fun.id))) + (assertEq "fun.unapply collects arguments correctly" + (fun.unapply 5 fun.id 1 2 3 4 5) + [ 1 2 3 4 5 ]) + (assertEq "fun.unapply calls the given function correctly" 1 + (fun.unapply 1 builtins.head 1)) + ]; + + fac' = self: acc: n: if n == 0 then acc else self (n * acc) (n - 1); + + facPlain = fun.fix fac' 1; + facOpt = fun.tailCallOpt fac' 1; + + tailCallOptTests = it "checks fun.tailCallOpt" [ + (assertEq "optimized and unoptimized factorial have the same base case" + (facPlain 0) + (facOpt 0)) + (assertEq "optimized and unoptimized factorial have same value for 1" + (facPlain 1) + (facOpt 1)) + (assertEq "optimized and unoptimized factorial have same value for 100" + (facPlain 100) + (facOpt 100)) + ]; in - runTestsuite "nix.fun" [ - hasEllipsisTests - ] +runTestsuite "nix.fun" [ + hasEllipsisTests + argCountTests + applyTests + unapplyTests + tailCallOptTests +] diff --git a/users/sterni/nix/html/default.nix b/users/sterni/nix/html/default.nix index 2498d832aadf..d25a7ab8dac0 100644 --- a/users/sterni/nix/html/default.nix +++ b/users/sterni/nix/html/default.nix @@ 
-20,7 +20,7 @@ let => "&lt;hello&gt;" */ escapeMinimal = builtins.replaceStrings - [ "<" ">" "&" "\"" "'" ] + [ "<" ">" "&" "\"" "'" ] [ "&lt;" "&gt;" "&amp;" "&quot;" "&#039;" ]; /* Return a string with a correctly rendered tag of the given name, @@ -87,18 +87,20 @@ let renderTag = tag: attrs: content: let attrs' = builtins.concatStringsSep "" ( - builtins.map (n: - " ${escapeMinimal n}=\"${escapeMinimal (toString attrs.${n})}\"" - ) (builtins.attrNames attrs) + builtins.map + (n: + " ${escapeMinimal n}=\"${escapeMinimal (toString attrs.${n})}\"" + ) + (builtins.attrNames attrs) ); content' = if builtins.isList content then builtins.concatStringsSep "" content else content; in - if content == null - then "<${tag}${attrs'}/>" - else "<${tag}${attrs'}>${content'}</${tag}>"; + if content == null + then "<${tag}${attrs'}/>" + else "<${tag}${attrs'}>${content'}</${tag}>"; /* Prepend "<!DOCTYPE html>" to a string. @@ -111,7 +113,8 @@ let */ withDoctype = doc: "<!DOCTYPE html>" + doc; -in { +in +{ inherit escapeMinimal renderTag withDoctype; __findFile = _: renderTag; diff --git a/users/sterni/nix/html/tests/default.nix b/users/sterni/nix/html/tests/default.nix index 8688b6937130..ed520675c55a 100644 --- a/users/sterni/nix/html/tests/default.nix +++ b/users/sterni/nix/html/tests/default.nix @@ -8,15 +8,17 @@ let ; exampleDocument = withDoctype (<html> { lang = "en"; } [ - (<head> {} [ + (<head> { } [ (<meta> { charset = "utf-8"; } null) - (<title> {} "html.nix example document") - (<link> { - rel = "license"; - href = "https://code.tvl.fyi/about/LICENSE"; - type = "text/html"; - } null) - (<style> {} (esc '' + (<title> { } "html.nix example document") + (<link> + { + rel = "license"; + href = "https://code.tvl.fyi/about/LICENSE"; + type = "text/html"; + } + null) + (<style> { } (esc '' hgroup h2 { font-weight: normal; } @@ -26,39 +28,45 @@ let } '')) ]) - (<body> {} [ - (<main> {} [ - (<hgroup> {} [ - (<h1> {} (esc "html.nix")) - (<h2> {} [ - (<em> {} "the") + (<body> { } [ + (<main> { } [ + 
(<hgroup> { } [ + (<h1> { } (esc "html.nix")) + (<h2> { } [ + (<em> { } "the") (esc " most cursed HTML DSL ever!") ]) ]) - (<dl> {} [ - (<dt> {} [ + (<dl> { } [ + (<dt> { } [ (esc "Q: Wait, it's all ") - (<a> { - href = "https://cl.tvl.fyi/q/hashtag:cursed"; - } (esc "cursed")) + (<a> + { + href = "https://cl.tvl.fyi/q/hashtag:cursed"; + } + (esc "cursed")) (esc " nix hacks?") ]) - (<dd> {} (esc "A: Always has been. 🔫")) - (<dt> {} (esc "Q: Why does this work?")) - (<dd> {} [ + (<dd> { } (esc "A: Always has been. 🔫")) + (<dt> { } (esc "Q: Why does this work?")) + (<dd> { } [ (esc "Because nix ") - (<a> { - href = "https://github.com/NixOS/nix/blob/293220bed5a75efc963e33c183787e87e55e28d9/src/libexpr/parser.y#L410-L416"; - } (esc "translates ")) - (<a> { - href = "https://github.com/NixOS/nix/blob/293220bed5a75efc963e33c183787e87e55e28d9/src/libexpr/lexer.l#L100"; - } (esc "SPATH tokens")) + (<a> + { + href = "https://github.com/NixOS/nix/blob/293220bed5a75efc963e33c183787e87e55e28d9/src/libexpr/parser.y#L410-L416"; + } + (esc "translates ")) + (<a> + { + href = "https://github.com/NixOS/nix/blob/293220bed5a75efc963e33c183787e87e55e28d9/src/libexpr/lexer.l#L100"; + } + (esc "SPATH tokens")) (esc " like ") - (<code> {} (esc "<nixpkgs>")) + (<code> { } (esc "<nixpkgs>")) (esc " into calls to ") - (<code> {} (esc "__findFile")) + (<code> { } (esc "__findFile")) (esc " in the ") - (<em> {} (esc "current")) + (<em> { } (esc "current")) (esc " scope.") ]) ]) @@ -67,7 +75,8 @@ let ]); in -pkgs.runCommandNoCC "html.nix.html" { +pkgs.runCommand "html.nix.html" +{ passAsFile = [ "exampleDocument" ]; inherit exampleDocument; nativeBuildInputs = [ pkgs.html5validator ]; diff --git a/users/sterni/nix/int/default.nix b/users/sterni/nix/int/default.nix index b3157571272f..870744522361 100644 --- a/users/sterni/nix/int/default.nix +++ b/users/sterni/nix/int/default.nix @@ -2,37 +2,28 @@ let - # TODO(sterni): implement nix.float and figure out which of these - # functions can be 
split out into a common nix.num - # library. - inherit (depot.users.sterni.nix) string + num ; inherit (builtins) bitOr bitAnd bitXor - mul - div - add - sub ; - abs = i: if i < 0 then -i else i; - exp = base: pow: if pow > 0 then base * (exp base (pow - 1)) else if pow < 0 - then 1.0 / exp base (abs pow) + then 1.0 / exp base (num.abs pow) else 1; bitShiftR = bit: count: if count == 0 then bit - else div (bitShiftR bit (count - 1)) 2; + else (bitShiftR bit (count - 1)) / 2; bitShiftL = bit: count: if count == 0 @@ -47,12 +38,12 @@ let if i == 0 then "" else go (bitShiftR i 4) - + string.charAt (bitAnd i 15) hexdigits; + + string.charAt (bitAnd i 15) hexdigits; sign = lib.optionalString (int < 0) "-"; in - if int == 0 - then "0" - else "${sign}${go (abs int)}"; + if int == 0 + then "0" + else "${sign}${go (num.abs int)}"; fromHexMap = builtins.listToAttrs (lib.imap0 (i: c: { name = c; value = i; }) @@ -72,11 +63,12 @@ let val = v.val + (fromHexMap."${d}" * v.mul); mul = v.mul * 16; }) - { val = 0; mul = 1; } digits; + { val = 0; mul = 1; } + digits; in - if negative - then -parsed.val - else parsed.val; + if negative + then -parsed.val + else parsed.val; # A nix integer is a 64bit signed integer maxBound = 9223372036854775807; @@ -93,25 +85,24 @@ let odd = x: bitAnd x 1 == 1; even = x: bitAnd x 1 == 0; - # div and mod behave like quot and rem in Haskell, - # i. e. 
they truncate towards 0 - mod = a: b: let res = a / b; in a - (res * b); - - inRange = a: b: x: x >= a && x <= b; + quot' = builtins.div; # no typecheck + rem = a: b: + assert builtins.isInt a && builtins.isInt b; + let res = quot' a b; in a - (res * b); + quot = a: b: + assert builtins.isInt a && builtins.isInt b; + quot' a b; -in { +in +{ inherit maxBound minBound - abs exp odd even - add - sub - mul - div - mod + quot + rem bitShiftR bitShiftL bitOr @@ -119,6 +110,5 @@ in { bitXor toHex fromHex - inRange ; } diff --git a/users/sterni/nix/int/tests/default.nix b/users/sterni/nix/int/tests/default.nix index fac45dd251e1..80bd05b6b5eb 100644 --- a/users/sterni/nix/int/tests/default.nix +++ b/users/sterni/nix/int/tests/default.nix @@ -15,9 +15,6 @@ let ; testBounds = it "checks minBound and maxBound" [ - # this is gonna blow up in my face because - # integer overflow is undefined behavior in - # C++, so most likely anything could happen? (assertEq "maxBound is the maxBound" true (int.maxBound + 1 < int.maxBound)) (assertEq "minBound is the minBound" true @@ -31,22 +28,262 @@ let ]; expectedBytes = [ - "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" "0A" "0B" "0C" "0D" "0E" "0F" - "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "1A" "1B" "1C" "1D" "1E" "1F" - "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "2A" "2B" "2C" "2D" "2E" "2F" - "30" "31" "32" "33" "34" "35" "36" "37" "38" "39" "3A" "3B" "3C" "3D" "3E" "3F" - "40" "41" "42" "43" "44" "45" "46" "47" "48" "49" "4A" "4B" "4C" "4D" "4E" "4F" - "50" "51" "52" "53" "54" "55" "56" "57" "58" "59" "5A" "5B" "5C" "5D" "5E" "5F" - "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" "6A" "6B" "6C" "6D" "6E" "6F" - "70" "71" "72" "73" "74" "75" "76" "77" "78" "79" "7A" "7B" "7C" "7D" "7E" "7F" - "80" "81" "82" "83" "84" "85" "86" "87" "88" "89" "8A" "8B" "8C" "8D" "8E" "8F" - "90" "91" "92" "93" "94" "95" "96" "97" "98" "99" "9A" "9B" "9C" "9D" "9E" "9F" - "A0" "A1" "A2" "A3" "A4" "A5" "A6" "A7" "A8" "A9" "AA" "AB" 
"AC" "AD" "AE" "AF" - "B0" "B1" "B2" "B3" "B4" "B5" "B6" "B7" "B8" "B9" "BA" "BB" "BC" "BD" "BE" "BF" - "C0" "C1" "C2" "C3" "C4" "C5" "C6" "C7" "C8" "C9" "CA" "CB" "CC" "CD" "CE" "CF" - "D0" "D1" "D2" "D3" "D4" "D5" "D6" "D7" "D8" "D9" "DA" "DB" "DC" "DD" "DE" "DF" - "E0" "E1" "E2" "E3" "E4" "E5" "E6" "E7" "E8" "E9" "EA" "EB" "EC" "ED" "EE" "EF" - "F0" "F1" "F2" "F3" "F4" "F5" "F6" "F7" "F8" "F9" "FA" "FB" "FC" "FD" "FE" "FF" + "00" + "01" + "02" + "03" + "04" + "05" + "06" + "07" + "08" + "09" + "0A" + "0B" + "0C" + "0D" + "0E" + "0F" + "10" + "11" + "12" + "13" + "14" + "15" + "16" + "17" + "18" + "19" + "1A" + "1B" + "1C" + "1D" + "1E" + "1F" + "20" + "21" + "22" + "23" + "24" + "25" + "26" + "27" + "28" + "29" + "2A" + "2B" + "2C" + "2D" + "2E" + "2F" + "30" + "31" + "32" + "33" + "34" + "35" + "36" + "37" + "38" + "39" + "3A" + "3B" + "3C" + "3D" + "3E" + "3F" + "40" + "41" + "42" + "43" + "44" + "45" + "46" + "47" + "48" + "49" + "4A" + "4B" + "4C" + "4D" + "4E" + "4F" + "50" + "51" + "52" + "53" + "54" + "55" + "56" + "57" + "58" + "59" + "5A" + "5B" + "5C" + "5D" + "5E" + "5F" + "60" + "61" + "62" + "63" + "64" + "65" + "66" + "67" + "68" + "69" + "6A" + "6B" + "6C" + "6D" + "6E" + "6F" + "70" + "71" + "72" + "73" + "74" + "75" + "76" + "77" + "78" + "79" + "7A" + "7B" + "7C" + "7D" + "7E" + "7F" + "80" + "81" + "82" + "83" + "84" + "85" + "86" + "87" + "88" + "89" + "8A" + "8B" + "8C" + "8D" + "8E" + "8F" + "90" + "91" + "92" + "93" + "94" + "95" + "96" + "97" + "98" + "99" + "9A" + "9B" + "9C" + "9D" + "9E" + "9F" + "A0" + "A1" + "A2" + "A3" + "A4" + "A5" + "A6" + "A7" + "A8" + "A9" + "AA" + "AB" + "AC" + "AD" + "AE" + "AF" + "B0" + "B1" + "B2" + "B3" + "B4" + "B5" + "B6" + "B7" + "B8" + "B9" + "BA" + "BB" + "BC" + "BD" + "BE" + "BF" + "C0" + "C1" + "C2" + "C3" + "C4" + "C5" + "C6" + "C7" + "C8" + "C9" + "CA" + "CB" + "CC" + "CD" + "CE" + "CF" + "D0" + "D1" + "D2" + "D3" + "D4" + "D5" + "D6" + "D7" + "D8" + "D9" + "DA" + "DB" + "DC" + "DD" + "DE" + "DF" + 
"E0" + "E1" + "E2" + "E3" + "E4" + "E5" + "E6" + "E7" + "E8" + "E9" + "EA" + "EB" + "EC" + "ED" + "EE" + "EF" + "F0" + "F1" + "F2" + "F3" + "F4" + "F5" + "F6" + "F7" + "F8" + "F9" + "FA" + "FB" + "FC" + "FD" + "FE" + "FF" ]; hexByte = i: string.fit { width = 2; char = "0"; } (int.toHex i); @@ -64,20 +301,23 @@ let ]; testHex = it "checks conversion to hex" (lib.flatten [ - (lib.imap0 (i: hex: [ - (assertEq "hexByte ${toString i} == ${hex}" (hexByte i) hex) - (assertEq "${toString i} == fromHex ${hex}" i (int.fromHex hex)) - ]) expectedBytes) - (builtins.map ({ left, right }: [ - (assertEq "toHex ${toString left} == ${right}" (int.toHex left) right) - (assertEq "${toString left} == fromHex ${right}" left (int.fromHex right)) - ]) hexInts) + (lib.imap0 + (i: hex: [ + (assertEq "hexByte ${toString i} == ${hex}" (hexByte i) hex) + (assertEq "${toString i} == fromHex ${hex}" i (int.fromHex hex)) + ]) + expectedBytes) + (builtins.map + ({ left, right }: [ + (assertEq "toHex ${toString left} == ${right}" (int.toHex left) right) + (assertEq "${toString left} == fromHex ${right}" left (int.fromHex right)) + ]) + hexInts) ]); testBasic = it "checks basic int operations" [ (assertEq "122 is even" (int.even 122 && !(int.odd 122)) true) (assertEq "123 is odd" (int.odd 123 && !(int.even 123)) true) - (assertEq "abs -4959" (int.abs (-4959)) 4959) ]; expNumbers = [ @@ -94,20 +334,23 @@ let ]; testExp = it "checks exponentiation" - (builtins.map ({ left, right }: - assertEq - "2 ^ ${toString left} == ${toString right}" - (int.exp 2 left) right) expNumbers); + (builtins.map + ({ left, right }: + assertEq + "2 ^ ${toString left} == ${toString right}" + (int.exp 2 left) + right) + expNumbers); shifts = [ - { a = 2; b = 5; c = 64; op = "<<"; } - { a = -2; b = 5; c = -64; op = "<<"; } + { a = 2; b = 5; c = 64; op = "<<"; } + { a = -2; b = 5; c = -64; op = "<<"; } { a = 123; b = 4; c = 1968; op = "<<"; } - { a = 1; b = 8; c = 256; op = "<<"; } - { a = 256; b = 8; c = 1; op = ">>"; } - { 
a = 374; b = 2; c = 93; op = ">>"; } - { a = 2; b = 2; c = 0; op = ">>"; } - { a = 99; b = 9; c = 0; op = ">>"; } + { a = 1; b = 8; c = 256; op = "<<"; } + { a = 256; b = 8; c = 1; op = ">>"; } + { a = 374; b = 2; c = 93; op = ">>"; } + { a = 2; b = 2; c = 0; op = ">>"; } + { a = 99; b = 9; c = 0; op = ">>"; } ]; checkShift = { a, b, c, op }@args: @@ -116,15 +359,18 @@ let "<<" = int.bitShiftL; ">>" = int.bitShiftR; }; - in assertEq "${toString a} ${op} ${toString b} == ${toString c}" (f a b) c; + in + assertEq "${toString a} ${op} ${toString b} == ${toString c}" (f a b) c; checkShiftRDivExp = n: assertEq "${toString n} >> 5 == ${toString n} / 2 ^ 5" - (int.bitShiftR n 5) (int.div n (int.exp 2 5)); + (int.bitShiftR n 5) + (n / (int.exp 2 5)); checkShiftLMulExp = n: assertEq "${toString n} >> 6 == ${toString n} * 2 ^ 6" - (int.bitShiftL n 5) (int.mul n (int.exp 2 5)); + (int.bitShiftL n 5) + (n * (int.exp 2 5)); testBit = it "checks bitwise operations" (lib.flatten [ (builtins.map checkShift shifts) @@ -160,44 +406,50 @@ let ]); divisions = [ - { a = 2; b = 1; c = 2; mod = 0;} - { a = 2; b = 2; c = 1; mod = 0;} - { a = 20; b = 10; c = 2; mod = 0;} - { a = 12; b = 5; c = 2; mod = 2;} - { a = 23; b = 4; c = 5; mod = 3;} + { a = 2; b = 1; c = 2; rem = 0; } + { a = 2; b = 2; c = 1; rem = 0; } + { a = 20; b = 10; c = 2; rem = 0; } + { a = 12; b = 5; c = 2; rem = 2; } + { a = 23; b = 4; c = 5; rem = 3; } ]; - checkDiv = n: { a, b, c, mod }: [ - (assertEq "${n}: div result" (int.div a b) c) - (assertEq "${n}: mod result" (int.mod a b) mod) - (assertEq "${n}: divMod law" ((int.div a b) * b + (int.mod a b)) a) + checkQuot = n: { a, b, c, rem }: [ + (assertEq "${n}: quot result" (int.quot a b) c) + (assertEq "${n}: rem result" (int.rem a b) rem) + (assertEq "${n}: quotRem law" ((int.quot a b) * b + (int.rem a b)) a) ]; - testDivMod = it "checks integer division and modulo" + testQuotRem = it "checks integer quotient and remainder" (lib.flatten [ - (builtins.map (checkDiv "+a 
/ +b") divisions) - (builtins.map (fun.rl (checkDiv "-a / +b") (x: x // { - a = -x.a; - c = -x.c; - mod = -x.mod; - })) divisions) - (builtins.map (fun.rl (checkDiv "+a / -b") (x: x // { - b = -x.b; - c = -x.c; - })) divisions) - (builtins.map (fun.rl (checkDiv "-a / -b") (x: x // { - a = -x.a; - b = -x.b; - mod = -x.mod; - })) divisions) + (builtins.map (checkQuot "+a / +b") divisions) + (builtins.map + (fun.rl (checkQuot "-a / +b") (x: x // { + a = -x.a; + c = -x.c; + rem = -x.rem; + })) + divisions) + (builtins.map + (fun.rl (checkQuot "+a / -b") (x: x // { + b = -x.b; + c = -x.c; + })) + divisions) + (builtins.map + (fun.rl (checkQuot "-a / -b") (x: x // { + a = -x.a; + b = -x.b; + rem = -x.rem; + })) + divisions) ]); in - runTestsuite "nix.int" [ - testBounds - testHex - testBasic - testExp - testBit - testDivMod - ] +runTestsuite "nix.int" [ + testBounds + testHex + testBasic + testExp + testBit + testQuotRem +] diff --git a/users/sterni/nix/list/default.nix b/users/sterni/nix/list/default.nix new file mode 100644 index 000000000000..568a76d637a1 --- /dev/null +++ b/users/sterni/nix/list/default.nix @@ -0,0 +1,30 @@ +{ ... }: + +{ + /* For a list of length n that consists of lists of length m, + return a list of length m containing lists of length n + so that + + builtins.elemAt (builtins.elemAt orig a) b + == builtins.elemAt (builtins.elemAt transposed b) a + + Essentially, if you think of the nested list as an array with two + dimensions, the two index axes are swapped. + + The length of the inner lists m is determined based on the first element + and assumed to be used for all other lists. Malformed input data may + cause the function to crash or lose data. 
+ + Type: <n>[ <m>[ ] ] -> <m>[ <n>[ ] ] + */ + transpose = list: + let + innerLength = builtins.length (builtins.head list); + outerLength = builtins.length list; + in + builtins.genList + (inner: builtins.genList + (outer: builtins.elemAt (builtins.elemAt list outer) inner) + outerLength) + innerLength; +} diff --git a/users/sterni/nix/misc/default.nix b/users/sterni/nix/misc/default.nix new file mode 100644 index 000000000000..1de9c973ec84 --- /dev/null +++ b/users/sterni/nix/misc/default.nix @@ -0,0 +1,18 @@ +{ ... }: + +let + /* Returns true if it is being evaluated using restrict-eval, false if not. + It's more robust than using `builtins.getEnv` since it isn't fooled by + `env -i`. + + See https://github.com/NixOS/nix/issues/6579 for a description of the + behavior. Precise cause in the evaluator / store implementation is unclear. + + Type: bool + */ + inRestrictedEval = builtins.pathExists (toString ./guinea-pig + "/."); +in + +{ + inherit inRestrictedEval; +} diff --git a/users/sterni/nix/misc/guinea-pig b/users/sterni/nix/misc/guinea-pig new file mode 120000 index 000000000000..73537e478e3f --- /dev/null +++ b/users/sterni/nix/misc/guinea-pig @@ -0,0 +1 @@ +default.nix \ No newline at end of file diff --git a/users/sterni/nix/num/default.nix b/users/sterni/nix/num/default.nix new file mode 100644 index 000000000000..81e2f8377f3b --- /dev/null +++ b/users/sterni/nix/num/default.nix @@ -0,0 +1,17 @@ +{ ... }: + +rec { + inherit (builtins) + mul + div + add + sub + ; + + sign = i: if i < 0 then -1 else 1; + abs = i: if i < 0 then -i else i; + + inRange = a: b: x: x >= a && x <= b; + + sum = builtins.foldl' (a: b: a + b) 0; +} diff --git a/users/sterni/nix/num/tests/default.nix b/users/sterni/nix/num/tests/default.nix new file mode 100644 index 000000000000..ca5f861debe8 --- /dev/null +++ b/users/sterni/nix/num/tests/default.nix @@ -0,0 +1,26 @@ +{ depot, ... 
}: + +let + + inherit (depot.nix.runTestsuite) + runTestsuite + it + assertEq + ; + + inherit (depot.users.sterni.nix) + num + ; + + testsBasic = it "tests basic operations" [ + (assertEq "abs -4959" (num.abs (-4959)) 4959) + (assertEq "sum" (num.sum [ 123 321 1.5 ]) (123 + 321 + 1.5)) + (assertEq "inRange" + (builtins.map (num.inRange 1.0 5) [ 0 0.5 3 4 4.5 5.5 5 6 ]) + [ false false true true true false true false ]) + ]; +in + +runTestsuite "nix.num" [ + testsBasic +] diff --git a/users/sterni/nix/string/default.nix b/users/sterni/nix/string/default.nix index 19d2cec243c0..381c8ddff748 100644 --- a/users/sterni/nix/string/default.nix +++ b/users/sterni/nix/string/default.nix @@ -21,7 +21,8 @@ let charAt = i: s: let r = builtins.substring i 1 s; - in if r == "" then null else r; + in + if r == "" then null else r; charIndex = char: s: let @@ -32,7 +33,8 @@ let [ (charAt i s == char) i ] [ true (go (i + 1)) ] ]; - in go 0; + in + go 0; toChars = lib.stringToCharacters; fromChars = lib.concatStrings; @@ -46,15 +48,16 @@ let let leftS = fromChars (builtins.genList (_: char) left); rightS = fromChars (builtins.genList (_: char) right); - in "${leftS}${s}${rightS}"; + in + "${leftS}${s}${rightS}"; fit = { char ? " ", width, side ? "left" }: s: let diff = width - builtins.stringLength s; in - if diff <= 0 - then s - else pad { inherit char; "${side}" = diff; } s; + if diff <= 0 + then s + else pad { inherit char; "${side}" = diff; } s; # pattern matching for strings only match = val: matcher: matcher."${val}"; @@ -80,23 +83,27 @@ let tokens = lib.flatten (builtins.split "(%.)" formatString); argsNeeded = builtins.length (builtins.filter specifierWithArg tokens); - format = args: (builtins.foldl' ({ out ? "", argIndex ? 
0 }: token: { - argIndex = argIndex + (if specifierWithArg token then 1 else 0); - out = - /**/ if token == "%s" then out + builtins.elemAt args argIndex - else if token == "%%" then out + "%" - else if isSpecifier token then throw "Unsupported format specifier ${token}" - else out + token; - }) {} tokens).out; + format = args: (builtins.foldl' + ({ out ? "", argIndex ? 0 }: token: { + argIndex = argIndex + (if specifierWithArg token then 1 else 0); + out = + if token == "%s" then out + builtins.elemAt args argIndex + else if token == "%%" then out + "%" + else if isSpecifier token then throw "Unsupported format specifier ${token}" + else out + token; + }) + { } + tokens).out; accumulateArgs = argCount: args: if argCount > 0 then arg: accumulateArgs (argCount - 1) (args ++ [ arg ]) else format args; in - accumulateArgs argsNeeded []; + accumulateArgs argsNeeded [ ]; -in { +in +{ inherit take drop diff --git a/users/sterni/nix/string/tests/default.nix b/users/sterni/nix/string/tests/default.nix index c8aec9464077..e9015e95dca4 100644 --- a/users/sterni/nix/string/tests/default.nix +++ b/users/sterni/nix/string/tests/default.nix @@ -63,10 +63,10 @@ let ]; in - runTestsuite "nix.string" [ - testTakeDrop - testIndexing - testFinding - testMatch - testPrintf - ] +runTestsuite "nix.string" [ + testTakeDrop + testIndexing + testFinding + testMatch + testPrintf +] diff --git a/users/sterni/nix/url/default.nix b/users/sterni/nix/url/default.nix index 37bd0de66ac9..4a401873a1f2 100644 --- a/users/sterni/nix/url/default.nix +++ b/users/sterni/nix/url/default.nix @@ -10,9 +10,24 @@ let ; reserved = c: builtins.elem c [ - "!" "#" "$" "&" "'" "(" ")" - "*" "+" "," "/" ":" ";" "=" - "?" "@" "[" "]" + "!" + "#" + "$" + "&" + "'" + "(" + ")" + "*" + "+" + "," + "/" + ":" + ";" + "=" + "?" 
+ "@" + "[" + "]" ]; unreserved = c: char.asciiAlphaNum c @@ -21,11 +36,13 @@ let percentEncode = c: if unreserved c then c - else "%" + (string.fit { - width = 2; - char = "0"; - side = "left"; - } (int.toHex (char.ord c))); + else "%" + (string.fit + { + width = 2; + char = "0"; + side = "left"; + } + (int.toHex (char.ord c))); encode = { leaveReserved ? false }: s: let @@ -34,7 +51,8 @@ let if leaveReserved && reserved c then c else percentEncode c; - in lib.concatStrings (builtins.map tr chars); + in + lib.concatStrings (builtins.map tr chars); decode = s: let @@ -71,9 +89,10 @@ let ]; in - (builtins.foldl' decodeStep {} tokens).result; + (builtins.foldl' decodeStep { } tokens).result; -in { +in +{ inherit encode decode diff --git a/users/sterni/nix/url/tests/default.nix b/users/sterni/nix/url/tests/default.nix index 7cf53cde1555..4eb6f95ccd07 100644 --- a/users/sterni/nix/url/tests/default.nix +++ b/users/sterni/nix/url/tests/default.nix @@ -14,11 +14,13 @@ let checkEncoding = args: { left, right }: assertEq "encode ${builtins.toJSON left} == ${builtins.toJSON right}" - (url.encode args left) right; + (url.encode args left) + right; checkDecoding = { left, right }: - assertEq "${builtins.toJSON left} == decode ${builtins.toJSON right}" - (url.decode left) right; + assertEq "${builtins.toJSON left} == decode ${builtins.toJSON right}" + (url.decode left) + right; unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_~"; @@ -33,7 +35,7 @@ let ]; testEncode = it "checks url.encode" - (builtins.map (checkEncoding {}) encodeExpected); + (builtins.map (checkEncoding { }) encodeExpected); testDecode = it "checks url.decode" (builtins.map checkDecoding encodeExpected); @@ -50,7 +52,7 @@ let "urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ]); in - runTestsuite "nix.url" [ - testEncode - testLeaveReserved - ] +runTestsuite "nix.url" [ + testEncode + testLeaveReserved +] diff --git a/users/sterni/nix/utf8/default.nix 
b/users/sterni/nix/utf8/default.nix index 713f1f57cbe6..e76695f128b2 100644 --- a/users/sterni/nix/utf8/default.nix +++ b/users/sterni/nix/utf8/default.nix @@ -2,12 +2,11 @@ let - # TODO(sterni): encode - inherit (depot.users.sterni.nix) char flow fun + num int string util @@ -27,7 +26,7 @@ let Type: integer -> integer */ byteCount = i: flow.cond [ - [ (int.bitAnd i 128 == 0) 1 ] + [ (int.bitAnd i 128 == 0) 1 ] [ (int.bitAnd i 224 == 192) 2 ] [ (int.bitAnd i 240 == 224) 3 ] [ (int.bitAnd i 248 == 240) 4 ] @@ -40,40 +39,37 @@ let Based on table 3-7. from The Unicode Standard, Version 13.0, section 3.9. - Throws if the first byte is invalid. - - Type: integer -> integer -> (integer -> bool) + Type: integer -> integer -> integer -> bool */ wellFormedByte = # first byte's integer value first: # byte position as an index starting with 0 pos: - let - defaultRange = int.inRange 128 191; - in - # The first byte is either ASCII which requires no checks - # or we automatically check it when we check the subsequent - # bytes. The downside is that this may generate bad error - # messages in very rare cases. 
- if pos == 0 - then lib.const true - else if pos > 1 # 3rd and 4th byte have only one validity rule - then defaultRange - else assert pos == 1; flow.switch first [ - [ (int.inRange 194 223) defaultRange ] # C2..DF - [ 224 (int.inRange 160 191) ] # E0 - [ (int.inRange 225 236) defaultRange ] # E1..EC - [ 237 (int.inRange 128 159) ] # ED - [ (int.inRange 238 239) defaultRange ] # EE..EF - [ 240 (int.inRange 144 191) ] # F0 - [ (int.inRange 241 243) defaultRange ] # F1..F3 - [ 244 (int.inRange 128 143) ] # F4 - [ - (fun.const true) - (builtins.throw "Invalid first byte ${int.toHex first}") - ] - ]; + let + defaultRange = num.inRange 128 191; + + secondBytePredicate = flow.switch first [ + [ (num.inRange 194 223) defaultRange ] # C2..DF + [ 224 (num.inRange 160 191) ] # E0 + [ (num.inRange 225 236) defaultRange ] # E1..EC + [ 237 (num.inRange 128 159) ] # ED + [ (num.inRange 238 239) defaultRange ] # EE..EF + [ 240 (num.inRange 144 191) ] # F0 + [ (num.inRange 241 243) defaultRange ] # F1..F3 + [ 244 (num.inRange 128 143) ] # F4 + [ (fun.const true) null ] + ]; + + firstBytePredicate = byte: assert first == byte; + first < 128 || secondBytePredicate != null; + in + # Either ASCII or in one of the byte ranges of Table 3-6. + if pos == 0 then firstBytePredicate + # return predicate according to Table 3-6. + else if pos == 1 then assert secondBytePredicate != null; secondBytePredicate + # 3rd and 4th byte have only one validity rule + else defaultRange; /* Iteration step for decoding an UTF-8 byte sequence. It decodes incrementally, i. e. it has to be fed @@ -133,23 +129,24 @@ let # the current value by the amount of bytes left. 
offset = (count - (pos + 1)) * 6; in - code + (int.bitShiftL (int.bitAnd mask value) offset); + code + (int.bitShiftL (int.bitAnd mask value) offset); illFormedMsg = "Ill-formed byte ${int.toHex value} at position ${toString pos} in ${toString count} byte UTF-8 sequence"; in - if !(wellFormedByte first pos value) then builtins.throw illFormedMsg - else if pos + 1 == count - then (builtins.removeAttrs args [ # allow extra state being passed through - "count" - "code" - "pos" - "first" - ]) // { result = newCode; } - else (builtins.removeAttrs args [ "result" ]) // { - inherit count first; - code = newCode; - pos = pos + 1; - }; + if !(wellFormedByte first pos value) then builtins.throw illFormedMsg + else if pos + 1 == count + then (builtins.removeAttrs args [ + # allow extra state being passed through + "count" + "code" + "pos" + "first" + ]) // { result = newCode; } + else (builtins.removeAttrs args [ "result" ]) // { + inherit count first; + code = newCode; + pos = pos + 1; + }; /* Decode an UTF-8 string into a list of codepoints. @@ -160,49 +157,170 @@ let # TODO(sterni): option to fallback to replacement char instead of failure decode = s: let - iter = { codes ? [], ... }@args: byte: - let - res = step args byte; - in - # foldl' forceValues the calculate value only at the end - # this makes the thunk grow large enough to cause a stack - # overflow with sufficiently large strings. To avoid this - # we always deepSeq the result which also keeps memory - # usage of decode reasonable. - builtins.deepSeq res - (if res ? result - then res // { - codes = codes ++ [ res.result ]; - } - else res); - iterResult = - builtins.foldl' iter {} (string.toChars s); - earlyEndMsg = - if iterResult ? count && iterResult ? 
pos - then "Missing ${toString (with iterResult; count - pos)} bytes at end of input" - else "Unexpected end of input"; + stringLength = builtins.stringLength s; + iterResult = builtins.genericClosure { + startSet = [ + { + key = "start"; + stringIndex = -1; + state = { }; + codepoint = null; + } + ]; + operator = { state, stringIndex, ... }: + let + # updated values for current iteration step + newIndex = stringIndex + 1; + newState = step state (builtins.substring newIndex 1 s); + in + lib.optional (newIndex < stringLength) { + # unique keys to make genericClosure happy + key = toString newIndex; + # carryover state for the next step + stringIndex = newIndex; + state = newState; + # actual payload for later, steps with value null are filtered out + codepoint = newState.result or null; + }; + }; in - if iterResult ? result - then iterResult.codes - else builtins.throw earlyEndMsg; + # extract all steps that yield a code point into a list + builtins.map (v: v.codepoint) ( + builtins.filter + ( + { codepoint, stringIndex, state, ... }: + + let + # error message in case we are missing bytes at the end of input + earlyEndMsg = + if state ? count && state ? pos + then "Missing ${toString (with state; count - pos)} bytes at end of input" + else "Unexpected end of input"; + in - /* Decodes an UTF-8 string, but doesn't throw on error. - Instead it returns null. + # filter out all iteration steps without a codepoint value + codepoint != null + # if we are at the iteration step of a non-empty input string, throw + # an error if no codepoint was returned, as it indicates an incomplete + # UTF-8 sequence. + || (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg) - Type: string -> ( [ integer ] | null) + ) + iterResult + ); + + /* Pretty prints a Unicode codepoint in the U+<HEX> notation. 
+ + Type: integer -> string */ - decodeSafe = s: + formatCodepoint = cp: "U+" + string.fit + { + width = 4; + char = "0"; + } + (int.toHex cp); + + encodeCodepoint = cp: let - res = builtins.tryEval (decode s); + # Find the amount of bytes needed to encode the given codepoint. + # Note that this doesn't check if the Unicode codepoint is allowed, + # but rather allows all theoretically UTF-8-encodeable ones. + count = flow.switch cp [ + [ (num.inRange 0 127) 1 ] # 00000000 0xxxxxxx + [ (num.inRange 128 2047) 2 ] # 00000yyy yyxxxxxx + [ (num.inRange 2048 65535) 3 ] # zzzzyyyy yyxxxxxx + [ (num.inRange 65536 1114111) 4 ] # 000uuuuu zzzzyyyy yyxxxxxx, + # capped at U+10FFFF + + [ (fun.const true) (builtins.throw invalidCodepointMsg) ] + ]; + + invalidCodepointMsg = "${formatCodepoint cp} is not a Unicode codepoint"; + + # Extract the bit ranges x, y, z and u from the given codepoint + # according to Table 3-6. from The Unicode Standard, Version 13.0, + # section 3.9. u is split into uh and ul since they are used in + # different bytes in the end. + components = lib.mapAttrs + (_: { mask, offset }: + int.bitAnd (int.bitShiftR cp offset) mask + ) + { + x = { + mask = if count > 1 then 63 else 127; + offset = 0; + }; + y = { + mask = if count > 2 then 63 else 31; + offset = 6; + }; + z = { + mask = 15; + offset = 12; + }; + # u which belongs into the second byte + ul = { + mask = 3; + offset = 16; + }; + # u which belongs into the first byte + uh = { + mask = 7; + offset = 18; + }; + }; + inherit (components) x y z ul uh; + + # Finally construct the byte sequence for the given codepoint. This is + # usually done by using the component and adding a few bits as a prefix + # which depends on the length of the sequence. The longer the sequence, + # the further back each component is pushed. To simplify this, we + # always construct a 4 element list and take the last `count` elements. + # Thanks to laziness the bogus values created by this are never evaluated. 
+ # + # Based on table 3-6. from The Unicode Standard, + # Version 13.0, section 3.9. + bytes = lib.sublist (4 - count) count [ + # 11110uuu + (uh + 240) + # 10uuzzzz or 1110zzzz + (z + (if count > 3 then 128 + int.bitShiftL ul 4 else 224)) + # 10yyyyyy or 110yyyyy + (y + (if count > 2 then 128 else 192)) + # 10xxxxxx or 0xxxxxxx + (x + (if count > 1 then 128 else 0)) + ]; + + firstByte = builtins.head bytes; + + unableToEncodeMessage = "Can't encode ${formatCodepoint cp} as UTF-8"; + in - if res.success - then res.value - else null; + string.fromBytes ( + builtins.genList + (i: + let + byte = builtins.elemAt bytes i; + in + if wellFormedByte firstByte i byte + then byte + else builtins.throw unableToEncodeMessage + ) + count + ); + + /* Encode a list of Unicode codepoints into an UTF-8 string. + + Type: [ integer ] -> string + */ + encode = lib.concatMapStrings encodeCodepoint; -in { +in +{ inherit + encode decode - decodeSafe step + formatCodepoint ; } diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix index ed38bd1241f3..40783eab2421 100644 --- a/users/sterni/nix/utf8/tests/default.nix +++ b/users/sterni/nix/utf8/tests/default.nix @@ -25,9 +25,10 @@ let char ; - rustDecoder = rustSimple { - name = "utf8-decode"; - } '' + rustDecoder = rustSimple + { + name = "utf8-decode"; + } '' use std::io::{self, Read}; fn main() -> std::io::Result<()> { let mut buffer = String::new(); @@ -47,34 +48,49 @@ let rustDecode = s: let - expr = runCommandLocal "${s}-decoded" {} '' + expr = runCommandLocal "${s}-decoded" { } '' printf '%s' ${lib.escapeShellArg s} | ${rustDecoder} > $out ''; - in import expr; + in + import expr; hexDecode = l: utf8.decode (string.fromBytes (builtins.map int.fromHex l)); - testFailures = it "checks UTF-8 decoding failures" [ - (assertThrows "emtpy bytestring throws" (utf8.decode "")) + hexEncode = l: utf8.encode (builtins.map int.fromHex l); + + testFailures = it "checks UTF-8 decoding failures" ([ 
(assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ])) # examples from The Unicode Standard (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ])) (assertThrows "ill-formed: E0 9F 80" (hexDecode [ "E0" "9F" "80" ])) (assertEq "well-formed: F4 80 83 92" (hexDecode [ "F4" "80" "83" "92" ]) [ 1048786 ]) - ]; + (assertThrows "Codepoint out of range: 0xFFFFFF" (hexEncode [ "FFFFFF" ])) + (assertThrows "Codepoint out of range: -0x02" (hexEncode [ "-02" ])) + ] ++ builtins.genList + (i: + let + cp = i + int.fromHex "D800"; + in + assertThrows "Can't encode UTF-16 reserved characters: ${utf8.formatCodepoint cp}" + (utf8.encode [ cp ]) + ) + (int.fromHex "07FF")); testAscii = it "checks decoding of ascii strings" - (builtins.map (s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\"" - (string.toBytes s) (utf8.decode s)) [ - "foo bar" - "hello\nworld" - "carriage\r\nreturn" - "1238398494829304 []<><>({})[]!!)" - (string.take 127 char.allChars) - ]); + (builtins.map + (s: assertEq "ASCII decoding is equal to UTF-8 decoding for \"${s}\"" + (string.toBytes s) + (utf8.decode s)) [ + "foo bar" + "hello\nworld" + "carriage\r\nreturn" + "1238398494829304 []<><>({})[]!!)" + (string.take 127 char.allChars) + ]); randomUnicode = [ + "" # empty string should yield empty list "π₯°π¨βπ¨βπ§βπ¦πββ¬π©π½βπ¦°" # https://kermitproject.org/utf8.html "α αα»α«αα¦α¦α«α α±α©α α’α±α«α αα±αͺα«α·αα»αΉα¦αα³α’α" @@ -113,9 +129,20 @@ let randomUnicode ])); + testDecodingEncoding = it "checks that decoding and then encoding forms an identity" + (builtins.map + (s: assertEq "Decoding and then encoding β${s}β yields itself" + (utf8.encode (utf8.decode s)) + s) + (lib.flatten [ + glassSentences + randomUnicode + ])); + in - runTestsuite "nix.utf8" [ - testFailures - testAscii - testDecoding - ] +runTestsuite "nix.utf8" [ + testFailures + testAscii + testDecoding + testDecodingEncoding +] |