about summary refs log tree commit diff
path: root/users/sterni/nix/char/default.nix
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2021-02-22T13·32+0100
committersterni <sternenseemann@systemli.org>2021-03-01T17·34+0000
commit3b33c1bd7627c9427a410276c2a49c2b04f70edc (patch)
tree0cd92fecc34ad0db0d4b3af9e0f1f266172a988d /users/sterni/nix/char/default.nix
parent369f504250fe6508d3b927c6c8320a2525edfad1 (diff)
feat(users/sterni/nix): add sternis nix lib r/2257
What you see here is mostly the fallout of me implementing a correct
urlencode implementation in nix for Profpatsch's blog implementation
(although they'll probably keep it at arm's length).

Where I want to go from here:

* Extend this library towards general purpose nix™, mainly by
  implementing missing interfaces which you'd still have to use
  <nixpkgs/lib> for right now. Reexposing parts of <nixpkgs/lib>
  with better naming is fine for now, at some point I'd contemplate
  making this depend on nothing outside of depot, maybe even itself
  (should be easy we only use yants for an easily replaceable check).

* Improve error messages possibly by carefully reintroducing yants. I
  originally typed essentially everything using yants, but turns out
  this can a) be dangerous when stuff you are handling throws because
  type checking means evaluating and b) has an incredible performance
  cost in some cases.

* Reexpose builtins with better naming and slightly wrapped so they
  don't unrecoverably throw in cases where a null or something would
  suffice.

Change-Id: I33ab08ca4e62dbc16b86c66c653935686e6b0e79
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2541
Reviewed-by: sterni <sternenseemann@systemli.org>
Reviewed-by: Profpatsch <mail@profpatsch.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/sterni/nix/char/default.nix')
-rw-r--r--users/sterni/nix/char/default.nix90
1 files changed, 90 insertions, 0 deletions
diff --git a/users/sterni/nix/char/default.nix b/users/sterni/nix/char/default.nix
new file mode 100644
index 0000000000..e6b8d6d7f1
--- /dev/null
+++ b/users/sterni/nix/char/default.nix
@@ -0,0 +1,90 @@
+{ depot, lib, pkgs, ... }:
+
+let
+
+  inherit (depot.users.sterni.nix.flow)
+    cond
+    ;
+
+  inherit (depot.nix)
+    yants
+    ;
+
+  inherit (depot.users.sterni.nix)
+    string
+    ;
+
+  # Type of a single char: a string for which builtins.stringLength
+  # reports a length of exactly 1.
+  #
+  # Nix strings are essentially arrays of arbitrary bytes, with the
+  # exception of the NUL byte, and a char is the atomic element of
+  # such a string. Note that a char is neither a byte nor a unicode
+  # codepoint!
+  char =
+    let
+      hasLengthOne = str: builtins.stringLength str == 1;
+    in
+    yants.restrict "char" hasLengthOne yants.string;
+
+  # Type of the integer representation of a char: an int that is
+  # at least 1 and smaller than 256.
+  charval =
+    let
+      inCharRange = n: n >= 1 && n < 256;
+    in
+    yants.restrict "charval" inCharRange yants.int;
+
+  # Contents of ./all-chars.bin, read in as a single string. ordMap and
+  # chr below rely on the char with integer value n being located at
+  # position n (counting from 1) of this string.
+  allChars = builtins.readFile ./all-chars.bin;
+
+  # Attribute set mapping every char of allChars to its position in
+  # allChars, counted from 1 (lib.imap1 starts counting at 1).
+  #
+  # Originally a list was searched for this, but that can never be
+  # fast enough in Nix, so a solution similar to infinisil's is used.
+  ordMap =
+    let
+      chars = string.toChars allChars;
+      toEntry = position: c: { name = c; value = position; };
+    in
+    builtins.listToAttrs (lib.imap1 toEntry chars);
+
+  # Note on performance:
+  # chr and ord have been benchmarked using the following cases:
+  #
+  #  builtins.map ord (lib.stringToCharacters allChars)
+  #  builtins.map chr (builtins.genList (int.add 1) 255)
+  #
+  # The findings are as follows:
+  # 1. Searching through either strings or lists using recursion
+  #    is unbearably slow in Nix, leading to evaluation times of
+  #    up to 3s for the very small test cases above.
+  #    This is why we use the trusty attribute set for ord.
+  # 2. String indexing is much faster than list indexing which
+  #    is why we use the former for chr.
+  #
+  # ord returns the integer value of the char c by selecting the
+  # attribute named c from ordMap.
+  ord = c: ordMap."${c}";
+
+  # Returns the char belonging to the integer value i. The char with
+  # value n sits at position n of allChars when counting from 1, so
+  # the value is shifted down by one before it is handed to
+  # string.charAt (presumably zero-based; confirm in the string lib).
+  chr = i:
+    let
+      index = i - 1;
+    in
+    string.charAt index allChars;
+
+  # True if the char c is an ASCII letter, i.e. its integer value
+  # lies in 65..90 ('A'..'Z') or in 97..122 ('a'..'z').
+  asciiAlpha = c:
+    let
+      code = ord c;
+      isUpper = 65 <= code && code <= 90;
+      isLower = 97 <= code && code <= 122;
+    in
+    isUpper || isLower;
+
+  # True if the char c is an ASCII digit, i.e. its integer value
+  # lies in 48..57 ('0'..'9').
+  asciiNum = c:
+    let
+      code = ord c;
+      zero = 48;
+      nine = 57;
+    in
+    zero <= code && code <= nine;
+
+  # True if the char c satisfies asciiAlpha or asciiNum. The
+  # predicates are tried in that order and the first match wins.
+  asciiAlphaNum = c:
+    builtins.any (matches: matches c) [ asciiAlpha asciiNum ];
+
+in {
+  inherit
+    allChars
+    char
+    charval
+    ord
+    chr
+    asciiAlpha
+    asciiNum
+    asciiAlphaNum
+    ;
+
+  # C program, built with pkgs.writers.writeC, which writes the
+  # bytes 1 through 255 (0xff) to stdout in ascending order; the
+  # NUL byte is left out. Presumably its output is what is stored
+  # in ./all-chars.bin (confirm before regenerating that file).
+  #
+  # Originally a nix file containing a list of characters was
+  # generated instead, but infinisil uses a better way which is
+  # adapted here: using builtins.readFile instead of import.
+  __generateAllChars = pkgs.writers.writeC "generate-all-chars" {} ''
+    #include <stdio.h>
+
+    int main(void) {
+      for(int i = 1; i <= 0xff; i++) {
+        putchar(i);
+      }
+    }
+  '';
+}