about summary refs log tree commit diff
path: root/users/sterni/nix/char
diff options
context:
space:
mode:
Diffstat (limited to 'users/sterni/nix/char')
-rw-r--r-- users/sterni/nix/char/all-chars.bin 2
-rw-r--r-- users/sterni/nix/char/default.nix 90
-rw-r--r-- users/sterni/nix/char/tests/default.nix 31
3 files changed, 123 insertions, 0 deletions
diff --git a/users/sterni/nix/char/all-chars.bin b/users/sterni/nix/char/all-chars.bin
new file mode 100644
index 000000000000..017b909e8e8e
--- /dev/null
+++ b/users/sterni/nix/char/all-chars.bin
@@ -0,0 +1,2 @@
+	
+
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
\ No newline at end of file
diff --git a/users/sterni/nix/char/default.nix b/users/sterni/nix/char/default.nix
new file mode 100644
index 000000000000..e6b8d6d7f168
--- /dev/null
+++ b/users/sterni/nix/char/default.nix
@@ -0,0 +1,90 @@
+{ depot, lib, pkgs, ... }:
+
+let
+
+  inherit (depot.users.sterni.nix.flow)
+    cond
+    ;
+
+  inherit (depot.nix)
+    yants
+    ;
+
+  inherit (depot.users.sterni.nix)
+    string
+    ;
+
+  # A char is the atomic element of a nix string, which is
+  # essentially an array of arbitrary bytes as long as none
+  # of them is a NUL byte.
+  #
+  # A char is neither a byte nor a unicode codepoint!
+  #
+  # The type check accepts any string for which
+  # builtins.stringLength returns 1.
+  char = yants.restrict "char" (s: builtins.stringLength s == 1) yants.string;
+
+  # Integer representation of a char: an int in the range 1..255.
+  # 0 is excluded, matching the fact that a char is never a NUL byte.
+  charval = yants.restrict "charval" (i: i >= 1 && i < 256) yants.int;
+
+  # Contents of ./all-chars.bin as a single string. ordMap and chr
+  # rely on the char with integer value i sitting at (1-based)
+  # position i of this string; the test suite checks this for 1..255.
+  allChars = builtins.readFile ./all-chars.bin;
+
+  # A search through a list was the first attempt, but that can
+  # never be fast enough in Nix. Similar to infinisil's solution,
+  # we instead build an attribute set that maps every char of
+  # allChars to its 1-based position in that string.
+  ordMap =
+    let
+      toEntry = idx: ch: { name = ch; value = idx; };
+    in
+    builtins.listToAttrs (lib.imap1 toEntry (string.toChars allChars));
+
+  # Note on performance:
+  # chr and ord have been benchmarked using the following cases:
+  #
+  #  builtins.map ord (lib.stringToCharacters allChars)
+  #  builtins.map chr (builtins.genList (int.add 1) 255)
+  #
+  # The findings are as follows:
+  # 1. Searching through either strings or lists using recursion
+  #    is unbearably slow in Nix, leading to evaluation times
+  #    of up to 3s for the above very small test cases.
+  #    This is why we use the trusty attribute set for ord.
+  # 2. String indexing is much faster than list indexing which
+  #    is why we use the former for chr.
+  #
+  # ord returns the integer value of the single-char string c by
+  # looking it up in ordMap. A string that is not a key of ordMap
+  # makes the attribute selection fail at evaluation time.
+  ord = c: ordMap."${c}";
+
+  # chr returns the char with integer value i by indexing into
+  # allChars. The (i - 1) offset maps the 1-based value onto the
+  # index handed to string.charAt (presumably zero-based; charAt is
+  # defined outside this file -- confirm there).
+  chr = i: string.charAt (i - 1) allChars;
+
+  # True iff c is an ASCII letter, i.e. its integer value lies in
+  # 65..90 ('A'..'Z') or 97..122 ('a'..'z').
+  asciiAlpha = c:
+    let
+      code = ord c;
+      isUpper = 65 <= code && code <= 90;
+      isLower = 97 <= code && code <= 122;
+    in
+    isUpper || isLower;
+
+  # True iff c is an ASCII digit, i.e. its integer value lies in
+  # 48..57 ('0'..'9').
+  asciiNum = c:
+    let
+      code = ord c;
+    in
+    48 <= code && code <= 57;
+
+  # True iff c is an ASCII letter or an ASCII digit.
+  asciiAlphaNum = c: asciiAlpha c || asciiNum c;
+
+in {
+  inherit
+    allChars
+    char
+    charval
+    ord
+    chr
+    asciiAlpha
+    asciiNum
+    asciiAlphaNum
+    ;
+
+  # Originally I generated a nix file containing a list of
+  # characters, but infinisil uses a better approach which I
+  # adopted: reading the data with builtins.readFile instead
+  # of using import.
+  #
+  # The C program below writes the bytes 1 through 0xff to
+  # stdout, in ascending order.
+  __generateAllChars = pkgs.writers.writeC "generate-all-chars" {} ''
+    #include <stdio.h>
+
+    int main(void) {
+      for(int i = 1; i <= 0xff; i++) {
+        putchar(i);
+      }
+    }
+  '';
+}
diff --git a/users/sterni/nix/char/tests/default.nix b/users/sterni/nix/char/tests/default.nix
new file mode 100644
index 000000000000..49b439adbb84
--- /dev/null
+++ b/users/sterni/nix/char/tests/default.nix
@@ -0,0 +1,31 @@
+{ depot, ... }:
+
+let
+  inherit (depot.nix.runTestsuite)
+    it
+    assertEq
+    runTestsuite
+    ;
+
+  inherit (depot.users.sterni.nix)
+    char
+    string
+    int
+    fun
+    ;
+
+  # char.allChars split into a list via string.toChars.
+  charList = string.toChars char.allChars;
+
+  # Checks both conversion directions against charList.
+  # builtins.genList calls its function with the indices 0..254, so
+  # int.add 1 is presumably what shifts them to the values 1..255,
+  # and fun.rl presumably chains that shift with char.chr (int and
+  # fun are defined elsewhere in the depot -- confirm there).
+  testAllCharConversion = it "tests conversion of all chars" [
+    (assertEq "char.chr converts to char.allChars"
+      (builtins.genList (fun.rl char.chr (int.add 1)) 255)
+      charList)
+    (assertEq "char.ord converts from char.allChars"
+      (builtins.genList (int.add 1) 255)
+      (builtins.map char.ord charList))
+  ];
+
+in
+  # Result of this file: the "char" suite, which currently consists
+  # of the single test group testAllCharConversion.
+  runTestsuite "char" [
+    testAllCharConversion
+  ]