about summary refs log tree commit diff
diff options
context:
space:
mode:
author sterni <sternenseemann@systemli.org> 2021-11-22T21·20+0100
committer sterni <sternenseemann@systemli.org> 2021-11-23T13·23+0100
commit ab92c42f594c22ad6a5bb090e7f4f347ddddcd34 (patch)
tree d330932acd7fe469296457dc168da8fdeea9c4cf
parent 8615322bc81dfe106aa158b95567300372ea469d (diff)
feat(sterni/nix/utf8): allow decoding the empty string r/3086
Change-Id: I8de9cd28c822ac5befbcd16e118440cd13cd86e9
-rw-r--r-- users/sterni/nix/utf8/default.nix 4
-rw-r--r-- users/sterni/nix/utf8/tests/default.nix 2
2 files changed, 3 insertions, 3 deletions
diff --git a/users/sterni/nix/utf8/default.nix b/users/sterni/nix/utf8/default.nix
index c89263cd8f..c4a3e8eb03 100644
--- a/users/sterni/nix/utf8/default.nix
+++ b/users/sterni/nix/utf8/default.nix
@@ -201,10 +201,10 @@ let
 
         # filter out all iteration steps without a codepoint value
         codepoint != null
-          # if we are at the iteration step of the input string, throw
+          # if we are at the iteration step of a non-empty input string, throw
           # an error if no codepoint was returned, as it indicates an incomplete
           # UTF-8 sequence.
-          || (stringIndex == stringLength - 1 && throw earlyEndMsg)
+          || (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg)
 
       ) iterResult
     );
diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix
index ed38bd1241..2f8054fad6 100644
--- a/users/sterni/nix/utf8/tests/default.nix
+++ b/users/sterni/nix/utf8/tests/default.nix
@@ -56,7 +56,6 @@ let
     utf8.decode (string.fromBytes (builtins.map int.fromHex l));
 
   testFailures = it "checks UTF-8 decoding failures" [
-    (assertThrows "emtpy bytestring throws" (utf8.decode ""))
     (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ]))
     # examples from The Unicode Standard
     (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ]))
@@ -75,6 +74,7 @@ let
       ]);
 
   randomUnicode = [
+    "" # empty string should yield empty list
     "🥰👨‍👨‍👧‍👦🐈‍⬛👩🏽‍🦰"
     # https://kermitproject.org/utf8.html
     "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"