diff options
author | sterni <sternenseemann@systemli.org> | 2021-11-22T21·20+0100 |
---|---|---|
committer | sterni <sternenseemann@systemli.org> | 2021-11-23T13·23+0100 |
commit | ab92c42f594c22ad6a5bb090e7f4f347ddddcd34 (patch) | |
tree | d330932acd7fe469296457dc168da8fdeea9c4cf /users/sterni | |
parent | 8615322bc81dfe106aa158b95567300372ea469d (diff) |
feat(sterni/nix/utf8): allow decoding the empty string r/3086
Change-Id: I8de9cd28c822ac5befbcd16e118440cd13cd86e9
Diffstat (limited to 'users/sterni')
-rw-r--r-- | users/sterni/nix/utf8/default.nix | 4 | ||||
-rw-r--r-- | users/sterni/nix/utf8/tests/default.nix | 2 |
2 files changed, 3 insertions, 3 deletions
```diff
diff --git a/users/sterni/nix/utf8/default.nix b/users/sterni/nix/utf8/default.nix
index c89263cd8fbf..c4a3e8eb03b3 100644
--- a/users/sterni/nix/utf8/default.nix
+++ b/users/sterni/nix/utf8/default.nix
@@ -201,10 +201,10 @@ let
  # filter out all iteration steps without a codepoint value
  codepoint != null
- # if we are at the iteration step of the input string, throw
+ # if we are at the iteration step of a non-empty input string, throw
  # an error if no codepoint was returned, as it indicates an incomplete
  # UTF-8 sequence.
- || (stringIndex == stringLength - 1 && throw earlyEndMsg)
+ || (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg)
  ) iterResult
  );
diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix
index ed38bd1241f3..2f8054fad6d9 100644
--- a/users/sterni/nix/utf8/tests/default.nix
+++ b/users/sterni/nix/utf8/tests/default.nix
@@ -56,7 +56,6 @@ let
  utf8.decode (string.fromBytes (builtins.map int.fromHex l));
  testFailures = it "checks UTF-8 decoding failures" [
- (assertThrows "emtpy bytestring throws" (utf8.decode ""))
  (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ]))
  # examples from The Unicode Standard
  (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ]))
@@ -75,6 +74,7 @@ let
  ]);
  randomUnicode = [
+ "" # empty string should yield empty list
  "🥰👨👨👧👦🐈⬛👩🏽🦰"
  # https://kermitproject.org/utf8.html
  "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ"
```