From ab92c42f594c22ad6a5bb090e7f4f347ddddcd34 Mon Sep 17 00:00:00 2001 From: sterni Date: Mon, 22 Nov 2021 22:20:16 +0100 Subject: feat(sterni/nix/utf8): allow decoding the empty string Change-Id: I8de9cd28c822ac5befbcd16e118440cd13cd86e9 --- users/sterni/nix/utf8/default.nix | 4 ++-- users/sterni/nix/utf8/tests/default.nix | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/users/sterni/nix/utf8/default.nix b/users/sterni/nix/utf8/default.nix index c89263cd8f..c4a3e8eb03 100644 --- a/users/sterni/nix/utf8/default.nix +++ b/users/sterni/nix/utf8/default.nix @@ -201,10 +201,10 @@ let # filter out all iteration steps without a codepoint value codepoint != null - # if we are at the iteration step of the input string, throw + # if we are at the iteration step of a non-empty input string, throw # an error if no codepoint was returned, as it indicates an incomplete # UTF-8 sequence. - || (stringIndex == stringLength - 1 && throw earlyEndMsg) + || (stringLength > 0 && stringIndex == stringLength - 1 && throw earlyEndMsg) ) iterResult ); diff --git a/users/sterni/nix/utf8/tests/default.nix b/users/sterni/nix/utf8/tests/default.nix index ed38bd1241..2f8054fad6 100644 --- a/users/sterni/nix/utf8/tests/default.nix +++ b/users/sterni/nix/utf8/tests/default.nix @@ -56,7 +56,6 @@ let utf8.decode (string.fromBytes (builtins.map int.fromHex l)); testFailures = it "checks UTF-8 decoding failures" [ - (assertThrows "emtpy bytestring throws" (utf8.decode "")) (assertThrows "truncated UTF-8 string throws" (hexDecode [ "F0" "9F" ])) # examples from The Unicode Standard (assertThrows "ill-formed: C0 AF" (hexDecode [ "C0" "AF" ])) @@ -75,6 +74,7 @@ let ]); randomUnicode = [ + "" # empty string should yield empty list "🥰👨‍👨‍👧‍👦🐈‍⬛👩🏽‍🦰" # https://kermitproject.org/utf8.html "ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ" -- cgit 1.4.1