diff options
3 files changed, 12 insertions, 1 deletions
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp new file mode 100644 index 000000000000..d889063f9ab1 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp @@ -0,0 +1 @@ +"💭(\":thonking:\")" diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix new file mode 100644 index 000000000000..49f4b6273106 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix @@ -0,0 +1,6 @@ +# Regression test for a bug where tvix would crash in nix_escape_string +# because it counted the string position by unicode code point count, +# but then used it as a byte index for slicing. Consequently, it would +# try slicing 💭 in half, thinking the first element to be escaped was +# at byte index 2 (i.e. the quote). +"💭(\":thonking:\")" diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index 7144ca360d15..2649e00f0830 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -193,7 +193,7 @@ fn is_valid_nix_identifier(s: &str) -> bool { /// /// Note that this does not add the outer pair of surrounding quotes. fn nix_escape_string(input: &str) -> Cow<str> { - let mut iter = input.chars().enumerate().peekable(); + let mut iter = input.char_indices().peekable(); while let Some((i, c)) = iter.next() { if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) { @@ -201,6 +201,10 @@ fn nix_escape_string(input: &str) -> Cow<str> { escaped.push_str(&input[..i]); escaped.push_str(esc); + // In theory we calculate how many bytes it takes to represent `esc` + // in UTF-8 and use that for the offset. 
It is, however, safe to + // assume that to be 1, as all characters that can be escaped in a + // Nix string are ASCII. let mut inner_iter = input[i + 1..].chars().peekable(); while let Some(c) = inner_iter.next() { match nix_escape_char(c, inner_iter.peek()) { |