about summary refs log tree commit diff
path: root/tvix/eval
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/eval')
-rw-r--r--tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp1
-rw-r--r--tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix6
-rw-r--r--tvix/eval/src/value/string.rs6
3 files changed, 12 insertions, 1 deletions
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp
new file mode 100644
index 000000000000..d889063f9ab1
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.exp
@@ -0,0 +1 @@
+"💭(\":thonking:\")"
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix
new file mode 100644
index 000000000000..49f4b6273106
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-escape-string-correct-char-boundaries.nix
@@ -0,0 +1,6 @@
+# Regression test for a bug where tvix would crash in nix_escape_string
+# because it counted the string position by unicode code point count,
+# but then used it as a byte index for slicing. Consequently, it would
+# try slicing 💭 in half, thinking the first element to be escaped was
+# at byte index 2 (i.e. the quote).
+"💭(\":thonking:\")"
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
index 7144ca360d15..2649e00f0830 100644
--- a/tvix/eval/src/value/string.rs
+++ b/tvix/eval/src/value/string.rs
@@ -193,7 +193,7 @@ fn is_valid_nix_identifier(s: &str) -> bool {
 ///
 /// Note that this does not add the outer pair of surrounding quotes.
 fn nix_escape_string(input: &str) -> Cow<str> {
-    let mut iter = input.chars().enumerate().peekable();
+    let mut iter = input.char_indices().peekable();
 
     while let Some((i, c)) = iter.next() {
         if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) {
@@ -201,6 +201,10 @@ fn nix_escape_string(input: &str) -> Cow<str> {
             escaped.push_str(&input[..i]);
             escaped.push_str(esc);
 
+            // In theory we should calculate how many bytes it takes to
+            // represent the escaped character in UTF-8 and use that for the
+            // offset. It is, however, safe to assume that to be 1, as all
+            // characters that can be escaped in a Nix string are ASCII.
             let mut inner_iter = input[i + 1..].chars().peekable();
             while let Some(c) = inner_iter.next() {
                 match nix_escape_char(c, inner_iter.peek()) {