about summary refs log tree commit diff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/escaping.cc17
-rw-r--r--absl/strings/escaping_test.cc13
2 files changed, 25 insertions, 5 deletions
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 7d688ac3fac7..abe9e0aaacd9 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -72,6 +72,17 @@ inline int hex_digit_to_int(char c) {
   return x & 0xf;
 }
 
+inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {
+  if (c >= 0xD800 && c <= 0xDFFF) {
+    if (error) {
+      *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
+                            src);
+    }
+    return true;
+  }
+  return false;
+}
+
 // ----------------------------------------------------------------------
 // CUnescapeInternal()
 //    Implements both CUnescape() and CUnescapeForNullTerminatedString().
@@ -214,6 +225,9 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
             d += 5;
             break;
           }
+          if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
+            return false;
+          }
           d += strings_internal::EncodeUTF8Char(d, rune);
           break;
         }
@@ -259,6 +273,9 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
             d += 9;
             break;
           }
+          if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
+            return false;
+          }
           d += strings_internal::EncodeUTF8Char(d, rune);
           break;
         }
diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc
index ecac9ca8c588..982989b8b4a4 100644
--- a/absl/strings/escaping_test.cc
+++ b/absl/strings/escaping_test.cc
@@ -160,11 +160,14 @@ TEST(Unescape, BasicFunction) {
     EXPECT_EQ(out, val.unescaped);
   }
   std::string bad[] =
-     {"\\u1",  // too short
-      "\\U1",  // too short
-      "\\Uffffff",
-      "\\777",  // exceeds 0xff
-      "\\xABCD"};  // exceeds 0xff
+     {"\\u1",         // too short
+      "\\U1",         // too short
+      "\\Uffffff",    // exceeds 0x10ffff (largest Unicode)
+      "\\U00110000",  // exceeds 0x10ffff (largest Unicode)
+      "\\uD835",      // surrogate character (D800-DFFF)
+      "\\U0000DD04",  // surrogate character (D800-DFFF)
+      "\\777",        // exceeds 0xff
+      "\\xABCD"};     // exceeds 0xff
   for (const std::string& e : bad) {
     std::string error;
     std::string out;