about summary refs log tree commit diff
path: root/absl/strings
diff options
context:
space:
mode:
author Abseil Team <absl-team@google.com> 2017-12-13T20:02-0800
committer Titus Winters <titus@google.com> 2017-12-14T14:04-0500
commit 720c017e30339fd1786ce4aac68bc8559736e53f (patch)
tree e1ee954f7311f39125b93c91303828e27fc5e4cf /absl/strings
parent 5fe41affbaab5b9ad4876a6295c78f21a86d862d (diff)
Changes imported from Abseil "staging" branch:
  - a42e9b454ca8be7d021789cdb9bcada07d3e2d3e Merge pull request #57. by Derek Mauro <dmauro@google.com>
  - b1e03838f059c034a6489501804d516326246042 Move the long ostream tests into a separate source file u... by Alex Strelnikov <strel@google.com>
  - 7c56b7dbb05faa7e8653632e00be470331d79cb9 Return reference from absl::InlinedVector::emplace_back(). by Abseil Team <absl-team@google.com>
  - 85b070822b62688ff348d9ad9cc9e230a851f617 Treat \u or \U followed by Unicode surrogate character as... by Abseil Team <absl-team@google.com>

GitOrigin-RevId: a42e9b454ca8be7d021789cdb9bcada07d3e2d3e
Change-Id: I7d8fb68ffd7eb4e9e737f21fbed6d56b71985f94
Diffstat (limited to 'absl/strings')
-rw-r--r-- absl/strings/escaping.cc 17
-rw-r--r-- absl/strings/escaping_test.cc 13
2 files changed, 25 insertions, 5 deletions
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 7d688ac3fac7..abe9e0aaacd9 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -72,6 +72,17 @@ inline int hex_digit_to_int(char c) {
   return x & 0xf;
 }
 
+inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {  // Returns true iff c is a surrogate code point; also reports it via *error.
+  if (c >= 0xD800 && c <= 0xDFFF) {  // Inclusive surrogate range D800-DFFF.
+    if (error) {  // error may be null, in which case no message is produced.
+      *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",  // Message ends with one literal backslash, then src is appended.
+                            src);  // NOTE(review): src is presumably the escape text after the backslash (e.g. uXXXX) -- confirm against callers.
+    }
+    return true;  // c is a surrogate; visible callers treat this as a failed unescape.
+  }
+  return false;  // Not a surrogate; *error is left untouched.
+}
+
 // ----------------------------------------------------------------------
 // CUnescapeInternal()
 //    Implements both CUnescape() and CUnescapeForNullTerminatedString().
@@ -214,6 +225,9 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
             d += 5;
             break;
           }
+          if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
+            return false;
+          }
           d += strings_internal::EncodeUTF8Char(d, rune);
           break;
         }
@@ -259,6 +273,9 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
             d += 9;
             break;
           }
+          if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
+            return false;
+          }
           d += strings_internal::EncodeUTF8Char(d, rune);
           break;
         }
diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc
index ecac9ca8c588..982989b8b4a4 100644
--- a/absl/strings/escaping_test.cc
+++ b/absl/strings/escaping_test.cc
@@ -160,11 +160,14 @@ TEST(Unescape, BasicFunction) {
     EXPECT_EQ(out, val.unescaped);
   }
   std::string bad[] =
-     {"\\u1",  // too short
-      "\\U1",  // too short
-      "\\Uffffff",
-      "\\777",  // exceeds 0xff
-      "\\xABCD"};  // exceeds 0xff
+     {"\\u1",         // too short
+      "\\U1",         // too short
+      "\\Uffffff",    // exceeds 0x10ffff (largest Unicode)
+      "\\U00110000",  // exceeds 0x10ffff (largest Unicode)
+      "\\uD835",      // surrogate character (D800-DFFF)
+      "\\U0000DD04",  // surrogate character (D800-DFFF)
+      "\\777",        // exceeds 0xff
+      "\\xABCD"};     // exceeds 0xff
   for (const std::string& e : bad) {
     std::string error;
     std::string out;