about summary refs log tree commit diff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel2
-rw-r--r--absl/strings/CMakeLists.txt2
-rw-r--r--absl/strings/escaping.cc188
-rw-r--r--absl/strings/internal/escaping.cc180
-rw-r--r--absl/strings/internal/escaping.h58
-rw-r--r--absl/strings/string_view.h4
-rw-r--r--absl/strings/string_view_test.cc25
7 files changed, 282 insertions, 177 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 8d0a6b6d4ef2..dc7e1bfd280e 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -37,6 +37,7 @@ cc_library(
         "internal/charconv_bigint.h",
         "internal/charconv_parse.cc",
         "internal/charconv_parse.h",
+        "internal/escaping.cc",
         "internal/memutil.cc",
         "internal/memutil.h",
         "internal/stl_type_traits.h",
@@ -54,6 +55,7 @@ cc_library(
         "ascii.h",
         "charconv.h",
         "escaping.h",
+        "internal/escaping.h",
         "match.h",
         "numbers.h",
         "str_cat.h",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index 98101573acdb..36702f7106d9 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -38,6 +38,8 @@ absl_cc_library(
     "internal/charconv_bigint.h"
     "internal/charconv_parse.cc"
     "internal/charconv_parse.h"
+    "internal/escaping.cc"
+    "internal/escaping.h"
     "internal/memutil.cc"
     "internal/memutil.h"
     "internal/stl_type_traits.h"
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index d2fcd9c111bc..7adc1b657168 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -26,6 +26,7 @@
 #include "absl/base/internal/raw_logging.h"
 #include "absl/base/internal/unaligned_access.h"
 #include "absl/strings/internal/char_map.h"
+#include "absl/strings/internal/escaping.h"
 #include "absl/strings/internal/resize_uninitialized.h"
 #include "absl/strings/internal/utf8.h"
 #include "absl/strings/str_cat.h"
@@ -764,177 +765,10 @@ constexpr signed char kUnWebSafeBase64[] = {
 };
 /* clang-format on */
 
-size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
-  // Base64 encodes three bytes of input at a time. If the input is not
-  // divisible by three, we pad as appropriate.
-  //
-  // (from https://tools.ietf.org/html/rfc3548)
-  // Special processing is performed if fewer than 24 bits are available
-  // at the end of the data being encoded.  A full encoding quantum is
-  // always completed at the end of a quantity.  When fewer than 24 input
-  // bits are available in an input group, zero bits are added (on the
-  // right) to form an integral number of 6-bit groups.  Padding at the
-  // end of the data is performed using the '=' character.  Since all base
-  // 64 input is an integral number of octets, only the following cases
-  // can arise:
-
-  // Base64 encodes each three bytes of input into four bytes of output.
-  size_t len = (input_len / 3) * 4;
-
-  if (input_len % 3 == 0) {
-    // (from https://tools.ietf.org/html/rfc3548)
-    // (1) the final quantum of encoding input is an integral multiple of 24
-    // bits; here, the final unit of encoded output will be an integral
-    // multiple of 4 characters with no "=" padding,
-  } else if (input_len % 3 == 1) {
-    // (from https://tools.ietf.org/html/rfc3548)
-    // (2) the final quantum of encoding input is exactly 8 bits; here, the
-    // final unit of encoded output will be two characters followed by two
-    // "=" padding characters, or
-    len += 2;
-    if (do_padding) {
-      len += 2;
-    }
-  } else {  // (input_len % 3 == 2)
-    // (from https://tools.ietf.org/html/rfc3548)
-    // (3) the final quantum of encoding input is exactly 16 bits; here, the
-    // final unit of encoded output will be three characters followed by one
-    // "=" padding character.
-    len += 3;
-    if (do_padding) {
-      len += 1;
-    }
-  }
-
-  assert(len >= input_len);  // make sure we didn't overflow
-  return len;
-}
-
-size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
-                            size_t szdest, const char* base64,
-                            bool do_padding) {
-  static const char kPad64 = '=';
-
-  if (szsrc * 4 > szdest * 3) return 0;
-
-  char* cur_dest = dest;
-  const unsigned char* cur_src = src;
-
-  char* const limit_dest = dest + szdest;
-  const unsigned char* const limit_src = src + szsrc;
-
-  // Three bytes of data encodes to four characters of cyphertext.
-  // So we can pump through three-byte chunks atomically.
-  if (szsrc >= 3) {  // "limit_src - 3" is UB if szsrc < 3.
-    while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
-      uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
-
-      cur_dest[0] = base64[in >> 18];
-      in &= 0x3FFFF;
-      cur_dest[1] = base64[in >> 12];
-      in &= 0xFFF;
-      cur_dest[2] = base64[in >> 6];
-      in &= 0x3F;
-      cur_dest[3] = base64[in];
-
-      cur_dest += 4;
-      cur_src += 3;
-    }
-  }
-  // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
-  szdest = limit_dest - cur_dest;
-  szsrc = limit_src - cur_src;
-
-  /* now deal with the tail (<=3 bytes) */
-  switch (szsrc) {
-    case 0:
-      // Nothing left; nothing more to do.
-      break;
-    case 1: {
-      // One byte left: this encodes to two characters, and (optionally)
-      // two pad characters to round out the four-character cypherblock.
-      if (szdest < 2) return 0;
-      uint32_t in = cur_src[0];
-      cur_dest[0] = base64[in >> 2];
-      in &= 0x3;
-      cur_dest[1] = base64[in << 4];
-      cur_dest += 2;
-      szdest -= 2;
-      if (do_padding) {
-        if (szdest < 2) return 0;
-        cur_dest[0] = kPad64;
-        cur_dest[1] = kPad64;
-        cur_dest += 2;
-        szdest -= 2;
-      }
-      break;
-    }
-    case 2: {
-      // Two bytes left: this encodes to three characters, and (optionally)
-      // one pad character to round out the four-character cypherblock.
-      if (szdest < 3) return 0;
-      uint32_t in = absl::big_endian::Load16(cur_src);
-      cur_dest[0] = base64[in >> 10];
-      in &= 0x3FF;
-      cur_dest[1] = base64[in >> 4];
-      in &= 0x00F;
-      cur_dest[2] = base64[in << 2];
-      cur_dest += 3;
-      szdest -= 3;
-      if (do_padding) {
-        if (szdest < 1) return 0;
-        cur_dest[0] = kPad64;
-        cur_dest += 1;
-        szdest -= 1;
-      }
-      break;
-    }
-    case 3: {
-      // Three bytes left: same as in the big loop above.  We can't do this in
-      // the loop because the loop above always reads 4 bytes, and the fourth
-      // byte is past the end of the input.
-      if (szdest < 4) return 0;
-      uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
-      cur_dest[0] = base64[in >> 18];
-      in &= 0x3FFFF;
-      cur_dest[1] = base64[in >> 12];
-      in &= 0xFFF;
-      cur_dest[2] = base64[in >> 6];
-      in &= 0x3F;
-      cur_dest[3] = base64[in];
-      cur_dest += 4;
-      szdest -= 4;
-      break;
-    }
-    default:
-      // Should not be reached: blocks of 4 bytes are handled
-      // in the while loop before this switch statement.
-      ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
-      break;
-  }
-  return (cur_dest - dest);
-}
-
-constexpr char kBase64Chars[] =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-
 constexpr char kWebSafeBase64Chars[] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
 
 template <typename String>
-void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
-                          bool do_padding, const char* base64_chars) {
-  const size_t calc_escaped_size =
-      CalculateBase64EscapedLenInternal(szsrc, do_padding);
-  strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size);
-
-  const size_t escaped_len = Base64EscapeInternal(
-      src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
-  assert(calc_escaped_size == escaped_len);
-  dest->erase(escaped_len);
-}
-
-template <typename String>
 bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
                             const signed char* unbase64) {
   // Determine the size of the output std::string.  Base64 encodes every 3 bytes into
@@ -1068,26 +902,30 @@ bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) {
 }
 
 void Base64Escape(absl::string_view src, std::string* dest) {
-  Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
-                       src.size(), dest, true, kBase64Chars);
+  strings_internal::Base64EscapeInternal(
+      reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
+      true, strings_internal::kBase64Chars);
 }
 
 void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
-  Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
-                       src.size(), dest, false, kWebSafeBase64Chars);
+  strings_internal::Base64EscapeInternal(
+      reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
+      false, kWebSafeBase64Chars);
 }
 
 std::string Base64Escape(absl::string_view src) {
   std::string dest;
-  Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
-                       src.size(), &dest, true, kBase64Chars);
+  strings_internal::Base64EscapeInternal(
+      reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
+      true, strings_internal::kBase64Chars);
   return dest;
 }
 
 std::string WebSafeBase64Escape(absl::string_view src) {
   std::string dest;
-  Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
-                       src.size(), &dest, false, kWebSafeBase64Chars);
+  strings_internal::Base64EscapeInternal(
+      reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
+      false, kWebSafeBase64Chars);
   return dest;
 }
 
diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc
new file mode 100644
index 000000000000..c5271286ad00
--- /dev/null
+++ b/absl/strings/internal/escaping.cc
@@ -0,0 +1,180 @@
+// Copyright 2020 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/escaping.h"
+
+#include "absl/base/internal/endian.h"
+#include "absl/base/internal/raw_logging.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+
+const char kBase64Chars[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
+  // Base64 encodes three bytes of input at a time. If the input is not
+  // divisible by three, we pad as appropriate.
+  //
+  // (from https://tools.ietf.org/html/rfc3548)
+  // Special processing is performed if fewer than 24 bits are available
+  // at the end of the data being encoded.  A full encoding quantum is
+  // always completed at the end of a quantity.  When fewer than 24 input
+  // bits are available in an input group, zero bits are added (on the
+  // right) to form an integral number of 6-bit groups.  Padding at the
+  // end of the data is performed using the '=' character.  Since all base
+  // 64 input is an integral number of octets, only the following cases
+  // can arise:
+
+  // Base64 encodes each three bytes of input into four bytes of output.
+  size_t len = (input_len / 3) * 4;
+
+  if (input_len % 3 == 0) {
+    // (from https://tools.ietf.org/html/rfc3548)
+    // (1) the final quantum of encoding input is an integral multiple of 24
+    // bits; here, the final unit of encoded output will be an integral
+    // multiple of 4 characters with no "=" padding,
+  } else if (input_len % 3 == 1) {
+    // (from https://tools.ietf.org/html/rfc3548)
+    // (2) the final quantum of encoding input is exactly 8 bits; here, the
+    // final unit of encoded output will be two characters followed by two
+    // "=" padding characters, or
+    len += 2;
+    if (do_padding) {
+      len += 2;
+    }
+  } else {  // (input_len % 3 == 2)
+    // (from https://tools.ietf.org/html/rfc3548)
+    // (3) the final quantum of encoding input is exactly 16 bits; here, the
+    // final unit of encoded output will be three characters followed by one
+    // "=" padding character.
+    len += 3;
+    if (do_padding) {
+      len += 1;
+    }
+  }
+
+  assert(len >= input_len);  // make sure we didn't overflow
+  return len;
+}
+
+size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
+                            size_t szdest, const char* base64,
+                            bool do_padding) {
+  static const char kPad64 = '=';
+
+  if (szsrc * 4 > szdest * 3) return 0;
+
+  char* cur_dest = dest;
+  const unsigned char* cur_src = src;
+
+  char* const limit_dest = dest + szdest;
+  const unsigned char* const limit_src = src + szsrc;
+
+  // Three bytes of data encodes to four characters of cyphertext.
+  // So we can pump through three-byte chunks atomically.
+  if (szsrc >= 3) {                    // "limit_src - 3" is UB if szsrc < 3.
+    while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
+      uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
+
+      cur_dest[0] = base64[in >> 18];
+      in &= 0x3FFFF;
+      cur_dest[1] = base64[in >> 12];
+      in &= 0xFFF;
+      cur_dest[2] = base64[in >> 6];
+      in &= 0x3F;
+      cur_dest[3] = base64[in];
+
+      cur_dest += 4;
+      cur_src += 3;
+    }
+  }
+  // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
+  szdest = limit_dest - cur_dest;
+  szsrc = limit_src - cur_src;
+
+  /* now deal with the tail (<=3 bytes) */
+  switch (szsrc) {
+    case 0:
+      // Nothing left; nothing more to do.
+      break;
+    case 1: {
+      // One byte left: this encodes to two characters, and (optionally)
+      // two pad characters to round out the four-character cypherblock.
+      if (szdest < 2) return 0;
+      uint32_t in = cur_src[0];
+      cur_dest[0] = base64[in >> 2];
+      in &= 0x3;
+      cur_dest[1] = base64[in << 4];
+      cur_dest += 2;
+      szdest -= 2;
+      if (do_padding) {
+        if (szdest < 2) return 0;
+        cur_dest[0] = kPad64;
+        cur_dest[1] = kPad64;
+        cur_dest += 2;
+        szdest -= 2;
+      }
+      break;
+    }
+    case 2: {
+      // Two bytes left: this encodes to three characters, and (optionally)
+      // one pad character to round out the four-character cypherblock.
+      if (szdest < 3) return 0;
+      uint32_t in = absl::big_endian::Load16(cur_src);
+      cur_dest[0] = base64[in >> 10];
+      in &= 0x3FF;
+      cur_dest[1] = base64[in >> 4];
+      in &= 0x00F;
+      cur_dest[2] = base64[in << 2];
+      cur_dest += 3;
+      szdest -= 3;
+      if (do_padding) {
+        if (szdest < 1) return 0;
+        cur_dest[0] = kPad64;
+        cur_dest += 1;
+        szdest -= 1;
+      }
+      break;
+    }
+    case 3: {
+      // Three bytes left: same as in the big loop above.  We can't do this in
+      // the loop because the loop above always reads 4 bytes, and the fourth
+      // byte is past the end of the input.
+      if (szdest < 4) return 0;
+      uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
+      cur_dest[0] = base64[in >> 18];
+      in &= 0x3FFFF;
+      cur_dest[1] = base64[in >> 12];
+      in &= 0xFFF;
+      cur_dest[2] = base64[in >> 6];
+      in &= 0x3F;
+      cur_dest[3] = base64[in];
+      cur_dest += 4;
+      szdest -= 4;
+      break;
+    }
+    default:
+      // Should not be reached: blocks of 4 bytes are handled
+      // in the while loop before this switch statement.
+      ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
+      break;
+  }
+  return (cur_dest - dest);
+}
+
+}  // namespace strings_internal
+ABSL_NAMESPACE_END
+}  // namespace absl
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
new file mode 100644
index 000000000000..6a9ce602d9ed
--- /dev/null
+++ b/absl/strings/internal/escaping.h
@@ -0,0 +1,58 @@
+// Copyright 2020 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
+#define ABSL_STRINGS_INTERNAL_ESCAPING_H_
+
+#include <cassert>
+
+#include "absl/strings/internal/resize_uninitialized.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+
+ABSL_CONST_INIT extern const char kBase64Chars[];
+
+// Calculates how long a string will be when it is base64 encoded given its
+// length and whether or not the result should be padded.
+size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
+
+// Base64-encodes `src` using the alphabet provided in `base64` and writes the
+// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
+// until its length is a multiple of 3. Returns the length of `dest`.
+size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
+                            size_t szdest, const char* base64, bool do_padding);
+
+// Base64-encodes `src` using the alphabet provided in `base64` and writes the
+// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
+// until its length is a multiple of 3.
+template <typename String>
+void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
+                          bool do_padding, const char* base64_chars) {
+  const size_t calc_escaped_size =
+      CalculateBase64EscapedLenInternal(szsrc, do_padding);
+  STLStringResizeUninitialized(dest, calc_escaped_size);
+
+  const size_t escaped_len = Base64EscapeInternal(
+      src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
+  assert(calc_escaped_size == escaped_len);
+  dest->erase(escaped_len);
+}
+
+}  // namespace strings_internal
+ABSL_NAMESPACE_END
+}  // namespace absl
+
+#endif  // ABSL_STRINGS_INTERNAL_ESCAPING_H_
diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h
index 4b34e5634f13..4f7dd6b3a17b 100644
--- a/absl/strings/string_view.h
+++ b/absl/strings/string_view.h
@@ -122,7 +122,7 @@ ABSL_NAMESPACE_BEGIN
 //
 // You may create a null `string_view` in two ways:
 //
-//   absl::string_view sv();
+//   absl::string_view sv;
 //   absl::string_view sv(nullptr, 0);
 //
 // For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
@@ -605,7 +605,7 @@ inline string_view ClippedSubstr(string_view s, size_t pos,
 // Creates an `absl::string_view` from a pointer `p` even if it's null-valued.
 // This function should be used where an `absl::string_view` can be created from
 // a possibly-null pointer.
-inline string_view NullSafeStringView(const char* p) {
+constexpr string_view NullSafeStringView(const char* p) {
   return p ? string_view(p) : string_view();
 }
 
diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc
index 96dacdf048cf..c4fbd16c201a 100644
--- a/absl/strings/string_view_test.cc
+++ b/absl/strings/string_view_test.cc
@@ -931,6 +931,31 @@ TEST(StringViewTest, NullSafeStringView) {
   }
 }
 
+TEST(StringViewTest, ConstexprNullSafeStringView) {
+  {
+    constexpr absl::string_view s = absl::NullSafeStringView(nullptr);
+    EXPECT_EQ(nullptr, s.data());
+    EXPECT_EQ(0, s.size());
+    EXPECT_EQ(absl::string_view(), s);
+  }
+#if !defined(_MSC_VER) || _MSC_VER >= 1910
+  // MSVC 2017+ is required for good constexpr string_view support.
+  // See the implementation of `absl::string_view::StrlenInternal()`.
+  {
+    static constexpr char kHi[] = "hi";
+    absl::string_view s = absl::NullSafeStringView(kHi);
+    EXPECT_EQ(kHi, s.data());
+    EXPECT_EQ(strlen(kHi), s.size());
+    EXPECT_EQ(absl::string_view("hi"), s);
+  }
+  {
+    constexpr absl::string_view s = absl::NullSafeStringView("hello");
+    EXPECT_EQ(s.size(), 5);
+    EXPECT_EQ("hello", s);
+  }
+#endif
+}
+
 TEST(StringViewTest, ConstexprCompiles) {
   constexpr absl::string_view sp;
 #ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR