diff options
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 2 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 2 | ||||
-rw-r--r-- | absl/strings/escaping.cc | 188 | ||||
-rw-r--r-- | absl/strings/internal/escaping.cc | 180 | ||||
-rw-r--r-- | absl/strings/internal/escaping.h | 58 | ||||
-rw-r--r-- | absl/strings/string_view.h | 4 | ||||
-rw-r--r-- | absl/strings/string_view_test.cc | 25 |
7 files changed, 282 insertions, 177 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 8d0a6b6d4ef2..dc7e1bfd280e 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -37,6 +37,7 @@ cc_library( "internal/charconv_bigint.h", "internal/charconv_parse.cc", "internal/charconv_parse.h", + "internal/escaping.cc", "internal/memutil.cc", "internal/memutil.h", "internal/stl_type_traits.h", @@ -54,6 +55,7 @@ cc_library( "ascii.h", "charconv.h", "escaping.h", + "internal/escaping.h", "match.h", "numbers.h", "str_cat.h", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 98101573acdb..36702f7106d9 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -38,6 +38,8 @@ absl_cc_library( "internal/charconv_bigint.h" "internal/charconv_parse.cc" "internal/charconv_parse.h" + "internal/escaping.cc" + "internal/escaping.h" "internal/memutil.cc" "internal/memutil.h" "internal/stl_type_traits.h" diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index d2fcd9c111bc..7adc1b657168 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -26,6 +26,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" #include "absl/strings/internal/char_map.h" +#include "absl/strings/internal/escaping.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/utf8.h" #include "absl/strings/str_cat.h" @@ -764,177 +765,10 @@ constexpr signed char kUnWebSafeBase64[] = { }; /* clang-format on */ -size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { - // Base64 encodes three bytes of input at a time. If the input is not - // divisible by three, we pad as appropriate. - // - // (from https://tools.ietf.org/html/rfc3548) - // Special processing is performed if fewer than 24 bits are available - // at the end of the data being encoded. A full encoding quantum is - // always completed at the end of a quantity. When fewer than 24 input - // bits are available in an input group, zero bits are added (on the - // right) to form an integral number of 6-bit groups. Padding at the - // end of the data is performed using the '=' character. Since all base - // 64 input is an integral number of octets, only the following cases - // can arise: - - // Base64 encodes each three bytes of input into four bytes of output. - size_t len = (input_len / 3) * 4; - - if (input_len % 3 == 0) { - // (from https://tools.ietf.org/html/rfc3548) - // (1) the final quantum of encoding input is an integral multiple of 24 - // bits; here, the final unit of encoded output will be an integral - // multiple of 4 characters with no "=" padding, - } else if (input_len % 3 == 1) { - // (from https://tools.ietf.org/html/rfc3548) - // (2) the final quantum of encoding input is exactly 8 bits; here, the - // final unit of encoded output will be two characters followed by two - // "=" padding characters, or - len += 2; - if (do_padding) { - len += 2; - } - } else { // (input_len % 3 == 2) - // (from https://tools.ietf.org/html/rfc3548) - // (3) the final quantum of encoding input is exactly 16 bits; here, the - // final unit of encoded output will be three characters followed by one - // "=" padding character. - len += 3; - if (do_padding) { - len += 1; - } - } - - assert(len >= input_len); // make sure we didn't overflow - return len; -} - -size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, - size_t szdest, const char* base64, - bool do_padding) { - static const char kPad64 = '='; - - if (szsrc * 4 > szdest * 3) return 0; - - char* cur_dest = dest; - const unsigned char* cur_src = src; - - char* const limit_dest = dest + szdest; - const unsigned char* const limit_src = src + szsrc; - - // Three bytes of data encodes to four characters of cyphertext. - // So we can pump through three-byte chunks atomically. - if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. - while (cur_src < limit_src - 3) { // While we have >= 32 bits. - uint32_t in = absl::big_endian::Load32(cur_src) >> 8; - - cur_dest[0] = base64[in >> 18]; - in &= 0x3FFFF; - cur_dest[1] = base64[in >> 12]; - in &= 0xFFF; - cur_dest[2] = base64[in >> 6]; - in &= 0x3F; - cur_dest[3] = base64[in]; - - cur_dest += 4; - cur_src += 3; - } - } - // To save time, we didn't update szdest or szsrc in the loop. So do it now. - szdest = limit_dest - cur_dest; - szsrc = limit_src - cur_src; - - /* now deal with the tail (<=3 bytes) */ - switch (szsrc) { - case 0: - // Nothing left; nothing more to do. - break; - case 1: { - // One byte left: this encodes to two characters, and (optionally) - // two pad characters to round out the four-character cypherblock. - if (szdest < 2) return 0; - uint32_t in = cur_src[0]; - cur_dest[0] = base64[in >> 2]; - in &= 0x3; - cur_dest[1] = base64[in << 4]; - cur_dest += 2; - szdest -= 2; - if (do_padding) { - if (szdest < 2) return 0; - cur_dest[0] = kPad64; - cur_dest[1] = kPad64; - cur_dest += 2; - szdest -= 2; - } - break; - } - case 2: { - // Two bytes left: this encodes to three characters, and (optionally) - // one pad character to round out the four-character cypherblock. - if (szdest < 3) return 0; - uint32_t in = absl::big_endian::Load16(cur_src); - cur_dest[0] = base64[in >> 10]; - in &= 0x3FF; - cur_dest[1] = base64[in >> 4]; - in &= 0x00F; - cur_dest[2] = base64[in << 2]; - cur_dest += 3; - szdest -= 3; - if (do_padding) { - if (szdest < 1) return 0; - cur_dest[0] = kPad64; - cur_dest += 1; - szdest -= 1; - } - break; - } - case 3: { - // Three bytes left: same as in the big loop above. We can't do this in - // the loop because the loop above always reads 4 bytes, and the fourth - // byte is past the end of the input. - if (szdest < 4) return 0; - uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); - cur_dest[0] = base64[in >> 18]; - in &= 0x3FFFF; - cur_dest[1] = base64[in >> 12]; - in &= 0xFFF; - cur_dest[2] = base64[in >> 6]; - in &= 0x3F; - cur_dest[3] = base64[in]; - cur_dest += 4; - szdest -= 4; - break; - } - default: - // Should not be reached: blocks of 4 bytes are handled - // in the while loop before this switch statement. - ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); - break; - } - return (cur_dest - dest); -} - -constexpr char kBase64Chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - constexpr char kWebSafeBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; template <typename String> -void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, - bool do_padding, const char* base64_chars) { - const size_t calc_escaped_size = - CalculateBase64EscapedLenInternal(szsrc, do_padding); - strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); - - const size_t escaped_len = Base64EscapeInternal( - src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); - assert(calc_escaped_size == escaped_len); - dest->erase(escaped_len); -} - -template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { // Determine the size of the output std::string. Base64 encodes every 3 bytes into @@ -1068,26 +902,30 @@ bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { } void Base64Escape(absl::string_view src, std::string* dest) { - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), dest, true, kBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + true, strings_internal::kBase64Chars); } void WebSafeBase64Escape(absl::string_view src, std::string* dest) { - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), dest, false, kWebSafeBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + false, kWebSafeBase64Chars); } std::string Base64Escape(absl::string_view src) { std::string dest; - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), &dest, true, kBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + true, strings_internal::kBase64Chars); return dest; } std::string WebSafeBase64Escape(absl::string_view src) { std::string dest; - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), &dest, false, kWebSafeBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + false, kWebSafeBase64Chars); return dest; } diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc new file mode 100644 index 000000000000..c5271286ad00 --- /dev/null +++ b/absl/strings/internal/escaping.cc @@ -0,0 +1,180 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/escaping.h" + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +const char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from https://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from https://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from https://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from https://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. + while (cur_src < limit_src - 3) { // While we have >= 32 bits. + uint32_t in = absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. + szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. + if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h new file mode 100644 index 000000000000..6a9ce602d9ed --- /dev/null +++ b/absl/strings/internal/escaping.h @@ -0,0 +1,58 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_H_ + +#include <cassert> + +#include "absl/strings/internal/resize_uninitialized.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +ABSL_CONST_INIT extern const char kBase64Chars[]; + +// Calculates how long a string will be when it is base64 encoded given its +// length and whether or not the result should be padded. +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. Returns the length of `dest`. +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. +template <typename String> +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_ diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 4b34e5634f13..4f7dd6b3a17b 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -122,7 +122,7 @@ ABSL_NAMESPACE_BEGIN // // You may create a null `string_view` in two ways: // -// absl::string_view sv(); +// absl::string_view sv; // absl::string_view sv(nullptr, 0); // // For the above, `sv.data() == nullptr`, `sv.length() == 0`, and @@ -605,7 +605,7 @@ inline string_view ClippedSubstr(string_view s, size_t pos, // Creates an `absl::string_view` from a pointer `p` even if it's null-valued. // This function should be used where an `absl::string_view` can be created from // a possibly-null pointer. -inline string_view NullSafeStringView(const char* p) { +constexpr string_view NullSafeStringView(const char* p) { return p ? string_view(p) : string_view(); } diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 96dacdf048cf..c4fbd16c201a 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -931,6 +931,31 @@ TEST(StringViewTest, NullSafeStringView) { } } +TEST(StringViewTest, ConstexprNullSafeStringView) { + { + constexpr absl::string_view s = absl::NullSafeStringView(nullptr); + EXPECT_EQ(nullptr, s.data()); + EXPECT_EQ(0, s.size()); + EXPECT_EQ(absl::string_view(), s); + } +#if !defined(_MSC_VER) || _MSC_VER >= 1910 + // MSVC 2017+ is required for good constexpr string_view support. + // See the implementation of `absl::string_view::StrlenInternal()`. + { + static constexpr char kHi[] = "hi"; + absl::string_view s = absl::NullSafeStringView(kHi); + EXPECT_EQ(kHi, s.data()); + EXPECT_EQ(strlen(kHi), s.size()); + EXPECT_EQ(absl::string_view("hi"), s); + } + { + constexpr absl::string_view s = absl::NullSafeStringView("hello"); + EXPECT_EQ(s.size(), 5); + EXPECT_EQ("hello", s); + } +#endif +} + TEST(StringViewTest, ConstexprCompiles) { constexpr absl::string_view sp; #ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR |