diff options
Diffstat (limited to 'absl/strings')
55 files changed, 16262 insertions, 0 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel new file mode 100644 index 000000000000..070721cc117c --- /dev/null +++ b/absl/strings/BUILD.bazel @@ -0,0 +1,293 @@ +# +# Copyright 2017 The Abseil Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# -*- mode: python; -*- +# Libraries in this low-level package may not depend on libraries in packages +# that are not low level. For more information, including how to submit +# changes to this file, see http://www/eng/howto/build-monitors.html + +load( + "//absl:test_dependencies.bzl", + "GUNIT_MAIN_DEPS_SELECTOR", + "GUNIT_DEPS_SELECTOR", +) +load( + "//absl:copts.bzl", + "ABSL_DEFAULT_COPTS", + "ABSL_TEST_COPTS", + "ABSL_EXCEPTIONS_FLAG", +) + +package( + default_visibility = ["//visibility:public"], + features = [ + "parse_headers", + "header_modules", + ], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "strings", + srcs = [ + "ascii.cc", + "escaping.cc", + "internal/memutil.cc", + "internal/memutil.h", + "internal/str_join_internal.h", + "internal/str_split_internal.h", + "match.cc", + "numbers.cc", + "str_cat.cc", + "str_replace.cc", + "str_split.cc", + "string_view.cc", + "substitute.cc", + ], + hdrs = [ + "ascii.h", + "escaping.h", + "match.h", + "numbers.h", + "str_cat.h", + "str_join.h", + "str_replace.h", + "str_split.h", + "string_view.h", + "strip.h", + "substitute.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":internal", + "//absl/base", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:throw_delegate", + "//absl/memory", + "//absl/meta:type_traits", + "//absl/numeric:int128", + ], +) + +cc_library( + name = "internal", + srcs = [ + "internal/utf8.cc", + ], + hdrs = [ + "internal/char_map.h", + "internal/fastmem.h", + "internal/ostringstream.h", + "internal/resize_uninitialized.h", + "internal/utf8.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/meta:type_traits", + ], +) + +cc_test( + name = "match_test", + size = "small", + srcs = ["match_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [":strings"] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "escaping_test", + size = "small", + srcs = [ + "escaping_test.cc", + "internal/escaping_test_common.inc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/container:fixed_array", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "ascii_test", + size = "small", + srcs = ["ascii_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "memutil_test", + size = "small", + srcs = [ + "internal/memutil.h", + "internal/memutil_test.cc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "utf8_test", + size = "small", + srcs = [ + "internal/utf8_test.cc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + ":internal", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "string_view_test", + size = "small", + srcs = ["string_view_test.cc"], + copts = ABSL_TEST_COPTS + ABSL_EXCEPTIONS_FLAG, + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/base:config", + "//absl/base:dynamic_annotations", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "substitute_test", + size = "small", + srcs = ["substitute_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "str_replace_test", + size = "small", + srcs = ["str_replace_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "str_split_test", + srcs = ["str_split_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/base:dynamic_annotations", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "ostringstream_test", + size = "small", + srcs = ["internal/ostringstream_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":internal", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "resize_uninitialized_test", + size = "small", + srcs = [ + "internal/resize_uninitialized.h", + "internal/resize_uninitialized_test.cc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + "//absl/base:core_headers", + "//absl/meta:type_traits", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "str_join_test", + size = "small", + srcs = ["str_join_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/memory", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "str_cat_test", + size = "small", + srcs = ["str_cat_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:core_headers", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "numbers_test", + size = "small", + srcs = [ + "internal/numbers_test_common.inc", + "numbers_test.cc", + ], + copts = ABSL_TEST_COPTS, + tags = [ + "no_test_loonix", + ], + deps = [ + ":strings", + "//absl/base", + "//absl/base:core_headers", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "strip_test", + size = "small", + srcs = ["strip_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [":strings"] + select(GUNIT_MAIN_DEPS_SELECTOR), +) + +cc_test( + name = "char_map_test", + srcs = ["internal/char_map_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":internal", + ] + select(GUNIT_MAIN_DEPS_SELECTOR), +) diff --git a/absl/strings/README.md b/absl/strings/README.md new file mode 100644 index 000000000000..d5320eb0a635 --- /dev/null +++ b/absl/strings/README.md @@ -0,0 +1,87 @@ +# ABSL Strings + +This directory contains packages related to std::string operations and std::string +alternatives (such as character-agnostic byte manipulation packages). + +## Library Listing + +Two library targets are available within this directory: + +* **strings** (`//absl/strings:strings`) provides classes and + utility functions for manipulating and comparing strings, converting other + types (such as integers) into strings, or evaluating strings for other usages + (such as tokenization). + +* **cord** (`//absl/strings:cord`) provides classes and utility + functions for manipulating `Cord` elements. A `Cord` is a sequence of + characters that internally uses a tree structure to store their data, + avoiding the need for long regions of contiguous memory, and allows memory + sharing, sub-std::string copy-on-write, and a host of other advanced std::string + features. + +## Strings Library File Listing + +The following header files are directly included within the +`absl::strings` library. + +## Alternate std::string-like Classes + +* `bytestream.h` + <br/>Abstraction of std::string for I/O +* `string_view.h` + <br/>Pointer to part or all of another std::string + +## Formatting and Parsing + +* `numbers.h` + <br/>Converter between strings and numbers. Prefer `str_cat.h` for numbers + to strings + +## Operations on Characters + +* `ascii_ctype.h` + <br/>Char classifiers like <ctype.h> but faster +* `charset.h` + <br/>Bitmap from unsigned char -> bool + +## Operations on Strings + +* `case.h` + <br/>Case-changers +* `escaping.h` + <br/>Escapers and unescapers +* `str_join.h` + <br/>Joiner functions using a delimiter +* `str_split.h` + <br/>Split functions +* `str_cat.h` + <br/>Concatenators and appenders +* `string_view_utils.h` + <br>Utility functions for strings +* `strip.h` + <br/>Character removal functions +* `substitute.h` + <br/>Printf-like typesafe formatter + +## Miscellaneous + +* `util.h` + <br/>Grab bag of useful std::string functions + + +## Cord Library File Listing + +The following header files are directly included within the +`absl::strings::cord` library: + +## The `Cord` Class + +* `cord.h` + <br/>A std::string built from a tree of shareable nodes + +## Operations on Cords + +* `cord_cat.h` + <br/>Concatenator functions for cords +* `cord_util.h` + <br/>Utility functions for cords diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc new file mode 100644 index 000000000000..c9481e886542 --- /dev/null +++ b/absl/strings/ascii.cc @@ -0,0 +1,198 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/ascii.h" + +namespace absl { +namespace ascii_internal { + +// # Table generated by this Python code (bit 0x02 is currently unused): +// TODO(mbar) Move Python code for generation of table to BUILD and link here. + +// NOTE: The kAsciiPropertyBits table used within this code was generated by +// Python code of the following form. (Bit 0x02 is currently unused and +// available.) +// +// def Hex2(n): +// return '0x' + hex(n/16)[2:] + hex(n%16)[2:] +// def IsPunct(ch): +// return (ord(ch) >= 32 and ord(ch) < 127 and +// not ch.isspace() and not ch.isalnum()) +// def IsBlank(ch): +// return ch in ' \t' +// def IsCntrl(ch): +// return ord(ch) < 32 or ord(ch) == 127 +// def IsXDigit(ch): +// return ch.isdigit() or ch.lower() in 'abcdef' +// for i in range(128): +// ch = chr(i) +// mask = ((ch.isalpha() and 0x01 or 0) | +// (ch.isalnum() and 0x04 or 0) | +// (ch.isspace() and 0x08 or 0) | +// (IsPunct(ch) and 0x10 or 0) | +// (IsBlank(ch) and 0x20 or 0) | +// (IsCntrl(ch) and 0x40 or 0) | +// (IsXDigit(ch) and 0x80 or 0)) +// print Hex2(mask) + ',', +// if i % 16 == 7: +// print ' //', Hex2(i & 0x78) +// elif i % 16 == 15: +// print + +// clang-format off +// Array of bitfields holding character information. Each bit value corresponds +// to a particular character feature. For readability, and because the value +// of these bits is tightly coupled to this implementation, the individual bits +// are not named. Note that bitfields for all characters above ASCII 127 are +// zero-initialized. +const unsigned char kPropertyBits[256] = { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00 + 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10 + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20 + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30 + 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40, +}; + +// Array of characters for the ascii_tolower() function. For values 'A' +// through 'Z', return the lower-case character; otherwise, return the +// identity of the passed character. +const char kToLower[256] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', +}; + +// Array of characters for the ascii_toupper() function. For values 'a' +// through 'z', return the upper-case character; otherwise, return the +// identity of the passed character. +const char kToUpper[256] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', +}; +// clang-format on + +} // namespace ascii_internal + +void AsciiStrToLower(std::string* s) { + for (auto& ch : *s) { + ch = absl::ascii_tolower(ch); + } +} + +void AsciiStrToUpper(std::string* s) { + for (auto& ch : *s) { + ch = absl::ascii_toupper(ch); + } +} + +void RemoveExtraAsciiWhitespace(std::string* str) { + auto stripped = StripAsciiWhitespace(*str); + + if (stripped.empty()) { + str->clear(); + return; + } + + auto input_it = stripped.begin(); + auto input_end = stripped.end(); + auto output_it = &(*str)[0]; + bool is_ws = false; + + for (; input_it < input_end; ++input_it) { + if (is_ws) { + // Consecutive whitespace? Keep only the last. + is_ws = absl::ascii_isspace(*input_it); + if (is_ws) --output_it; + } else { + is_ws = absl::ascii_isspace(*input_it); + } + + *output_it = *input_it; + ++output_it; + } + + str->erase(output_it - &(*str)[0]); +} + +} // namespace absl diff --git a/absl/strings/ascii.h b/absl/strings/ascii.h new file mode 100644 index 000000000000..fc2bb33e0d77 --- /dev/null +++ b/absl/strings/ascii.h @@ -0,0 +1,239 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: ascii.h +// ----------------------------------------------------------------------------- +// +// This package contains functions operating on characters and strings +// restricted to standard ASCII. These include character classification +// functions analogous to those found in the ANSI C Standard Library <ctype.h> +// header file. +// +// C++ implementations provide <ctype.h> functionality based on their +// C environment locale. In general, reliance on such a locale is not ideal, as +// the locale standard is problematic (and may not return invariant information +// for the same character set, for example). These `ascii_*()` functions are +// hard-wired for standard ASCII, much faster, and guaranteed to behave +// consistently. They will never be overloaded, nor will their function +// signature change. +// +// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, +// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, +// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, +// `ascii_isxdigit()` +// Analagous to the <ctype.h> functions with similar names, these +// functions take an unsigned char and return a bool, based on whether the +// character matches the condition specified. +// +// If the input character has a numerical value greater than 127, these +// functions return `false`. +// +// `ascii_tolower()`, `ascii_toupper()` +// Analagous to the <ctype.h> functions with similar names, these functions +// take an unsigned char and return a char. +// +// If the input character is not an ASCII {lower,upper}-case letter (including +// numerical values greater than 127) then the functions return the same value +// as the input character. + +#ifndef ABSL_STRINGS_ASCII_H_ +#define ABSL_STRINGS_ASCII_H_ + +#include <algorithm> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace ascii_internal { + +// Declaration for an array of bitfields holding character information. +extern const unsigned char kPropertyBits[256]; + +// Declaration for the array of characters to upper-case characters. +extern const char kToUpper[256]; + +// Declaration for the array of characters to lower-case characters. +extern const char kToLower[256]; + +} // namespace ascii_internal + +// ascii_isalpha() +// +// Determines whether the given character is an alphabetic character. +inline bool ascii_isalpha(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x01) != 0; +} + +// ascii_isalnum() +// +// Determines whether the given character is an alphanumeric character. +inline bool ascii_isalnum(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x04) != 0; +} + +// ascii_isspace() +// +// Determines whether the given character is a whitespace character (space, +// tab, vertical tab, formfeed, linefeed, or carriage return). +inline bool ascii_isspace(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x08) != 0; +} + +// ascii_ispunct() +// +// Determines whether the given character is a punctuation character. +inline bool ascii_ispunct(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x10) != 0; +} + +// ascii_isblank() +// +// Determines whether the given character is a blank character (tab or space). +inline bool ascii_isblank(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x20) != 0; +} + +// ascii_iscntrl() +// +// Determines whether the given character is a control character. +inline bool ascii_iscntrl(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x40) != 0; +} + +// ascii_isxdigit() +// +// Determines whether the given character can be represented as a hexadecimal +// digit character (i.e. {0-9} or {A-F}). +inline bool ascii_isxdigit(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x80) != 0; +} + +// ascii_isdigit() +// +// Determines whether the given character can be represented as a decimal +// digit character (i.e. {0-9}). +inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; } + +// ascii_isprint() +// +// Determines whether the given character is printable, including whitespace. +inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; } + +// ascii_isgraph() +// +// Determines whether the given character has a graphical representation. +inline bool ascii_isgraph(unsigned char c) { return c > 32 && c < 127; } + +// ascii_isupper() +// +// Determines whether the given character is uppercase. +inline bool ascii_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; } + +// ascii_islower() +// +// Determines whether the given character is lowercase. +inline bool ascii_islower(unsigned char c) { return c >= 'a' && c <= 'z'; } + +// ascii_isascii() +// +// Determines whether the given character is ASCII. +inline bool ascii_isascii(unsigned char c) { return c < 128; } + +// ascii_tolower() +// +// Returns an ASCII character, converting to lowercase if uppercase is +// passed. Note that character values > 127 are simply returned. +inline char ascii_tolower(unsigned char c) { + return ascii_internal::kToLower[c]; +} + +// Converts the characters in `s` to lowercase, changing the contents of `s`. +void AsciiStrToLower(std::string* s); + +// Creates a lowercase std::string from a given absl::string_view. +ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) { + std::string result(s); + absl::AsciiStrToLower(&result); + return result; +} + +// ascii_toupper() +// +// Returns the ASCII character, converting to upper-case if lower-case is +// passed. Note that characters values > 127 are simply returned. +inline char ascii_toupper(unsigned char c) { + return ascii_internal::kToUpper[c]; +} + +// Converts the characters in `s` to uppercase, changing the contents of `s`. +void AsciiStrToUpper(std::string* s); + +// Creates an uppercase std::string from a given absl::string_view. +ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { + std::string result(s); + absl::AsciiStrToUpper(&result); + return result; +} + +// Returns absl::string_view with whitespace stripped from the beginning of the +// given string_view. +ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( + absl::string_view str) { + auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); + return absl::string_view(it, str.end() - it); +} + +// Strips in place whitespace from the beginning of the given std::string. +inline void StripLeadingAsciiWhitespace(std::string* str) { + auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); + str->erase(str->begin(), it); +} + +// Returns absl::string_view with whitespace stripped from the end of the given +// string_view. +ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( + absl::string_view str) { + auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); + return absl::string_view(str.begin(), str.rend() - it); +} + +// Strips in place whitespace from the end of the given std::string +inline void StripTrailingAsciiWhitespace(std::string* str) { + auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); + str->erase(str->rend() - it); +} + +// Returns absl::string_view with whitespace stripped from both ends of the +// given string_view. +ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace( + absl::string_view str) { + return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); +} + +// Strips in place whitespace from both ends of the given std::string +inline void StripAsciiWhitespace(std::string* str) { + StripTrailingAsciiWhitespace(str); + StripLeadingAsciiWhitespace(str); +} + +// Removes leading, trailing, and consecutive internal whitespace. +void RemoveExtraAsciiWhitespace(std::string*); + +} // namespace absl + +#endif // ABSL_STRINGS_ASCII_H_ diff --git a/absl/strings/ascii_ctype.h b/absl/strings/ascii_ctype.h new file mode 100644 index 000000000000..e1ba9e24a3d5 --- /dev/null +++ b/absl/strings/ascii_ctype.h @@ -0,0 +1,66 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_ASCII_CTYPE_H_ +#define ABSL_STRINGS_ASCII_CTYPE_H_ + +#include "absl/strings/ascii.h" + +inline bool ascii_isalpha(unsigned char c) { + return absl::ascii_isalpha(c); +} +inline bool ascii_isalnum(unsigned char c) { + return absl::ascii_isalnum(c); +} +inline bool ascii_isspace(unsigned char c) { + return absl::ascii_isspace(c); +} +inline bool ascii_ispunct(unsigned char c) { + return absl::ascii_ispunct(c); +} +inline bool ascii_isblank(unsigned char c) { + return absl::ascii_isblank(c); +} +inline bool ascii_iscntrl(unsigned char c) { + return absl::ascii_iscntrl(c); +} +inline bool ascii_isxdigit(unsigned char c) { + return absl::ascii_isxdigit(c); +} +inline bool ascii_isdigit(unsigned char c) { + return absl::ascii_isdigit(c); +} +inline bool ascii_isprint(unsigned char c) { + return absl::ascii_isprint(c); +} +inline bool ascii_isgraph(unsigned char c) { + return absl::ascii_isgraph(c); +} +inline bool ascii_isupper(unsigned char c) { + return absl::ascii_isupper(c); +} +inline bool ascii_islower(unsigned char c) { + return absl::ascii_islower(c); +} +inline bool ascii_isascii(unsigned char c) { + return absl::ascii_isascii(c); +} +inline char ascii_tolower(unsigned char c) { + return absl::ascii_tolower(c); +} +inline char ascii_toupper(unsigned char c) { + return absl::ascii_toupper(c); +} + +#endif // ABSL_STRINGS_ASCII_CTYPE_H_ diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc new file mode 100644 index 000000000000..97f36013bd61 --- /dev/null +++ b/absl/strings/ascii_test.cc @@ -0,0 +1,354 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/ascii.h" + +#include <cctype> +#include <clocale> +#include <cstring> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace { + +TEST(AsciiIsFoo, All) { + for (int i = 0; i < 256; i++) { + if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')) + EXPECT_TRUE(absl::ascii_isalpha(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isalpha(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if ((i >= '0' && i <= '9')) + EXPECT_TRUE(absl::ascii_isdigit(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isdigit(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isalpha(i) || absl::ascii_isdigit(i)) + EXPECT_TRUE(absl::ascii_isalnum(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isalnum(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i != '\0' && strchr(" \r\n\t\v\f", i)) + EXPECT_TRUE(absl::ascii_isspace(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isspace(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 32 && i < 127) + EXPECT_TRUE(absl::ascii_isprint(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isprint(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isprint(i) && !absl::ascii_isspace(i) && + !absl::ascii_isalnum(i)) + EXPECT_TRUE(absl::ascii_ispunct(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_ispunct(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i == ' ' || i == '\t') + EXPECT_TRUE(absl::ascii_isblank(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isblank(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i < 32 || i == 127) + EXPECT_TRUE(absl::ascii_iscntrl(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_iscntrl(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isdigit(i) || (i >= 'A' && i <= 'F') || + (i >= 'a' && i <= 'f')) + EXPECT_TRUE(absl::ascii_isxdigit(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isxdigit(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i > 32 && i < 127) + EXPECT_TRUE(absl::ascii_isgraph(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isgraph(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 'A' && i <= 'Z') + EXPECT_TRUE(absl::ascii_isupper(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isupper(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 'a' && i <= 'z') + EXPECT_TRUE(absl::ascii_islower(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_islower(i)) << ": failed on " << i; + } + for (int i = 0; i < 128; i++) { + EXPECT_TRUE(absl::ascii_isascii(i)) << ": failed on " << i; + } + for (int i = 128; i < 256; i++) { + EXPECT_TRUE(!absl::ascii_isascii(i)) << ": failed on " << i; + } + + // The official is* functions don't accept negative signed chars, but + // our absl::ascii_is* functions do. + for (int i = 0; i < 256; i++) { + signed char sc = static_cast<signed char>(static_cast<unsigned char>(i)); + EXPECT_EQ(absl::ascii_isalpha(i), absl::ascii_isalpha(sc)) << i; + EXPECT_EQ(absl::ascii_isdigit(i), absl::ascii_isdigit(sc)) << i; + EXPECT_EQ(absl::ascii_isalnum(i), absl::ascii_isalnum(sc)) << i; + EXPECT_EQ(absl::ascii_isspace(i), absl::ascii_isspace(sc)) << i; + EXPECT_EQ(absl::ascii_ispunct(i), absl::ascii_ispunct(sc)) << i; + EXPECT_EQ(absl::ascii_isblank(i), absl::ascii_isblank(sc)) << i; + EXPECT_EQ(absl::ascii_iscntrl(i), absl::ascii_iscntrl(sc)) << i; + EXPECT_EQ(absl::ascii_isxdigit(i), absl::ascii_isxdigit(sc)) << i; + EXPECT_EQ(absl::ascii_isprint(i), absl::ascii_isprint(sc)) << i; + EXPECT_EQ(absl::ascii_isgraph(i), absl::ascii_isgraph(sc)) << i; + EXPECT_EQ(absl::ascii_isupper(i), absl::ascii_isupper(sc)) << i; + EXPECT_EQ(absl::ascii_islower(i), absl::ascii_islower(sc)) << i; + EXPECT_EQ(absl::ascii_isascii(i), absl::ascii_isascii(sc)) << i; + } +} + +// Checks that absl::ascii_isfoo returns the same value as isfoo in the C +// locale. +TEST(AsciiIsFoo, SameAsIsFoo) { + // temporarily change locale to C. It should already be C, but just for safety + std::string old_locale = setlocale(LC_CTYPE, nullptr); + ASSERT_TRUE(setlocale(LC_CTYPE, "C")); + + for (int i = 0; i < 256; i++) { + EXPECT_EQ(isalpha(i) != 0, absl::ascii_isalpha(i)) << i; + EXPECT_EQ(isdigit(i) != 0, absl::ascii_isdigit(i)) << i; + EXPECT_EQ(isalnum(i) != 0, absl::ascii_isalnum(i)) << i; + EXPECT_EQ(isspace(i) != 0, absl::ascii_isspace(i)) << i; + EXPECT_EQ(ispunct(i) != 0, absl::ascii_ispunct(i)) << i; + EXPECT_EQ(isblank(i) != 0, absl::ascii_isblank(i)) << i; + EXPECT_EQ(iscntrl(i) != 0, absl::ascii_iscntrl(i)) << i; + EXPECT_EQ(isxdigit(i) != 0, absl::ascii_isxdigit(i)) << i; + EXPECT_EQ(isprint(i) != 0, absl::ascii_isprint(i)) << i; + EXPECT_EQ(isgraph(i) != 0, absl::ascii_isgraph(i)) << i; + EXPECT_EQ(isupper(i) != 0, absl::ascii_isupper(i)) << i; + EXPECT_EQ(islower(i) != 0, absl::ascii_islower(i)) << i; + EXPECT_EQ(isascii(i) != 0, absl::ascii_isascii(i)) << i; + } + + // restore the old locale. + ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str())); +} + +TEST(AsciiToFoo, All) { + // temporarily change locale to C. It should already be C, but just for safety + std::string old_locale = setlocale(LC_CTYPE, nullptr); + ASSERT_TRUE(setlocale(LC_CTYPE, "C")); + + for (int i = 0; i < 256; i++) { + if (absl::ascii_islower(i)) + EXPECT_EQ(absl::ascii_toupper(i), 'A' + (i - 'a')) << i; + else + EXPECT_EQ(absl::ascii_toupper(i), static_cast<char>(i)) << i; + + if (absl::ascii_isupper(i)) + EXPECT_EQ(absl::ascii_tolower(i), 'a' + (i - 'A')) << i; + else + EXPECT_EQ(absl::ascii_tolower(i), static_cast<char>(i)) << i; + + // These CHECKs only hold in a C locale. + EXPECT_EQ(static_cast<char>(tolower(i)), absl::ascii_tolower(i)) << i; + EXPECT_EQ(static_cast<char>(toupper(i)), absl::ascii_toupper(i)) << i; + + // The official to* functions don't accept negative signed chars, but + // our absl::ascii_to* functions do. + signed char sc = static_cast<signed char>(static_cast<unsigned char>(i)); + EXPECT_EQ(absl::ascii_tolower(i), absl::ascii_tolower(sc)) << i; + EXPECT_EQ(absl::ascii_toupper(i), absl::ascii_toupper(sc)) << i; + } + + // restore the old locale. + ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str())); +} + +TEST(AsciiStrTo, Lower) { + const char buf[] = "ABCDEF"; + const std::string str("GHIJKL"); + const std::string str2("MNOPQR"); + const absl::string_view sp(str2); + + EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); + EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); + EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + + char mutable_buf[] = "Mutable"; + std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), + mutable_buf, absl::ascii_tolower); + EXPECT_STREQ("mutable", mutable_buf); +} + +TEST(AsciiStrTo, Upper) { + const char buf[] = "abcdef"; + const std::string str("ghijkl"); + const std::string str2("mnopqr"); + const absl::string_view sp(str2); + + EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); + EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); + EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp)); + + char mutable_buf[] = "Mutable"; + std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), + mutable_buf, absl::ascii_toupper); + EXPECT_STREQ("MUTABLE", mutable_buf); +} + +TEST(StripLeadingAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripLeadingAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo"})); + EXPECT_EQ("foo foo\n ", + absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo foo\n "})); + EXPECT_EQ(absl::string_view{}, absl::StripLeadingAsciiWhitespace( + {"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripLeadingAsciiWhitespace, InPlace) { + std::string str; + + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo foo\n "; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo foo\n ", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(StripTrailingAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripTrailingAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo\t \n\f\r\n\v"})); + EXPECT_EQ(" \nfoo foo", + absl::StripTrailingAsciiWhitespace({" \nfoo foo\t \n\f\r\n\v"})); + EXPECT_EQ(absl::string_view{}, absl::StripTrailingAsciiWhitespace( + {"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripTrailingAsciiWhitespace, InPlace) { + std::string str; + + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "foo\t \n\f\r\n\v"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = " \nfoo foo\t \n\f\r\n\v"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ(" \nfoo foo", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(StripAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", + absl::StripAsciiWhitespace({"\t \n\f\r\n\vfoo\t \n\f\r\n\v"})); + EXPECT_EQ("foo foo", absl::StripAsciiWhitespace( + {"\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"})); + EXPECT_EQ(absl::string_view{}, + absl::StripAsciiWhitespace({"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripAsciiWhitespace, InPlace) { + std::string str; + + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo\t \n\f\r\n\v"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo foo", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(RemoveExtraAsciiWhitespace, InPlace) { + const char* inputs[] = {"No extra space", + " Leading whitespace", + "Trailing whitespace ", + " Leading and trailing ", + " Whitespace \t in\v middle ", + "'Eeeeep! \n Newlines!\n", + "nospaces", + "", + "\n\t a\t\n\nb \t\n"}; + + const char* outputs[] = { + "No extra space", + "Leading whitespace", + "Trailing whitespace", + "Leading and trailing", + "Whitespace in middle", + "'Eeeeep! Newlines!", + "nospaces", + "", + "a\nb", + }; + const int NUM_TESTS = ABSL_ARRAYSIZE(inputs); + + for (int i = 0; i < NUM_TESTS; i++) { + std::string s(inputs[i]); + absl::RemoveExtraAsciiWhitespace(&s); + EXPECT_EQ(outputs[i], s); + } +} + +} // namespace diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc new file mode 100644 index 000000000000..f1576057f849 --- /dev/null +++ b/absl/strings/escaping.cc @@ -0,0 +1,1093 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/escaping.h" + +#include <cassert> +#include <cstdint> +#include <cstdio> +#include <cstring> +#include <limits> +#include <string> +#include <vector> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/unaligned_access.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/internal/char_map.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/internal/utf8.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace { + +// Digit conversion. +constexpr char kHexChar[] = "0123456789abcdef"; + +constexpr char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + +// These are used for the leave_nulls_escaped argument to CUnescapeInternal(). +constexpr bool kUnescapeNulls = false; + +inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); } + +inline int hex_digit_to_int(char c) { + static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, + "Character set must be ASCII."); + assert(absl::ascii_isxdigit(c)); + int x = static_cast<unsigned char>(c); + if (x > '9') { + x += 9; + } + return x & 0xf; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// Implements both CUnescape() and CUnescapeForNullTerminatedString(). +// +// Unescapes C escape sequences and is the reverse of CEscape(). +// +// If 'source' is valid, stores the unescaped std::string and its size in +// 'dest' and 'dest_len' respectively, and returns true. Otherwise +// returns false and optionally stores the error description in +// 'error'. Set 'error' to nullptr to disable error reporting. +// +// 'dest' should point to a buffer that is at least as big as 'source'. +// 'source' and 'dest' may be the same. +// +// NOTE: any changes to this function must also be reflected in the older +// UnescapeCEscapeSequences(). +// ---------------------------------------------------------------------- +bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, + char* dest, ptrdiff_t* dest_len, std::string* error) { + char* d = dest; + const char* p = source.data(); + const char* end = source.end(); + const char* last_byte = end - 1; + + // Small optimization for case where source = dest and there's no escaping + while (p == d && p < end && *p != '\\') p++, d++; + + while (p < end) { + if (*p != '\\') { + *d++ = *p++; + } else { + if (++p > last_byte) { // skip past the '\\' + if (error) *error = "String cannot end with \\"; + return false; + } + switch (*p) { + case 'a': *d++ = '\a'; break; + case 'b': *d++ = '\b'; break; + case 'f': *d++ = '\f'; break; + case 'n': *d++ = '\n'; break; + case 'r': *d++ = '\r'; break; + case 't': *d++ = '\t'; break; + case 'v': *d++ = '\v'; break; + case '\\': *d++ = '\\'; break; + case '?': *d++ = '\?'; break; // \? Who knew? + case '\'': *d++ = '\''; break; + case '"': *d++ = '\"'; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + // octal digit: 1 to 3 digits + const char* octal_start = p; + unsigned int ch = *p - '0'; + if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0'; + if (p < last_byte && is_octal_digit(p[1])) + ch = ch * 8 + *++p - '0'; // now points at last digit + if (ch > 0xff) { + if (error) { + *error = "Value of \\" + + std::string(octal_start, p + 1 - octal_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const ptrdiff_t octal_size = p + 1 - octal_start; + *d++ = '\\'; + memcpy(d, octal_start, octal_size); + d += octal_size; + break; + } + *d++ = ch; + break; + } + case 'x': + case 'X': { + if (p >= last_byte) { + if (error) *error = "String cannot end with \\x"; + return false; + } else if (!absl::ascii_isxdigit(p[1])) { + if (error) *error = "\\x cannot be followed by a non-hex digit"; + return false; + } + unsigned int ch = 0; + const char* hex_start = p; + while (p < last_byte && absl::ascii_isxdigit(p[1])) + // Arbitrarily many hex digits + ch = (ch << 4) + hex_digit_to_int(*++p); + if (ch > 0xFF) { + if (error) { + *error = "Value of \\" + std::string(hex_start, p + 1 - hex_start) + + " exceeds 0xff"; + } + return false; + } + if ((ch == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + const ptrdiff_t hex_size = p + 1 - hex_start; + *d++ = '\\'; + memcpy(d, hex_start, hex_size); + d += hex_size; + break; + } + *d++ = ch; + break; + } + case 'u': { + // \uhhhh => convert 4 hex digits to UTF-8 + char32_t rune = 0; + const char* hex_start = p; + if (p + 4 >= end) { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 4; ++i) { + // Look one char ahead. + if (absl::ascii_isxdigit(p[1])) { + rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. + } else { + if (error) { + *error = "\\u must be followed by 4 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memcpy(d, hex_start, 5); // u0000 + d += 5; + break; + } + d += strings_internal::EncodeUTF8Char(d, rune); + break; + } + case 'U': { + // \Uhhhhhhhh => convert 8 hex digits to UTF-8 + char32_t rune = 0; + const char* hex_start = p; + if (p + 8 >= end) { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + for (int i = 0; i < 8; ++i) { + // Look one char ahead. + if (absl::ascii_isxdigit(p[1])) { + // Don't change rune until we're sure this + // is within the Unicode limit, but do advance p. + uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); + if (newrune > 0x10FFFF) { + if (error) { + *error = "Value of \\" + + std::string(hex_start, p + 1 - hex_start) + + " exceeds Unicode limit (0x10FFFF)"; + } + return false; + } else { + rune = newrune; + } + } else { + if (error) { + *error = "\\U must be followed by 8 hex digits: \\" + + std::string(hex_start, p + 1 - hex_start); + } + return false; + } + } + if ((rune == 0) && leave_nulls_escaped) { + // Copy the escape sequence for the null character + *d++ = '\\'; + memcpy(d, hex_start, 9); // U00000000 + d += 9; + break; + } + d += strings_internal::EncodeUTF8Char(d, rune); + break; + } + default: { + if (error) *error = std::string("Unknown escape sequence: \\") + *p; + return false; + } + } + p++; // read past letter we escaped + } + } + *dest_len = d - dest; + return true; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// +// Same as above but uses a C++ std::string for output. 'source' and 'dest' +// may be the same. +// ---------------------------------------------------------------------- +bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, + std::string* dest, std::string* error) { + strings_internal::STLStringResizeUninitialized(dest, source.size()); + + ptrdiff_t dest_size; + if (!CUnescapeInternal(source, + leave_nulls_escaped, + const_cast<char*>(dest->data()), + &dest_size, + error)) { + return false; + } + dest->erase(dest_size); + return true; +} + +// ---------------------------------------------------------------------- +// CEscape() +// CHexEscape() +// Utf8SafeCEscape() +// Utf8SafeCHexEscape() +// Escapes 'src' using C-style escape sequences. This is useful for +// preparing query flags. The 'Hex' version uses hexadecimal rather than +// octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes. +// +// Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint(). +// ---------------------------------------------------------------------- +std::string CEscapeInternal(absl::string_view src, bool use_hex, bool utf8_safe) { + std::string dest; + bool last_hex_escape = false; // true if last output char was \xNN. + + for (unsigned char c : src) { + bool is_hex_escape = false; + switch (c) { + case '\n': dest.append("\\" "n"); break; + case '\r': dest.append("\\" "r"); break; + case '\t': dest.append("\\" "t"); break; + case '\"': dest.append("\\" "\""); break; + case '\'': dest.append("\\" "'"); break; + case '\\': dest.append("\\" "\\"); break; + default: + // Note that if we emit \xNN and the src character after that is a hex + // digit then that digit must be escaped too to prevent it being + // interpreted as part of the character code by C. + if ((!utf8_safe || c < 0x80) && + (!absl::ascii_isprint(c) || + (last_hex_escape && absl::ascii_isxdigit(c)))) { + if (use_hex) { + dest.append("\\" "x"); + dest.push_back(kHexChar[c / 16]); + dest.push_back(kHexChar[c % 16]); + is_hex_escape = true; + } else { + dest.append("\\"); + dest.push_back(kHexChar[c / 64]); + dest.push_back(kHexChar[(c % 64) / 8]); + dest.push_back(kHexChar[c % 8]); + } + } else { + dest.push_back(c); + break; + } + } + last_hex_escape = is_hex_escape; + } + + return dest; +} + +// Calculates the length of the C-style escaped version of 'src'. +// Assumes that non-printable characters are escaped using octal sequences, and +// that UTF-8 bytes are not handled specially. +inline size_t CEscapedLength(absl::string_view src) { + /* clang-format off */ + constexpr char c_escaped_len[256] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", ' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o' + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + }; + /* clang-format on */ + + size_t escaped_len = 0; + for (unsigned char c : src) escaped_len += c_escaped_len[c]; + return escaped_len; +} + +void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { + size_t escaped_len = CEscapedLength(src); + if (escaped_len == src.size()) { + dest->append(src.data(), src.size()); + return; + } + + size_t cur_dest_len = dest->size(); + strings_internal::STLStringResizeUninitialized(dest, + cur_dest_len + escaped_len); + char* append_ptr = &(*dest)[cur_dest_len]; + + for (unsigned char c : src) { + switch (c) { + case '\n': + *append_ptr++ = '\\'; + *append_ptr++ = 'n'; + break; + case '\r': + *append_ptr++ = '\\'; + *append_ptr++ = 'r'; + break; + case '\t': + *append_ptr++ = '\\'; + *append_ptr++ = 't'; + break; + case '\"': + *append_ptr++ = '\\'; + *append_ptr++ = '\"'; + break; + case '\'': + *append_ptr++ = '\\'; + *append_ptr++ = '\''; + break; + case '\\': + *append_ptr++ = '\\'; + *append_ptr++ = '\\'; + break; + default: + if (!absl::ascii_isprint(c)) { + *append_ptr++ = '\\'; + *append_ptr++ = '0' + c / 64; + *append_ptr++ = '0' + (c % 64) / 8; + *append_ptr++ = '0' + c % 8; + } else { + *append_ptr++ = c; + } + break; + } + } +} + +bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, + size_t szdest, const signed char* unbase64, + size_t* len) { + static const char kPad64Equals = '='; + static const char kPad64Dot = '.'; + + size_t destidx = 0; + int decode = 0; + int state = 0; + unsigned int ch = 0; + unsigned int temp = 0; + + // If "char" is signed by default, using *src as an array index results in + // accessing negative array elements. Treat the input as a pointer to + // unsigned char to avoid this. + const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param); + + // The GET_INPUT macro gets the next input character, skipping + // over any whitespace, and stopping when we reach the end of the + // std::string or when we read any non-data character. The arguments are + // an arbitrary identifier (used as a label for goto) and the number + // of data bytes that must remain in the input to avoid aborting the + // loop. +#define GET_INPUT(label, remain) \ + label: \ + --szsrc; \ + ch = *src++; \ + decode = unbase64[ch]; \ + if (decode < 0) { \ + if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \ + state = 4 - remain; \ + break; \ + } + + // if dest is null, we're just checking to see if it's legal input + // rather than producing output. (I suspect this could just be done + // with a regexp...). We duplicate the loop so this test can be + // outside it instead of in every iteration. + + if (dest) { + // This loop consumes 4 input bytes and produces 3 output bytes + // per iteration. We can't know at the start that there is enough + // data left in the std::string for a full iteration, so the loop may + // break out in the middle; if so 'state' will be set to the + // number of input bytes read. + + while (szsrc >= 4) { + // We'll start by optimistically assuming that the next four + // bytes of the std::string (src[0..3]) are four good data bytes + // (that is, no nulls, whitespace, padding chars, or illegal + // chars). We need to test src[0..2] for nulls individually + // before constructing temp to preserve the property that we + // never read past a null in the std::string (no matter how long + // szsrc claims the std::string is). + + if (!src[0] || !src[1] || !src[2] || + ((temp = ((unsigned(unbase64[src[0]]) << 18) | + (unsigned(unbase64[src[1]]) << 12) | + (unsigned(unbase64[src[2]]) << 6) | + (unsigned(unbase64[src[3]])))) & + 0x80000000)) { + // Iff any of those four characters was bad (null, illegal, + // whitespace, padding), then temp's high bit will be set + // (because unbase64[] is -1 for all bad characters). + // + // We'll back up and resort to the slower decoder, which knows + // how to handle those cases. + + GET_INPUT(first, 4); + temp = decode; + GET_INPUT(second, 3); + temp = (temp << 6) | decode; + GET_INPUT(third, 2); + temp = (temp << 6) | decode; + GET_INPUT(fourth, 1); + temp = (temp << 6) | decode; + } else { + // We really did have four good data bytes, so advance four + // characters in the std::string. + + szsrc -= 4; + src += 4; + } + + // temp has 24 bits of input, so write that out as three bytes. + + if (destidx + 3 > szdest) return false; + dest[destidx + 2] = temp; + temp >>= 8; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + destidx += 3; + } + } else { + while (szsrc >= 4) { + if (!src[0] || !src[1] || !src[2] || + ((temp = ((unsigned(unbase64[src[0]]) << 18) | + (unsigned(unbase64[src[1]]) << 12) | + (unsigned(unbase64[src[2]]) << 6) | + (unsigned(unbase64[src[3]])))) & + 0x80000000)) { + GET_INPUT(first_no_dest, 4); + GET_INPUT(second_no_dest, 3); + GET_INPUT(third_no_dest, 2); + GET_INPUT(fourth_no_dest, 1); + } else { + szsrc -= 4; + src += 4; + } + destidx += 3; + } + } + +#undef GET_INPUT + + // if the loop terminated because we read a bad character, return + // now. + if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot && + !absl::ascii_isspace(ch)) + return false; + + if (ch == kPad64Equals || ch == kPad64Dot) { + // if we stopped by hitting an '=' or '.', un-read that character -- we'll + // look at it again when we count to check for the proper number of + // equals signs at the end. + ++szsrc; + --src; + } else { + // This loop consumes 1 input byte per iteration. It's used to + // clean up the 0-3 input bytes remaining when the first, faster + // loop finishes. 'temp' contains the data from 'state' input + // characters read by the first loop. + while (szsrc > 0) { + --szsrc; + ch = *src++; + decode = unbase64[ch]; + if (decode < 0) { + if (absl::ascii_isspace(ch)) { + continue; + } else if (ch == kPad64Equals || ch == kPad64Dot) { + // back up one character; we'll read it again when we check + // for the correct number of pad characters at the end. + ++szsrc; + --src; + break; + } else { + return false; + } + } + + // Each input character gives us six bits of output. + temp = (temp << 6) | decode; + ++state; + if (state == 4) { + // If we've accumulated 24 bits of output, write that out as + // three bytes. + if (dest) { + if (destidx + 3 > szdest) return false; + dest[destidx + 2] = temp; + temp >>= 8; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + } + destidx += 3; + state = 0; + temp = 0; + } + } + } + + // Process the leftover data contained in 'temp' at the end of the input. + int expected_equals = 0; + switch (state) { + case 0: + // Nothing left over; output is a multiple of 3 bytes. + break; + + case 1: + // Bad input; we have 6 bits left over. + return false; + + case 2: + // Produce one more output byte from the 12 input bits we have left. + if (dest) { + if (destidx + 1 > szdest) return false; + temp >>= 4; + dest[destidx] = temp; + } + ++destidx; + expected_equals = 2; + break; + + case 3: + // Produce two more output bytes from the 18 input bits we have left. + if (dest) { + if (destidx + 2 > szdest) return false; + temp >>= 2; + dest[destidx + 1] = temp; + temp >>= 8; + dest[destidx] = temp; + } + destidx += 2; + expected_equals = 1; + break; + + default: + // state should have no other values at this point. + ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d", + state); + } + + // The remainder of the std::string should be all whitespace, mixed with + // exactly 0 equals signs, or exactly 'expected_equals' equals + // signs. (Always accepting 0 equals signs is an Abseil extension + // not covered in the RFC, as is accepting dot as the pad character.) + + int equals = 0; + while (szsrc > 0) { + if (*src == kPad64Equals || *src == kPad64Dot) + ++equals; + else if (!absl::ascii_isspace(*src)) + return false; + --szsrc; + ++src; + } + + const bool ok = (equals == 0 || equals == expected_equals); + if (ok) *len = destidx; + return ok; +} + +// The arrays below were generated by the following code +// #include <sys/time.h> +// #include <stdlib.h> +// #include <std::string.h> +// main() +// { +// static const char Base64[] = +// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +// char* pos; +// int idx, i, j; +// printf(" "); +// for (i = 0; i < 255; i += 8) { +// for (j = i; j < i + 8; j++) { +// pos = strchr(Base64, j); +// if ((pos == nullptr) || (j == 0)) +// idx = -1; +// else +// idx = pos - Base64; +// if (idx == -1) +// printf(" %2d, ", idx); +// else +// printf(" %2d/*%c*/,", idx, j); +// } +// printf("\n "); +// } +// } +// +// where the value of "Base64[]" was replaced by one of the base-64 conversion +// tables from the functions below. +/* clang-format off */ +constexpr signed char kUnBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; + +constexpr signed char kUnWebSafeBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 62/*-*/, -1, -1, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; +/* clang-format on */ + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from http://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from http://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from http://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from http://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3 + while (cur_src < limit_src - 3) { // as long as we have >= 32 bits + uint32_t in = absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. + szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. + if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +constexpr char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +constexpr char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, std::string* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +bool Base64UnescapeInternal(const char* src, size_t slen, std::string* dest, + const signed char* unbase64) { + // Determine the size of the output std::string. Base64 encodes every 3 bytes into + // 4 characters. any leftover chars are added directly for good measure. + // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 + const size_t dest_len = 3 * (slen / 4) + (slen % 4); + + strings_internal::STLStringResizeUninitialized(dest, dest_len); + + // We are getting the destination buffer by getting the beginning of the + // std::string and converting it into a char *. + size_t len; + const bool ok = + Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); + if (!ok) { + dest->clear(); + return false; + } + + // could be shorter if there was padding + assert(len <= dest_len); + dest->erase(len); + + return true; +} + +/* clang-format off */ +constexpr char kHexValue[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9' + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* clang-format on */ + +// This is a templated function so that T can be either a char* +// or a std::string. This works because we use the [] operator to access +// individual characters at a time. +template <typename T> +void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { + for (int i = 0; i < num; i++) { + to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) + + (kHexValue[from[i * 2 + 1] & 0xFF]); + } +} + +// This is a templated function so that T can be either a char* or a std::string. +template <typename T> +void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { + auto dest_ptr = &dest[0]; + for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { + const char* hex_p = &kHexTable[*src_ptr * 2]; + std::copy(hex_p, hex_p + 2, dest_ptr); + } +} + +} // namespace + +// ---------------------------------------------------------------------- +// CUnescape() +// +// See CUnescapeInternal() for implementation details. +// ---------------------------------------------------------------------- +bool CUnescape(absl::string_view source, std::string* dest, std::string* error) { + return CUnescapeInternal(source, kUnescapeNulls, dest, error); +} + +std::string CEscape(absl::string_view src) { + std::string dest; + CEscapeAndAppendInternal(src, &dest); + return dest; +} + +std::string CHexEscape(absl::string_view src) { + return CEscapeInternal(src, true, false); +} + +std::string Utf8SafeCEscape(absl::string_view src) { + return CEscapeInternal(src, false, true); +} + +std::string Utf8SafeCHexEscape(absl::string_view src) { + return CEscapeInternal(src, true, true); +} + +// ---------------------------------------------------------------------- +// ptrdiff_t Base64Unescape() - base64 decoder +// ptrdiff_t Base64Escape() - base64 encoder +// ptrdiff_t WebSafeBase64Unescape() - Google's variation of base64 decoder +// ptrdiff_t WebSafeBase64Escape() - Google's variation of base64 encoder +// +// Check out +// http://tools.ietf.org/html/rfc2045 for formal description, but what we +// care about is that... +// Take the encoded stuff in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). +// Arrange the 6 digit binary numbers into three bytes as such: +// aaaaaabb bbbbcccc ccdddddd +// Equals signs (one or two) are used at the end of the encoded block to +// indicate that the text was not an integer multiple of three bytes long. +// ---------------------------------------------------------------------- + +bool Base64Unescape(absl::string_view src, std::string* dest) { + return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); +} + +bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { + return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); +} + +void Base64Escape(absl::string_view src, std::string* dest) { + Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), + src.size(), dest, true, kBase64Chars); +} + +void WebSafeBase64Escape(absl::string_view src, std::string* dest) { + Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), + src.size(), dest, false, kWebSafeBase64Chars); +} + +std::string HexStringToBytes(absl::string_view from) { + std::string result; + const auto num = from.size() / 2; + strings_internal::STLStringResizeUninitialized(&result, num); + absl::HexStringToBytesInternal<std::string&>(from.data(), result, num); + return result; +} + +std::string BytesToHexString(absl::string_view from) { + std::string result; + strings_internal::STLStringResizeUninitialized(&result, 2 * from.size()); + absl::BytesToHexStringInternal<std::string&>( + reinterpret_cast<const unsigned char*>(from.data()), result, from.size()); + return result; +} + +} // namespace absl diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h new file mode 100644 index 000000000000..05327e7c43a6 --- /dev/null +++ b/absl/strings/escaping.h @@ -0,0 +1,158 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: escaping.h +// ----------------------------------------------------------------------------- +// +// This header file contains std::string utilities involved in escaping and +// unescaping strings in various ways. +// + +#ifndef ABSL_STRINGS_ESCAPING_H_ +#define ABSL_STRINGS_ESCAPING_H_ + +#include <cstddef> +#include <string> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { + +// CUnescape() +// +// Unescapes a `source` std::string and copies it into `dest`, rewriting C-style +// escape sequences (http://en.cppreference.com/w/cpp/language/escape) into +// their proper code point equivalents, returning `true` if successful. +// +// The following unescape sequences can be handled: +// +// * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents +// * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must +// resolve to a single byte or an error will occur. E.g. values greater than +// 0xff will produce an error. +// * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary +// number of following digits are allowed, the unescaped value must resolve +// to a single byte or an error will occur. E.g. '\x0045' is equivalent to +// '\x45', but '\x1234' will produce an error. +// * Unicode escape sequences ('\unnnn' for exactly four hex digits or +// '\Unnnnnnnn' for exactly eight hex digits, which will be encoded in +// UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and +// 0x99). +// +// +// If any errors are encountered, this function returns `false` and stores the +// first encountered error in `error`. To disable error reporting, set `error` +// to `nullptr` or use the overload with no error reporting below. +// +// Example: +// +// std::string s = "foo\\rbar\\nbaz\\t"; +// std::string unescaped_s = absl::CUnescape(s); +// EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t"); +bool CUnescape(absl::string_view source, std::string* dest, std::string* error); + +// Overload of `CUnescape()` with no error reporting. +inline bool CUnescape(absl::string_view source, std::string* dest) { + return CUnescape(source, dest, nullptr); +} + +// CEscape() +// +// Escapes a 'src' std::string using C-style escapes sequences +// (http://en.cppreference.com/w/cpp/language/escape), escaping other +// non-printable/non-whitespace bytes as octal sequences (e.g. "\377"). +// +// Example: +// +// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; +// std::string escaped_s = absl::CEscape(s); +// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n"); +std::string CEscape(absl::string_view src); + +// CHexEscape() +// +// Escapes a 'src' std::string using C-style escape sequences, escaping +// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g. +// "\xFF"). +// +// Example: +// +// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; +// std::string escaped_s = absl::CHexEscape(s); +// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n"); +std::string CHexEscape(absl::string_view src); + +// Utf8SafeCEscape() +// +// Escapes a 'src' std::string using C-style escape sequences, escaping bytes as +// octal sequences, and passing through UTF-8 characters without conversion. +// I.e., when encountering any bytes with their high bit set, this function +// will not escape those values, whether or not they are valid UTF-8. +std::string Utf8SafeCEscape(absl::string_view src); + +// Utf8SafeCHexEscape() +// +// Escapes a 'src' std::string using C-style escape sequences, escaping bytes as +// hexidecimal sequences, and passing through UTF-8 characters without +// conversion. +std::string Utf8SafeCHexEscape(absl::string_view src); + +// Base64Unescape() +// +// Converts a `src` std::string encoded in Base64 to its binary equivalent, writing +// it to a `dest` buffer, returning `true` on success. If `src` contains invalid +// characters, `dest` is cleared and returns `false`. +bool Base64Unescape(absl::string_view src, std::string* dest); + +// WebSafeBase64Unescape(absl::string_view, std::string*) +// +// Converts a `src` std::string encoded in Base64 to its binary equivalent, writing +// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'. +// If `src` contains invalid characters, `dest` is cleared and returns `false`. +bool WebSafeBase64Unescape(absl::string_view src, std::string* dest); + +// Base64Escape() +// +// Encodes a `src` std::string into a `dest` buffer using base64 encoding, with +// padding characters. This function conforms with RFC 4648 section 4 (base64). +void Base64Escape(absl::string_view src, std::string* dest); + +// WebSafeBase64Escape() +// +// Encodes a `src` std::string into a `dest` buffer using uses '-' instead of '+' and +// '_' instead of '/', and without padding. This function conforms with RFC 4648 +// section 5 (base64url). +void WebSafeBase64Escape(absl::string_view src, std::string* dest); + +// HexStringToBytes() +// +// Converts an ASCII hex std::string into bytes, returning binary data of length +// `from.size()/2`. +std::string HexStringToBytes(absl::string_view from); + +// BytesToHexString() +// +// Converts binary data into an ASCII text std::string, returing a std::string of size +// `2*from.size()`. +std::string BytesToHexString(absl::string_view from); + +} // namespace absl + +#endif // ABSL_STRINGS_ESCAPING_H_ diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc new file mode 100644 index 000000000000..d464051df020 --- /dev/null +++ b/absl/strings/escaping_test.cc @@ -0,0 +1,638 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/escaping.h" + +#include <array> +#include <cstdio> +#include <cstring> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/container/fixed_array.h" +#include "absl/strings/str_cat.h" + +#include "absl/strings/internal/escaping_test_common.inc" + +namespace { + +struct epair { + std::string escaped; + std::string unescaped; +}; + +TEST(CEscape, EscapeAndUnescape) { + const std::string inputs[] = { + std::string("foo\nxx\r\b\0023"), + std::string(""), + std::string("abc"), + std::string("\1chad_rules"), + std::string("\1arnar_drools"), + std::string("xxxx\r\t'\"\\"), + std::string("\0xx\0", 4), + std::string("\x01\x31"), + std::string("abc\xb\x42\141bc"), + std::string("123\1\x31\x32\x33"), + std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"), + std::string("\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"), + }; + // Do this twice, once for octal escapes and once for hex escapes. + for (int kind = 0; kind < 4; kind++) { + for (const std::string& original : inputs) { + std::string escaped; + switch (kind) { + case 0: + escaped = absl::CEscape(original); + break; + case 1: + escaped = absl::CHexEscape(original); + break; + case 2: + escaped = absl::Utf8SafeCEscape(original); + break; + case 3: + escaped = absl::Utf8SafeCHexEscape(original); + break; + } + std::string unescaped_str; + EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str)); + EXPECT_EQ(unescaped_str, original); + + // Check in-place unescaping + std::string s = escaped; + EXPECT_TRUE(absl::CUnescape(s, &s)); + ASSERT_EQ(s, original); + } + } + // Check that all possible two character strings can be escaped then + // unescaped successfully. + for (int char0 = 0; char0 < 256; char0++) { + for (int char1 = 0; char1 < 256; char1++) { + char chars[2]; + chars[0] = char0; + chars[1] = char1; + std::string s(chars, 2); + std::string escaped = absl::CHexEscape(s); + std::string unescaped; + EXPECT_TRUE(absl::CUnescape(escaped, &unescaped)); + EXPECT_EQ(s, unescaped); + } + } +} + +TEST(CEscape, BasicEscaping) { + epair oct_values[] = { + {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"}, + {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'", + "'full of \"sound\" and \"fury\"'"}, + {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"}, + {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"} + }; + epair hex_values[] = { + {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"}, + {"I\\\'ve just seen a \\\"face\\\"", + "I've just seen a \"face\""}, + {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"}, + {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"} + }; + epair utf8_oct_values[] = { + {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t", + "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"}, + {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name", + "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"}, + {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\", + "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"}, + {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r", + "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"} + }; + epair utf8_hex_values[] = { + {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n", + "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"}, + {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"", + "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""}, + {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\", + "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"}, + {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r", + "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"} + }; + + for (const epair& val : oct_values) { + std::string escaped = absl::CEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } + for (const epair& val : hex_values) { + std::string escaped = absl::CHexEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } + for (const epair& val : utf8_oct_values) { + std::string escaped = absl::Utf8SafeCEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } + for (const epair& val : utf8_hex_values) { + std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } +} + +TEST(Unescape, BasicFunction) { + epair tests[] = + {{"\\u0030", "0"}, + {"\\u00A3", "\xC2\xA3"}, + {"\\u22FD", "\xE2\x8B\xBD"}, + {"\\U00010000", "\xF0\x90\x80\x80"}, + {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}}; + for (const epair& val : tests) { + std::string out; + EXPECT_TRUE(absl::CUnescape(val.escaped, &out)); + EXPECT_EQ(out, val.unescaped); + } + std::string bad[] = + {"\\u1", // too short + "\\U1", // too short + "\\Uffffff", + "\\777", // exceeds 0xff + "\\xABCD"}; // exceeds 0xff + for (const std::string& e : bad) { + std::string error; + std::string out; + EXPECT_FALSE(absl::CUnescape(e, &out, &error)); + EXPECT_FALSE(error.empty()); + } +} + +class CUnescapeTest : public testing::Test { + protected: + static const char kStringWithMultipleOctalNulls[]; + static const char kStringWithMultipleHexNulls[]; + static const char kStringWithMultipleUnicodeNulls[]; + + std::string result_string_; +}; + +const char CUnescapeTest::kStringWithMultipleOctalNulls[] = + "\\0\\n" // null escape \0 plus newline + "0\\n" // just a number 0 (not a null escape) plus newline + "\\00\\12" // null escape \00 plus octal newline code + "\\000"; // null escape \000 + +// This has the same ingredients as kStringWithMultipleOctalNulls +// but with \x hex escapes instead of octal escapes. +const char CUnescapeTest::kStringWithMultipleHexNulls[] = + "\\x0\\n" + "0\\n" + "\\x00\\xa" + "\\x000"; + +const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] = + "\\u0000\\n" // short-form (4-digit) null escape plus newline + "0\\n" // just a number 0 (not a null escape) plus newline + "\\U00000000"; // long-form (8-digit) null escape + +TEST_F(CUnescapeTest, Unescapes1CharOctalNull) { + std::string original_string = "\\0"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes2CharOctalNull) { + std::string original_string = "\\00"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes3CharOctalNull) { + std::string original_string = "\\000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes1CharHexNull) { + std::string original_string = "\\x0"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes2CharHexNull) { + std::string original_string = "\\x00"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes3CharHexNull) { + std::string original_string = "\\x000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) { + std::string original_string = "\\u0000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) { + std::string original_string = "\\U00000000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) { + std::string original_string(kStringWithMultipleOctalNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + // All escapes, including newlines and null escapes, should have been + // converted to the equivalent characters. + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0\n" + "\0", 7), result_string_); +} + + +TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) { + std::string original_string(kStringWithMultipleHexNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0\n" + "\0", 7), result_string_); +} + +TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) { + std::string original_string(kStringWithMultipleUnicodeNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0", 5), result_string_); +} + +static struct { + absl::string_view plaintext; + absl::string_view cyphertext; +} const base64_tests[] = { + // Empty std::string. + {{"", 0}, {"", 0}}, + {{nullptr, 0}, + {"", 0}}, // if length is zero, plaintext ptr must be ignored! + + // Basic bit patterns; + // values obtained with "echo -n '...' | uuencode -m test" + + {{"\000", 1}, "AA=="}, + {{"\001", 1}, "AQ=="}, + {{"\002", 1}, "Ag=="}, + {{"\004", 1}, "BA=="}, + {{"\010", 1}, "CA=="}, + {{"\020", 1}, "EA=="}, + {{"\040", 1}, "IA=="}, + {{"\100", 1}, "QA=="}, + {{"\200", 1}, "gA=="}, + + {{"\377", 1}, "/w=="}, + {{"\376", 1}, "/g=="}, + {{"\375", 1}, "/Q=="}, + {{"\373", 1}, "+w=="}, + {{"\367", 1}, "9w=="}, + {{"\357", 1}, "7w=="}, + {{"\337", 1}, "3w=="}, + {{"\277", 1}, "vw=="}, + {{"\177", 1}, "fw=="}, + {{"\000\000", 2}, "AAA="}, + {{"\000\001", 2}, "AAE="}, + {{"\000\002", 2}, "AAI="}, + {{"\000\004", 2}, "AAQ="}, + {{"\000\010", 2}, "AAg="}, + {{"\000\020", 2}, "ABA="}, + {{"\000\040", 2}, "ACA="}, + {{"\000\100", 2}, "AEA="}, + {{"\000\200", 2}, "AIA="}, + {{"\001\000", 2}, "AQA="}, + {{"\002\000", 2}, "AgA="}, + {{"\004\000", 2}, "BAA="}, + {{"\010\000", 2}, "CAA="}, + {{"\020\000", 2}, "EAA="}, + {{"\040\000", 2}, "IAA="}, + {{"\100\000", 2}, "QAA="}, + {{"\200\000", 2}, "gAA="}, + + {{"\377\377", 2}, "//8="}, + {{"\377\376", 2}, "//4="}, + {{"\377\375", 2}, "//0="}, + {{"\377\373", 2}, "//s="}, + {{"\377\367", 2}, "//c="}, + {{"\377\357", 2}, "/+8="}, + {{"\377\337", 2}, "/98="}, + {{"\377\277", 2}, "/78="}, + {{"\377\177", 2}, "/38="}, + {{"\376\377", 2}, "/v8="}, + {{"\375\377", 2}, "/f8="}, + {{"\373\377", 2}, "+/8="}, + {{"\367\377", 2}, "9/8="}, + {{"\357\377", 2}, "7/8="}, + {{"\337\377", 2}, "3/8="}, + {{"\277\377", 2}, "v/8="}, + {{"\177\377", 2}, "f/8="}, + + {{"\000\000\000", 3}, "AAAA"}, + {{"\000\000\001", 3}, "AAAB"}, + {{"\000\000\002", 3}, "AAAC"}, + {{"\000\000\004", 3}, "AAAE"}, + {{"\000\000\010", 3}, "AAAI"}, + {{"\000\000\020", 3}, "AAAQ"}, + {{"\000\000\040", 3}, "AAAg"}, + {{"\000\000\100", 3}, "AABA"}, + {{"\000\000\200", 3}, "AACA"}, + {{"\000\001\000", 3}, "AAEA"}, + {{"\000\002\000", 3}, "AAIA"}, + {{"\000\004\000", 3}, "AAQA"}, + {{"\000\010\000", 3}, "AAgA"}, + {{"\000\020\000", 3}, "ABAA"}, + {{"\000\040\000", 3}, "ACAA"}, + {{"\000\100\000", 3}, "AEAA"}, + {{"\000\200\000", 3}, "AIAA"}, + {{"\001\000\000", 3}, "AQAA"}, + {{"\002\000\000", 3}, "AgAA"}, + {{"\004\000\000", 3}, "BAAA"}, + {{"\010\000\000", 3}, "CAAA"}, + {{"\020\000\000", 3}, "EAAA"}, + {{"\040\000\000", 3}, "IAAA"}, + {{"\100\000\000", 3}, "QAAA"}, + {{"\200\000\000", 3}, "gAAA"}, + + {{"\377\377\377", 3}, "////"}, + {{"\377\377\376", 3}, "///+"}, + {{"\377\377\375", 3}, "///9"}, + {{"\377\377\373", 3}, "///7"}, + {{"\377\377\367", 3}, "///3"}, + {{"\377\377\357", 3}, "///v"}, + {{"\377\377\337", 3}, "///f"}, + {{"\377\377\277", 3}, "//+/"}, + {{"\377\377\177", 3}, "//9/"}, + {{"\377\376\377", 3}, "//7/"}, + {{"\377\375\377", 3}, "//3/"}, + {{"\377\373\377", 3}, "//v/"}, + {{"\377\367\377", 3}, "//f/"}, + {{"\377\357\377", 3}, "/+//"}, + {{"\377\337\377", 3}, "/9//"}, + {{"\377\277\377", 3}, "/7//"}, + {{"\377\177\377", 3}, "/3//"}, + {{"\376\377\377", 3}, "/v//"}, + {{"\375\377\377", 3}, "/f//"}, + {{"\373\377\377", 3}, "+///"}, + {{"\367\377\377", 3}, "9///"}, + {{"\357\377\377", 3}, "7///"}, + {{"\337\377\377", 3}, "3///"}, + {{"\277\377\377", 3}, "v///"}, + {{"\177\377\377", 3}, "f///"}, + + // Random numbers: values obtained with + // + // #! /bin/bash + // dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random + // od -N $1 -t o1 /tmp/bar.random + // uuencode -m test < /tmp/bar.random + // + // where $1 is the number of bytes (2, 3) + + {{"\243\361", 2}, "o/E="}, + {{"\024\167", 2}, "FHc="}, + {{"\313\252", 2}, "y6o="}, + {{"\046\041", 2}, "JiE="}, + {{"\145\236", 2}, "ZZ4="}, + {{"\254\325", 2}, "rNU="}, + {{"\061\330", 2}, "Mdg="}, + {{"\245\032", 2}, "pRo="}, + {{"\006\000", 2}, "BgA="}, + {{"\375\131", 2}, "/Vk="}, + {{"\303\210", 2}, "w4g="}, + {{"\040\037", 2}, "IB8="}, + {{"\261\372", 2}, "sfo="}, + {{"\335\014", 2}, "3Qw="}, + {{"\233\217", 2}, "m48="}, + {{"\373\056", 2}, "+y4="}, + {{"\247\232", 2}, "p5o="}, + {{"\107\053", 2}, "Rys="}, + {{"\204\077", 2}, "hD8="}, + {{"\276\211", 2}, "vok="}, + {{"\313\110", 2}, "y0g="}, + {{"\363\376", 2}, "8/4="}, + {{"\251\234", 2}, "qZw="}, + {{"\103\262", 2}, "Q7I="}, + {{"\142\312", 2}, "Yso="}, + {{"\067\211", 2}, "N4k="}, + {{"\220\001", 2}, "kAE="}, + {{"\152\240", 2}, "aqA="}, + {{"\367\061", 2}, "9zE="}, + {{"\133\255", 2}, "W60="}, + {{"\176\035", 2}, "fh0="}, + {{"\032\231", 2}, "Gpk="}, + + {{"\013\007\144", 3}, "Cwdk"}, + {{"\030\112\106", 3}, "GEpG"}, + {{"\047\325\046", 3}, "J9Um"}, + {{"\310\160\022", 3}, "yHAS"}, + {{"\131\100\237", 3}, "WUCf"}, + {{"\064\342\134", 3}, "NOJc"}, + {{"\010\177\004", 3}, "CH8E"}, + {{"\345\147\205", 3}, "5WeF"}, + {{"\300\343\360", 3}, "wOPw"}, + {{"\061\240\201", 3}, "MaCB"}, + {{"\225\333\044", 3}, "ldsk"}, + {{"\215\137\352", 3}, "jV/q"}, + {{"\371\147\160", 3}, "+Wdw"}, + {{"\030\320\051", 3}, "GNAp"}, + {{"\044\174\241", 3}, "JHyh"}, + {{"\260\127\037", 3}, "sFcf"}, + {{"\111\045\033", 3}, "SSUb"}, + {{"\202\114\107", 3}, "gkxH"}, + {{"\057\371\042", 3}, "L/ki"}, + {{"\223\247\244", 3}, "k6ek"}, + {{"\047\216\144", 3}, "J45k"}, + {{"\203\070\327", 3}, "gzjX"}, + {{"\247\140\072", 3}, "p2A6"}, + {{"\124\115\116", 3}, "VE1O"}, + {{"\157\162\050", 3}, "b3Io"}, + {{"\357\223\004", 3}, "75ME"}, + {{"\052\117\156", 3}, "Kk9u"}, + {{"\347\154\000", 3}, "52wA"}, + {{"\303\012\142", 3}, "wwpi"}, + {{"\060\035\362", 3}, "MB3y"}, + {{"\130\226\361", 3}, "WJbx"}, + {{"\173\013\071", 3}, "ews5"}, + {{"\336\004\027", 3}, "3gQX"}, + {{"\357\366\234", 3}, "7/ac"}, + {{"\353\304\111", 3}, "68RJ"}, + {{"\024\264\131", 3}, "FLRZ"}, + {{"\075\114\251", 3}, "PUyp"}, + {{"\315\031\225", 3}, "zRmV"}, + {{"\154\201\276", 3}, "bIG+"}, + {{"\200\066\072", 3}, "gDY6"}, + {{"\142\350\267", 3}, "Yui3"}, + {{"\033\000\166", 3}, "GwB2"}, + {{"\210\055\077", 3}, "iC0/"}, + {{"\341\037\124", 3}, "4R9U"}, + {{"\161\103\152", 3}, "cUNq"}, + {{"\270\142\131", 3}, "uGJZ"}, + {{"\337\076\074", 3}, "3z48"}, + {{"\375\106\362", 3}, "/Uby"}, + {{"\227\301\127", 3}, "l8FX"}, + {{"\340\002\234", 3}, "4AKc"}, + {{"\121\064\033", 3}, "UTQb"}, + {{"\157\134\143", 3}, "b1xj"}, + {{"\247\055\327", 3}, "py3X"}, + {{"\340\142\005", 3}, "4GIF"}, + {{"\060\260\143", 3}, "MLBj"}, + {{"\075\203\170", 3}, "PYN4"}, + {{"\143\160\016", 3}, "Y3AO"}, + {{"\313\013\063", 3}, "ywsz"}, + {{"\174\236\135", 3}, "fJ5d"}, + {{"\103\047\026", 3}, "QycW"}, + {{"\365\005\343", 3}, "9QXj"}, + {{"\271\160\223", 3}, "uXCT"}, + {{"\362\255\172", 3}, "8q16"}, + {{"\113\012\015", 3}, "SwoN"}, + + // various lengths, generated by this python script: + // + // from std::string import lowercase as lc + // for i in range(27): + // print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i), + // lc[:i].encode('base64').strip()) + + {{"", 0}, {"", 0}}, + {"a", "YQ=="}, + {"ab", "YWI="}, + {"abc", "YWJj"}, + {"abcd", "YWJjZA=="}, + {"abcde", "YWJjZGU="}, + {"abcdef", "YWJjZGVm"}, + {"abcdefg", "YWJjZGVmZw=="}, + {"abcdefgh", "YWJjZGVmZ2g="}, + {"abcdefghi", "YWJjZGVmZ2hp"}, + {"abcdefghij", "YWJjZGVmZ2hpag=="}, + {"abcdefghijk", "YWJjZGVmZ2hpams="}, + {"abcdefghijkl", "YWJjZGVmZ2hpamts"}, + {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="}, + {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="}, + {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"}, + {"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="}, + {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="}, + {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"}, + {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="}, + {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="}, + {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"}, + {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="}, + {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="}, + {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"}, + {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="}, + {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="}, +}; + +TEST(Base64, EscapeAndUnescape) { + // Check the short strings; this tests the math (and boundaries) + for (const auto& tc : base64_tests) { + std::string encoded("this junk should be ignored"); + absl::Base64Escape(tc.plaintext, &encoded); + EXPECT_EQ(encoded, tc.cyphertext); + + std::string decoded("this junk should be ignored"); + EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded)); + EXPECT_EQ(decoded, tc.plaintext); + + std::string websafe(tc.cyphertext); + for (int c = 0; c < websafe.size(); ++c) { + if ('+' == websafe[c]) websafe[c] = '-'; + if ('/' == websafe[c]) websafe[c] = '_'; + if ('=' == websafe[c]) { + websafe.resize(c); + break; + } + } + + encoded = "this junk should be ignored"; + absl::WebSafeBase64Escape(tc.plaintext, &encoded); + EXPECT_EQ(encoded, websafe); + + // Let's try the std::string version of the decoder + decoded = "this junk should be ignored"; + EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); + EXPECT_EQ(decoded, tc.plaintext); + } + + // Now try the long strings, this tests the streaming + for (const auto& tc : base64_strings) { + std::string buffer; + absl::WebSafeBase64Escape(tc.plaintext, &buffer); + EXPECT_EQ(tc.cyphertext, buffer); + } + + // Verify the behavior when decoding bad data + { + absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4), + absl::string_view("abc.\0", 5)}; + for (absl::string_view bad_data : data_set) { + std::string buf; + EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf)); + EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf)); + EXPECT_TRUE(buf.empty()); + } + } +} + +TEST(Base64, DISABLED_HugeData) { + const size_t kSize = size_t(3) * 1000 * 1000 * 1000; + static_assert(kSize % 3 == 0, "kSize must be divisible by 3"); + const std::string huge(kSize, 'x'); + + std::string escaped; + absl::Base64Escape(huge, &escaped); + + // Generates the std::string that should match a base64 encoded "xxx..." std::string. + // "xxx" in base64 is "eHh4". + std::string expected_encoding; + expected_encoding.reserve(kSize / 3 * 4); + for (size_t i = 0; i < kSize / 3; ++i) { + expected_encoding.append("eHh4"); + } + EXPECT_EQ(expected_encoding, escaped); + + std::string unescaped; + EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped)); + EXPECT_EQ(huge, unescaped); +} + +TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) { + std::string hex_mixed = "0123456789abcdefABCDEF"; + std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF"; + std::string hex_only_lower = "0123456789abcdefabcdef"; + + std::string bytes_result = absl::HexStringToBytes(hex_mixed); + EXPECT_EQ(bytes_expected, bytes_result); + + std::string prefix_valid = hex_mixed + "?"; + std::string prefix_valid_result = absl::HexStringToBytes( + absl::string_view(prefix_valid.data(), prefix_valid.size() - 1)); + EXPECT_EQ(bytes_expected, prefix_valid_result); + + std::string infix_valid = "?" + hex_mixed + "???"; + std::string infix_valid_result = absl::HexStringToBytes( + absl::string_view(infix_valid.data() + 1, hex_mixed.size())); + EXPECT_EQ(bytes_expected, infix_valid_result); + + std::string hex_result = absl::BytesToHexString(bytes_expected); + EXPECT_EQ(hex_only_lower, hex_result); +} + +} // namespace diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h new file mode 100644 index 000000000000..8d92963a4dea --- /dev/null +++ b/absl/strings/internal/char_map.h @@ -0,0 +1,154 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Character Map Class +// +// A fast, bit-vector map for 8-bit unsigned characters. +// This class is useful for non-character purposes as well. + +#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ +#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { +namespace strings_internal { + +class Charmap { + public: + constexpr Charmap() : m_() {} + + // Initializes with a given char*. Note that NUL is not treated as + // a terminator, but rather a char to be flicked. + Charmap(const char* str, int len) : m_() { + while (len--) SetChar(*str++); + } + + // Initializes with a given char*. NUL is treated as a terminator + // and will not be in the charmap. + explicit Charmap(const char* str) : m_() { + while (*str) SetChar(*str++); + } + + constexpr bool contains(unsigned char c) const { + return (m_[c / 64] >> (c % 64)) & 0x1; + } + + // Returns true if and only if a character exists in both maps. + bool IntersectsWith(const Charmap& c) const { + for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { + if ((m_[i] & c.m_[i]) != 0) return true; + } + return false; + } + + bool IsZero() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr Charmap Char(char x) { + return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the C-std::string 's'. + // Note that this is expensively recursive because of the C++11 constexpr + // formulation. Use only in constexpr initializers. + static constexpr Charmap FromString(const char* s) { + return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + } + + // Containing all the chars in the closed interval [lo,hi]. + static constexpr Charmap Range(char lo, char hi) { + return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr Charmap operator~(const Charmap& a) { + return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + private: + constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi, + uint64_t word) { + return OpenRangeFromZeroForWord(hi + 1, word) & + ~OpenRangeFromZeroForWord(lo, word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) + ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast<uint64_t>(0) + : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) { + return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0; + } + + private: + void SetChar(unsigned char c) { + m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +// Mirror the char-classifying predicates in <cctype> +constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } +constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } +constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } +constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } +constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } +constexpr Charmap XDigitCharmap() { + return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); +} +constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } +constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } +constexpr Charmap CntrlCharmap() { + return Charmap::Range(0, 0x7f) & ~PrintCharmap(); +} +constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } +constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } +constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc new file mode 100644 index 000000000000..2167be975e7d --- /dev/null +++ b/absl/strings/internal/char_map_test.cc @@ -0,0 +1,172 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/char_map.h" + +#include <cstdio> +#include <cstdlib> +#include <cctype> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +constexpr absl::strings_internal::Charmap everything_map = + ~absl::strings_internal::Charmap(); +constexpr absl::strings_internal::Charmap nothing_map{}; + +TEST(Charmap, AllTests) { + const absl::strings_internal::Charmap also_nothing_map("", 0); + ASSERT_TRUE(everything_map.contains('\0')); + ASSERT_TRUE(!nothing_map.contains('\0')); + ASSERT_TRUE(!also_nothing_map.contains('\0')); + for (unsigned char ch = 1; ch != 0; ++ch) { + ASSERT_TRUE(everything_map.contains(ch)); + ASSERT_TRUE(!nothing_map.contains(ch)); + ASSERT_TRUE(!also_nothing_map.contains(ch)); + } + + const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); + ASSERT_TRUE(symbols.contains('&')); + ASSERT_TRUE(symbols.contains('@')); + ASSERT_TRUE(symbols.contains('#')); + ASSERT_TRUE(symbols.contains('^')); + ASSERT_TRUE(!symbols.contains('!')); + ASSERT_TRUE(!symbols.contains('?')); + int cnt = 0; + for (unsigned char ch = 1; ch != 0; ++ch) + cnt += symbols.contains(ch); + ASSERT_EQ(cnt, 4); + + const absl::strings_internal::Charmap lets("^abcde", 3); + const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); + const absl::strings_internal::Charmap lets3("fghij\0klmnop"); + ASSERT_TRUE(lets2.contains('k')); + ASSERT_TRUE(!lets3.contains('k')); + + ASSERT_TRUE(symbols.IntersectsWith(lets)); + ASSERT_TRUE(!lets2.IntersectsWith(lets)); + ASSERT_TRUE(lets.IntersectsWith(symbols)); + ASSERT_TRUE(!lets.IntersectsWith(lets2)); + + ASSERT_TRUE(nothing_map.IsZero()); + ASSERT_TRUE(!lets.IsZero()); +} + +namespace { +std::string Members(const absl::strings_internal::Charmap& m) { + std::string r; + for (size_t i = 0; i < 256; ++i) + if (m.contains(i)) r.push_back(i); + return r; +} + +std::string ClosedRangeString(unsigned char lo, unsigned char hi) { + // Don't depend on lo<hi. Just increment until lo==hi. + std::string s; + while (true) { + s.push_back(lo); + if (lo == hi) break; + ++lo; + } + return s; +} + +} // namespace + +TEST(Charmap, Constexpr) { + constexpr absl::strings_internal::Charmap kEmpty = nothing_map; + EXPECT_THAT(Members(kEmpty), ""); + constexpr absl::strings_internal::Charmap kA = + absl::strings_internal::Charmap::Char('A'); + EXPECT_THAT(Members(kA), "A"); + constexpr absl::strings_internal::Charmap kAZ = + absl::strings_internal::Charmap::Range('A', 'Z'); + EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + constexpr absl::strings_internal::Charmap kIdentifier = + absl::strings_internal::Charmap::Range('0', '9') | + absl::strings_internal::Charmap::Range('A', 'Z') | + absl::strings_internal::Charmap::Range('a', 'z') | + absl::strings_internal::Charmap::Char('_'); + EXPECT_THAT(Members(kIdentifier), + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "_" + "abcdefghijklmnopqrstuvwxyz"); + constexpr absl::strings_internal::Charmap kAll = everything_map; + for (size_t i = 0; i < 256; ++i) { + EXPECT_TRUE(kAll.contains(i)) << i; + } + constexpr absl::strings_internal::Charmap kHello = + absl::strings_internal::Charmap::FromString("Hello, world!"); + EXPECT_THAT(Members(kHello), " !,Hdelorw"); + + // test negation and intersection + constexpr absl::strings_internal::Charmap kABC = + absl::strings_internal::Charmap::Range('A', 'Z') & + ~absl::strings_internal::Charmap::Range('D', 'Z'); + EXPECT_THAT(Members(kABC), "ABC"); +} + +TEST(Charmap, Range) { + // Exhaustive testing takes too long, so test some of the boundaries that + // are perhaps going to cause trouble. + std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, + 16, 17, 30, 31, 32, 33, 63, 64, 65, + 127, 128, 129, 223, 224, 225, 254, 255}; + for (auto lo = poi.begin(); lo != poi.end(); ++lo) { + SCOPED_TRACE(*lo); + for (auto hi = lo; hi != poi.end(); ++hi) { + SCOPED_TRACE(*hi); + EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), + ClosedRangeString(*lo, *hi)); + } + } +} + +bool AsBool(int x) { return static_cast<bool>(x); } + +TEST(CharmapCtype, Match) { + for (int c = 0; c < 256; ++c) { + SCOPED_TRACE(c); + SCOPED_TRACE(static_cast<char>(c)); + EXPECT_EQ(AsBool(std::isupper(c)), + absl::strings_internal::UpperCharmap().contains(c)); + EXPECT_EQ(AsBool(std::islower(c)), + absl::strings_internal::LowerCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isdigit(c)), + absl::strings_internal::DigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalpha(c)), + absl::strings_internal::AlphaCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalnum(c)), + absl::strings_internal::AlnumCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isxdigit(c)), + absl::strings_internal::XDigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isprint(c)), + absl::strings_internal::PrintCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isspace(c)), + absl::strings_internal::SpaceCharmap().contains(c)); + EXPECT_EQ(AsBool(std::iscntrl(c)), + absl::strings_internal::CntrlCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isblank(c)), + absl::strings_internal::BlankCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isgraph(c)), + absl::strings_internal::GraphCharmap().contains(c)); + EXPECT_EQ(AsBool(std::ispunct(c)), + absl::strings_internal::PunctCharmap().contains(c)); + } +} + +} // namespace diff --git a/absl/strings/internal/escaping_test_common.inc b/absl/strings/internal/escaping_test_common.inc new file mode 100644 index 000000000000..6f29140e3584 --- /dev/null +++ b/absl/strings/internal/escaping_test_common.inc @@ -0,0 +1,113 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test contains common things needed by both escaping_test.cc and +// escaping_benchmark.cc. + +namespace { + +struct { + absl::string_view plaintext; + absl::string_view cyphertext; +} const base64_strings[] = { + // Some google quotes + // Cyphertext created with "uuencode (GNU sharutils) 4.6.3" + // (Note that we're testing the websafe encoding, though, so if + // you add messages, be sure to run "tr -- '+/' '-_'" on the output) + { "I was always good at math and science, and I never realized " + "that was unusual or somehow undesirable. So one of the things " + "I care a lot about is helping to remove that stigma, " + "to show girls that you can be feminine, you can like the things " + "that girls like, but you can also be really good at technology. " + "You can be really good at building things." + " - Marissa Meyer, Newsweek, 2010-12-22" "\n", + + "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg" + "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu" + "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg" + "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo" + "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp" + "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs" + "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy" + "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll" + "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" }, + + { "Typical first year for a new cluster: " + "~0.5 overheating " + "~1 PDU failure " + "~1 rack-move " + "~1 network rewiring " + "~20 rack failures " + "~5 racks go wonky " + "~8 network maintenances " + "~12 router reloads " + "~3 router failures " + "~dozens of minor 30-second blips for dns " + "~1000 individual machine failures " + "~thousands of hard drive failures " + "slow disks, bad memory, misconfigured machines, flaky machines, etc." + " - Jeff Dean, The Joys of Real Hardware" "\n", + + "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92" + "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3" + "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv" + "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk" + "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv" + "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp" + "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg" + "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs" + "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS" + "ZWFsIEhhcmR3YXJlCg" }, + + { "I'm the head of the webspam team at Google. " + "That means that if you type your name into Google and get porn back, " + "it's my fault. Unless you're a porn star, in which case porn is a " + "completely reasonable response." + " - Matt Cutts, Google Plus" "\n", + + "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg" + "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv" + "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz" + "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg" + "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs" + "IEdvb2dsZSBQbHVzCg" }, + + { "It will still be a long time before machines approach human intelligence. " + "But luckily, machines don't actually have to be intelligent; " + "they just have to fake it. Access to a wealth of information, " + "combined with a rudimentary decision-making capacity, " + "can often be almost as useful. Of course, the results are better yet " + "when coupled with intelligence. A reference librarian with access to " + "a good search engine is a formidable tool." + " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n", + + "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg" + "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj" + "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg" + "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo" + "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg" + "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0" + "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy" + "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl" + "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu" + "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp" + "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw" + "NAo" }, + + // Degenerate edge case + { "", + "" }, +}; + +} // namespace diff --git a/absl/strings/internal/fastmem.h b/absl/strings/internal/fastmem.h new file mode 100644 index 000000000000..9989b12e34d3 --- /dev/null +++ b/absl/strings/internal/fastmem.h @@ -0,0 +1,215 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Fast memory copying and comparison routines. +// strings::fastmemcmp_inlined() replaces memcmp() +// strings::memcpy_inlined() replaces memcpy() +// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0 +// +// strings::*_inlined() routines are inline versions of the +// routines exported by this module. Sometimes using the inlined +// versions is faster. Measure before using the inlined versions. +// + +#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_ +#define ABSL_STRINGS_INTERNAL_FASTMEM_H_ + +#ifdef __SSE4_1__ +#include <immintrin.h> +#endif +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstring> + +#include "absl/base/internal/unaligned_access.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { +namespace strings_internal { + +// Return true if the n bytes at a equal the n bytes at b. +// The regions are allowed to overlap. +// +// The performance is similar to the performance of memcmp(), but faster for +// moderately-sized inputs, or inputs that share a common prefix and differ +// somewhere in their last 8 bytes. Further optimizations can be added later +// if it makes sense to do so. Alternatively, if the compiler & runtime improve +// to eliminate the need for this, we can remove it. +inline bool memeq(const char* a, const char* b, size_t n) { + size_t n_rounded_down = n & ~static_cast<size_t>(7); + if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 + return memcmp(a, b, n) == 0; + } + // n >= 8 + { + uint64_t u = + ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b); + uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^ + ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8); + if ((u | v) != 0) { // The first or last 8 bytes differ. + return false; + } + } + // The next line forces n to be a multiple of 8. + n = n_rounded_down; + if (n >= 80) { + // In 2013 or later, this should be fast on long strings. + return memcmp(a, b, n) == 0; + } + // Now force n to be a multiple of 16. Arguably, a "switch" would be smart + // here, but there's a difficult-to-evaluate code size vs. speed issue. The + // current approach often re-compares some bytes (worst case is if n initially + // was 16, 32, 48, or 64), but is fairly short. + size_t e = n & 8; + a += e; + b += e; + n -= e; + // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks. + while (n > 0) { +#ifdef __SSE4_1__ + __m128i u = + _mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)), + _mm_loadu_si128(reinterpret_cast<const __m128i*>(b))); + if (!_mm_test_all_zeros(u, u)) { + return false; + } +#else + uint64_t x = + ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b); + uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^ + ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8); + if ((x | y) != 0) { + return false; + } +#endif + a += 16; + b += 16; + n -= 16; + } + return true; +} + +inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) { + const unsigned char* pa = static_cast<const unsigned char*>(va); + const unsigned char* pb = static_cast<const unsigned char*>(vb); + switch (n) { + default: + return memcmp(va, vb, n); + case 7: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 6: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 5: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 4: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 3: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 2: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 1: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ABSL_FALLTHROUGH_INTENDED; + case 0: + break; + } + return 0; +} + +// The standard memcpy operation is slow for variable small sizes. +// This implementation inlines the optimal realization for sizes 1 to 16. +// To avoid code bloat don't use it in case of not performance-critical spots, +// nor when you don't expect very frequent values of size <= 16. +inline void memcpy_inlined(char* dst, const char* src, size_t size) { + // Compiler inlines code with minimal amount of data movement when third + // parameter of memcpy is a constant. + switch (size) { + case 1: + memcpy(dst, src, 1); + break; + case 2: + memcpy(dst, src, 2); + break; + case 3: + memcpy(dst, src, 3); + break; + case 4: + memcpy(dst, src, 4); + break; + case 5: + memcpy(dst, src, 5); + break; + case 6: + memcpy(dst, src, 6); + break; + case 7: + memcpy(dst, src, 7); + break; + case 8: + memcpy(dst, src, 8); + break; + case 9: + memcpy(dst, src, 9); + break; + case 10: + memcpy(dst, src, 10); + break; + case 11: + memcpy(dst, src, 11); + break; + case 12: + memcpy(dst, src, 12); + break; + case 13: + memcpy(dst, src, 13); + break; + case 14: + memcpy(dst, src, 14); + break; + case 15: + memcpy(dst, src, 15); + break; + case 16: + memcpy(dst, src, 16); + break; + default: + memcpy(dst, src, size); + break; + } +} + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_ diff --git a/absl/strings/internal/fastmem_test.cc b/absl/strings/internal/fastmem_test.cc new file mode 100644 index 000000000000..7c670f967bb3 --- /dev/null +++ b/absl/strings/internal/fastmem_test.cc @@ -0,0 +1,453 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/fastmem.h" + +#include <memory> +#include <random> +#include <string> + +#include "base/init_google.h" +#include "base/logging.h" +#include "testing/base/public/benchmark.h" +#include "gtest/gtest.h" + +namespace { + +using RandomEngine = std::minstd_rand0; + +void VerifyResults(const int r1, const int r2, const std::string& a, + const std::string& b) { + CHECK_EQ(a.size(), b.size()); + if (r1 == 0) { + EXPECT_EQ(r2, 0) << a << " " << b; + } else if (r1 > 0) { + EXPECT_GT(r2, 0) << a << " " << b; + } else { + EXPECT_LT(r2, 0) << a << " " << b; + } + if ((r1 == 0) == (r2 == 0)) { + EXPECT_EQ(r1 == 0, + absl::strings_internal::memeq(a.data(), b.data(), a.size())) + << r1 << " " << a << " " << b; + } +} + +// Check correctness against glibc's memcmp implementation +void CheckSingle(const std::string& a, const std::string& b) { + CHECK_EQ(a.size(), b.size()); + const int r1 = memcmp(a.data(), b.data(), a.size()); + const int r2 = + absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size()); + VerifyResults(r1, r2, a, b); +} + +void GenerateString(size_t len, std::string* s) { + s->clear(); + for (int i = 0; i < len; i++) { + *s += ('a' + (i % 26)); + } +} + +void CheckCompare(const std::string& a, const std::string& b) { + CheckSingle(a, b); + for (int common = 0; common <= 32; common++) { + std::string extra; + GenerateString(common, &extra); + CheckSingle(extra + a, extra + b); + CheckSingle(a + extra, b + extra); + for (char c1 = 'a'; c1 <= 'c'; c1++) { + for (char c2 = 'a'; c2 <= 'c'; c2++) { + CheckSingle(extra + c1 + a, extra + c2 + b); + } + } + } +} + +TEST(FastCompare, Misc) { + CheckCompare("", ""); + + CheckCompare("a", "a"); + CheckCompare("ab", "ab"); + CheckCompare("abc", "abc"); + CheckCompare("abcd", "abcd"); + CheckCompare("abcde", "abcde"); + + CheckCompare("a", "x"); + CheckCompare("ab", "xb"); + CheckCompare("abc", "xbc"); + CheckCompare("abcd", "xbcd"); + CheckCompare("abcde", "xbcde"); + + CheckCompare("x", "a"); + CheckCompare("xb", "ab"); + CheckCompare("xbc", "abc"); + CheckCompare("xbcd", "abcd"); + CheckCompare("xbcde", "abcde"); + + CheckCompare("a", "x"); + CheckCompare("ab", "ax"); + CheckCompare("abc", "abx"); + CheckCompare("abcd", "abcx"); + CheckCompare("abcde", "abcdx"); + + CheckCompare("x", "a"); + CheckCompare("ax", "ab"); + CheckCompare("abx", "abc"); + CheckCompare("abcx", "abcd"); + CheckCompare("abcdx", "abcde"); + + for (int len = 0; len < 1000; len++) { + std::string p(len, 'z'); + CheckCompare(p + "x", p + "a"); + CheckCompare(p + "ax", p + "ab"); + CheckCompare(p + "abx", p + "abc"); + CheckCompare(p + "abcx", p + "abcd"); + CheckCompare(p + "abcdx", p + "abcde"); + } +} + +TEST(FastCompare, TrailingByte) { + for (int i = 0; i < 256; i++) { + for (int j = 0; j < 256; j++) { + std::string a(1, i); + std::string b(1, j); + CheckSingle(a, b); + } + } +} + +// Check correctness of memcpy_inlined. +void CheckSingleMemcpyInlined(const std::string& a) { + std::unique_ptr<char[]> destination(new char[a.size() + 2]); + destination[0] = 'x'; + destination[a.size() + 1] = 'x'; + absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(), + a.size()); + CHECK_EQ('x', destination[0]); + CHECK_EQ('x', destination[a.size() + 1]); + CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size())); +} + +TEST(MemCpyInlined, Misc) { + CheckSingleMemcpyInlined(""); + CheckSingleMemcpyInlined("0"); + CheckSingleMemcpyInlined("012"); + CheckSingleMemcpyInlined("0123"); + CheckSingleMemcpyInlined("01234"); + CheckSingleMemcpyInlined("012345"); + CheckSingleMemcpyInlined("0123456"); + CheckSingleMemcpyInlined("01234567"); + CheckSingleMemcpyInlined("012345678"); + CheckSingleMemcpyInlined("0123456789"); + CheckSingleMemcpyInlined("0123456789a"); + CheckSingleMemcpyInlined("0123456789ab"); + CheckSingleMemcpyInlined("0123456789abc"); + CheckSingleMemcpyInlined("0123456789abcd"); + CheckSingleMemcpyInlined("0123456789abcde"); + CheckSingleMemcpyInlined("0123456789abcdef"); + CheckSingleMemcpyInlined("0123456789abcdefg"); +} + +template <typename Function> +inline void CopyLoop(benchmark::State& state, int size, Function func) { + char* src = new char[size]; + char* dst = new char[size]; + memset(src, 'x', size); + memset(dst, 'y', size); + for (auto _ : state) { + func(dst, src, size); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); + CHECK_EQ(dst[0], 'x'); + delete[] src; + delete[] dst; +} + +void BM_memcpy(benchmark::State& state) { + CopyLoop(state, state.range(0), memcpy); +} +BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20); + +void BM_memcpy_inlined(benchmark::State& state) { + CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined); +} +BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20); + +// unaligned memcpy +void BM_unaligned_memcpy(benchmark::State& state) { + const int n = state.range(0); + const int kMaxOffset = 32; + char* src = new char[n + kMaxOffset]; + char* dst = new char[n + kMaxOffset]; + memset(src, 'x', n + kMaxOffset); + int r = 0, i = 0; + for (auto _ : state) { + memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n); + r += dst[0]; + ++i; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] src; + delete[] dst; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20); + +// memmove worst case: heavy overlap, but not always by the same amount. +// Also, the source and destination will often be unaligned. +void BM_memmove_worst_case(benchmark::State& state) { + const int n = state.range(0); + const int32_t kDeterministicSeed = 301; + const int kMaxOffset = 32; + char* src = new char[n + kMaxOffset]; + memset(src, 'x', n + kMaxOffset); + size_t offsets[64]; + RandomEngine rng(kDeterministicSeed); + std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset); + for (size_t& offset : offsets) { + offset = random_to_max_offset(rng); + } + int r = 0, i = 0; + for (auto _ : state) { + memmove(src + offsets[i], src + offsets[i + 1], n); + r += src[0]; + i = (i + 2) % arraysize(offsets); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] src; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20); + +// memmove cache-friendly: aligned and overlapping with 4k +// between the source and destination addresses. +void BM_memmove_cache_friendly(benchmark::State& state) { + const int n = state.range(0); + char* src = new char[n + 4096]; + memset(src, 'x', n); + int r = 0; + while (state.KeepRunningBatch(2)) { // count each memmove as an iteration + memmove(src + 4096, src, n); + memmove(src, src + 4096, n); + r += src[0]; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] src; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_memmove_cache_friendly) + ->Arg(5 * 1024) + ->Arg(10 * 1024) + ->Range(16 << 10, 8 << 20); + +// memmove best(?) case: aligned and non-overlapping. +void BM_memmove_aligned_non_overlapping(benchmark::State& state) { + CopyLoop(state, state.range(0), memmove); +} +BENCHMARK(BM_memmove_aligned_non_overlapping) + ->DenseRange(1, 18) + ->Range(32, 8 << 20); + +// memset speed +void BM_memset(benchmark::State& state) { + const int n = state.range(0); + char* dst = new char[n]; + int r = 0; + for (auto _ : state) { + memset(dst, 'x', n); + r += dst[0]; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] dst; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_memset)->Range(8, 4096 << 10); + +// Bandwidth (vectorization?) test: the ideal generated code will be limited +// by memory bandwidth. Even so-so generated code will max out memory bandwidth +// on some machines. +void BM_membandwidth(benchmark::State& state) { + const int n = state.range(0); + CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter. + char* dst = new char[n]; + int r = 0; + for (auto _ : state) { + const uint32_t* p = reinterpret_cast<uint32_t*>(dst); + const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n); + uint32_t x = 0; + while (p < limit) { + x += p[0]; + x += p[1]; + x += p[2]; + x += p[3]; + x += p[4]; + x += p[5]; + x += p[6]; + x += p[7]; + p += 8; + } + r += x; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] dst; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10); + +// Helper for benchmarks. Repeatedly compares two strings that are +// either equal or different only in one character. If test_equal_strings +// is false then position_to_modify determines where the difference will be. +template <typename Function> +ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop( + benchmark::State& state, bool test_equal_strings, + std::string::size_type position_to_modify, int size, Function func) { + const int kIterMult = 4; // Iteration multiplier for better timing resolution + CHECK_GT(size, 0); + const bool position_to_modify_is_valid = + position_to_modify != std::string::npos && position_to_modify < size; + CHECK_NE(position_to_modify_is_valid, test_equal_strings); + if (!position_to_modify_is_valid) { + position_to_modify = 0; + } + std::string sa(size, 'a'); + std::string sb = sa; + char last = sa[size - 1]; + int num = 0; + for (auto _ : state) { + for (int i = 0; i < kIterMult; ++i) { + sb[position_to_modify] = test_equal_strings ? last : last ^ 1; + num += func(sa, sb); + } + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); + benchmark::DoNotOptimize(num); +} + +// Helper for benchmarks. Repeatedly compares two memory regions that are +// either equal or different only in their final character. +template <typename Function> +ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state, + bool test_equal_strings, + int size, Function func) { + const int kIterMult = 4; // Iteration multiplier for better timing resolution + CHECK_GT(size, 0); + char* data = static_cast<char*>(malloc(size * 2)); + memset(data, 'a', size * 2); + char* a = data; + char* b = data + size; + char last = a[size - 1]; + int num = 0; + for (auto _ : state) { + for (int i = 0; i < kIterMult; ++i) { + b[size - 1] = test_equal_strings ? last : last ^ 1; + num += func(a, b, size); + } + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); + benchmark::DoNotOptimize(num); + free(data); +} + +void BM_memcmp(benchmark::State& state) { + CompareLoop(state, false, state.range(0), memcmp); +} +BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20); + +void BM_fastmemcmp_inlined(benchmark::State& state) { + CompareLoop(state, false, state.range(0), + absl::strings_internal::fastmemcmp_inlined); +} +BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20); + +void BM_memeq(benchmark::State& state) { + CompareLoop(state, false, state.range(0), absl::strings_internal::memeq); +} +BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20); + +void BM_memeq_equal(benchmark::State& state) { + CompareLoop(state, true, state.range(0), absl::strings_internal::memeq); +} +BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20); + +bool StringLess(const std::string& x, const std::string& y) { return x < y; } +bool StringEqual(const std::string& x, const std::string& y) { return x == y; } +bool StdEqual(const std::string& x, const std::string& y) { + return x.size() == y.size() && + std::equal(x.data(), x.data() + x.size(), y.data()); +} + +// Benchmark for x < y, where x and y are strings that differ in only their +// final char. That should be more-or-less the worst case for <. +void BM_string_less(benchmark::State& state) { + StringCompareLoop(state, false, state.range(0) - 1, state.range(0), + StringLess); +} +BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20); + +// Benchmark for x < y, where x and y are strings that differ in only their +// first char. That should be more-or-less the best case for <. +void BM_string_less_easy(benchmark::State& state) { + StringCompareLoop(state, false, 0, state.range(0), StringLess); +} +BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_string_equal(benchmark::State& state) { + StringCompareLoop(state, false, state.range(0) - 1, state.range(0), + StringEqual); +} +BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_string_equal_equal(benchmark::State& state) { + StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual); +} +BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_std_equal(benchmark::State& state) { + StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual); +} +BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_std_equal_equal(benchmark::State& state) { + StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual); +} +BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_string_equal_unequal_lengths(benchmark::State& state) { + const int size = state.range(0); + std::string a(size, 'a'); + std::string b(size + 1, 'a'); + int count = 0; + for (auto _ : state) { + b[size - 1] = 'a'; + count += (a == b); + } + benchmark::DoNotOptimize(count); +} +BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20); + +void BM_stdstring_equal_unequal_lengths(benchmark::State& state) { + const int size = state.range(0); + std::string a(size, 'a'); + std::string b(size + 1, 'a'); + int count = 0; + for (auto _ : state) { + b[size - 1] = 'a'; + count += (a == b); + } + benchmark::DoNotOptimize(count); +} +BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20); + +} // namespace diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc new file mode 100644 index 000000000000..a0de70dffdbd --- /dev/null +++ b/absl/strings/internal/memutil.cc @@ -0,0 +1,110 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <cstdlib> + +namespace absl { +namespace strings_internal { + +int memcasecmp(const char* s1, const char* s2, size_t len) { + const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1); + const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2); + + for (size_t i = 0; i < len; i++) { + const int diff = + int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} - + int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))}; + if (diff != 0) return diff; + } + return 0; +} + +char* memdup(const char* s, size_t slen) { + void* copy; + if ((copy = malloc(slen)) == nullptr) return nullptr; + memcpy(copy, s, slen); + return reinterpret_cast<char*>(copy); +} + +char* memrchr(const char* s, int c, size_t slen) { + for (const char* e = s + slen - 1; e >= s; e--) { + if (*e == c) return const_cast<char*>(e); + } + return nullptr; +} + +size_t memspn(const char* s, size_t slen, const char* accept) { + const char* p = s; + const char* spanp; + char c, sc; + +cont: + c = *p++; + if (slen-- == 0) return p - 1 - s; + for (spanp = accept; (sc = *spanp++) != '\0';) + if (sc == c) goto cont; + return p - 1 - s; +} + +size_t memcspn(const char* s, size_t slen, const char* reject) { + const char* p = s; + const char* spanp; + char c, sc; + + while (slen-- != 0) { + c = *p++; + for (spanp = reject; (sc = *spanp++) != '\0';) + if (sc == c) return p - 1 - s; + } + return p - s; +} + +char* mempbrk(const char* s, size_t slen, const char* accept) { + const char* scanp; + int sc; + + for (; slen; ++s, --slen) { + for (scanp = accept; (sc = *scanp++) != '\0';) + if (sc == *s) return const_cast<char*>(s); + } + return nullptr; +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ((match = static_cast<const char*>( + memchr(phaystack, pneedle[0], hayend - phaystack)))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h new file mode 100644 index 000000000000..a6f1c69138e3 --- /dev/null +++ b/absl/strings/internal/memutil.h @@ -0,0 +1,146 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// These routines provide mem versions of standard C std::string routines, +// such as strpbrk. They function exactly the same as the str versions, +// so if you wonder what they are, replace the word "mem" by +// "str" and check out the man page. I could return void*, as the +// strutil.h mem*() routines tend to do, but I return char* instead +// since this is by far the most common way these functions are called. +// +// The difference between the mem and str versions is the mem version +// takes a pointer and a length, rather than a '\0'-terminated std::string. +// The memcase* routines defined here assume the locale is "C" +// (they use absl::ascii_tolower instead of tolower). +// +// These routines are based on the BSD library. +// +// Here's a list of routines from std::string.h, and their mem analogues. +// Functions in lowercase are defined in std::string.h; those in UPPERCASE +// are defined here: +// +// strlen -- +// strcat strncat MEMCAT +// strcpy strncpy memcpy +// -- memccpy (very cool function, btw) +// -- memmove +// -- memset +// strcmp strncmp memcmp +// strcasecmp strncasecmp MEMCASECMP +// strchr memchr +// strcoll -- +// strxfrm -- +// strdup strndup MEMDUP +// strrchr MEMRCHR +// strspn MEMSPN +// strcspn MEMCSPN +// strpbrk MEMPBRK +// strstr MEMSTR MEMMEM +// (g)strcasestr MEMCASESTR MEMCASEMEM +// strtok -- +// strprefix MEMPREFIX (strprefix is from strutil.h) +// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) +// strsuffix MEMSUFFIX (strsuffix is from strutil.h) +// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) +// -- MEMIS +// -- MEMCASEIS +// strcount MEMCOUNT (strcount is from strutil.h) + +#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ +#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ + +#include <cstddef> +#include <cstring> + +#include "absl/base/port.h" // disable some warnings on Windows +#include "absl/strings/ascii.h" // for absl::ascii_tolower + +namespace absl { +namespace strings_internal { + +inline char* memcat(char* dest, size_t destlen, const char* src, + size_t srclen) { + return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen)); +} + +int memcasecmp(const char* s1, const char* s2, size_t len); +char* memdup(const char* s, size_t slen); +char* memrchr(const char* s, int c, size_t slen); +size_t memspn(const char* s, size_t slen, const char* accept); +size_t memcspn(const char* s, size_t slen, const char* reject); +char* mempbrk(const char* s, size_t slen, const char* accept); + +// This is for internal use only. Don't call this directly +template <bool case_sensitive> +const char* int_memmatch(const char* haystack, size_t haylen, + const char* needle, size_t neelen) { + if (0 == neelen) { + return haystack; // even if haylen is 0 + } + const char* hayend = haystack + haylen; + const char* needlestart = needle; + const char* needleend = needlestart + neelen; + + for (; haystack < hayend; ++haystack) { + char hay = case_sensitive + ? *haystack + : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); + char nee = case_sensitive + ? *needle + : absl::ascii_tolower(static_cast<unsigned char>(*needle)); + if (hay == nee) { + if (++needle == needleend) { + return haystack + 1 - neelen; + } + } else if (needle != needlestart) { + // must back up haystack in case a prefix matched (find "aab" in "aaab") + haystack -= needle - needlestart; // for loop will advance one more + needle = needlestart; + } + } + return nullptr; +} + +// These are the guys you can call directly +inline const char* memstr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memcasestr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memmem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); +} + +inline const char* memcasemem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc new file mode 100644 index 000000000000..1ff60f20e0bd --- /dev/null +++ b/absl/strings/internal/memutil_test.cc @@ -0,0 +1,180 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Unit test for memutil.cc + +#include "absl/strings/internal/memutil.h" + +#include <algorithm> +#include <cstdlib> + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" + +namespace { + +static char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +static const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +TEST(MemUtilTest, AllTests) { + // check memutil functions + char a[1000]; + absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1); + absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1); + + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there", + sizeof("hello there") - 1), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 1), + -1); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 2), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0); + + char* p = absl::strings_internal::memdup("hello", 5); + free(p); + + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 1); + EXPECT_TRUE(p && p[-1] == 'r'); + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 2); + EXPECT_TRUE(p && p[-1] == 'h'); + p = absl::strings_internal::memrchr("hello there", 'u', + sizeof("hello there") - 1); + EXPECT_TRUE(p == nullptr); + + int len = absl::strings_internal::memspn("hello there", + sizeof("hello there") - 1, "hole"); + EXPECT_EQ(len, sizeof("hello") - 1); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "u"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + ""); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 1, "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 2, "trole h!"); + EXPECT_EQ(len, sizeof("hello there!") - 2); + + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "leho"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "u"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, ""); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, " "); + EXPECT_EQ(len, 5); + + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "leho"); + EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l'); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "nu"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there!", + sizeof("hello there!") - 2, "!"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + " t "); + EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't'); + + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "aBcDeFgHiJ"; + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) == + nullptr); + } +} + +} // namespace diff --git a/absl/strings/internal/numbers_test_common.inc b/absl/strings/internal/numbers_test_common.inc new file mode 100644 index 000000000000..e165b3bea5c6 --- /dev/null +++ b/absl/strings/internal/numbers_test_common.inc @@ -0,0 +1,166 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file contains common things needed by numbers_test.cc, +// numbers_legacy_test.cc and numbers_benchmark.cc. + +namespace { + +// Previously documented minimum buffer sizes for Fast*ToBuffer functions. +// NOTE(edk): These should be deleted and uses replaced with kFastToBufferSize +// once existing code has been fixed to use kFastToBufferSize. +enum { + kFastInt32ToBufferSize = 12, + kFastInt64ToBufferSize = 22, + kFastUInt32ToBufferSize = 12, + kFastUInt64ToBufferSize = 22 +}; + +template <typename IntType> +bool Itoa(IntType value, int base, std::string* destination) { + destination->clear(); + if (base <= 1 || base > 36) { + return false; + } + + if (value == 0) { + destination->push_back('0'); + return true; + } + + bool negative = value < 0; + while (value != 0) { + const IntType next_value = value / base; + // Can't use std::abs here because of problems when IntType is unsigned. + int remainder = value > next_value * base ? value - next_value * base + : next_value * base - value; + char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10; + destination->insert(0, 1, c); + value = next_value; + } + + if (negative) { + destination->insert(0, 1, '-'); + } + return true; +} + +struct uint32_test_case { + const char* str; + bool expect_ok; + int base; // base to pass to the conversion function + uint32_t expected; +} const strtouint32_test_cases[] = { + {"0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()}, + {"0x34234324", true, 16, 0x34234324}, + {"34234324", true, 16, 0x34234324}, + {"0", true, 16, 0}, + {" \t\n 0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()}, + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + {" \t\n 72717222", true, 8, 072717222}, + {" \t\n 072717222", true, 8, 072717222}, + {" \t\n 072717228", false, 8, 07271722}, + {"0", true, 0, 0}, + + // Base-10 version. + {"34234324", true, 0, 34234324}, + {"4294967295", true, 0, std::numeric_limits<uint32_t>::max()}, + {"34234324 \n\t", true, 10, 34234324}, + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, // would be valid hex, but prefix is missing + {"34234324a", false, 0, 34234324}, + {"34234.3", false, 0, 34234}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"4294967296", false, 0, std::numeric_limits<uint32_t>::max()}, + {"0x100000000", false, 0, std::numeric_limits<uint32_t>::max()}, + {nullptr, false, 0, 0}, +}; + +struct uint64_test_case { + const char* str; + bool expect_ok; + int base; + uint64_t expected; +} const strtouint64_test_cases[] = { + {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}}, + {"3423432448783446", true, 16, int64_t{0x3423432448783446}}, + + {"0", true, 16, 0}, + {"000", true, 0, 0}, + {"0", true, 0, 0}, + {" \t\n 0xffffffffffffffff", true, 16, + std::numeric_limits<uint64_t>::max()}, + + {"012345670123456701234", true, 8, int64_t{012345670123456701234}}, + {"12345670123456701234", true, 8, int64_t{012345670123456701234}}, + + {"12845670123456701234", false, 8, 0}, + + // Base-10 version. + {"34234324487834466", true, 0, int64_t{34234324487834466}}, + + {" \t\n 18446744073709551615", true, 0, + std::numeric_limits<uint64_t>::max()}, + + {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}}, + + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + {"0", true, 0, 0}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, + {"34234324487834466a", false, 0, 0}, + {"34234487834466.3", false, 0, 0}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"18446744073709551616", false, 10, 0}, + {"18446744073709551616", false, 0, 0}, + {"0x10000000000000000", false, 16, std::numeric_limits<uint64_t>::max()}, + {"0X10000000000000000", false, 16, + std::numeric_limits<uint64_t>::max()}, // 0X versus 0x. + {"0x10000000000000000", false, 0, std::numeric_limits<uint64_t>::max()}, + {"0X10000000000000000", false, 0, + std::numeric_limits<uint64_t>::max()}, // 0X versus 0x. + + {"0x1234", true, 16, 0x1234}, + + // Base-10 std::string version. + {"1234", true, 0, 1234}, + {nullptr, false, 0, 0}, +}; + +} // namespace diff --git a/absl/strings/internal/ostringstream.h b/absl/strings/internal/ostringstream.h new file mode 100644 index 000000000000..017632a9292e --- /dev/null +++ b/absl/strings/internal/ostringstream.h @@ -0,0 +1,97 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ +#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ + +#include <cassert> +#include <ostream> +#include <streambuf> +#include <string> + +#include "absl/base/port.h" + +namespace absl { +namespace strings_internal { + +// The same as std::ostringstream but appends to a user-specified std::string, +// and is faster. It is ~70% faster to create, ~50% faster to write to, and +// completely free to extract the result std::string. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42 << ' ' << 3.14; // appends to `s` +// +// The stream object doesn't have to be named. Starting from C++11 operator<< +// works with rvalues of std::ostream. +// +// std::string s; +// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s` +// +// OStringStream is faster to create than std::ostringstream but it's still +// relatively slow. Avoid creating multiple streams where a single stream will +// do. +// +// Creates unnecessary instances of OStringStream: slow. +// +// std::string s; +// OStringStream(&s) << 42; +// OStringStream(&s) << ' '; +// OStringStream(&s) << 3.14; +// +// Creates a single instance of OStringStream and reuses it: fast. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42; +// strm << ' '; +// strm << 3.14; +// +// Note: flush() has no effect. No reason to call it. +class OStringStream : private std::basic_streambuf<char>, public std::ostream { + public: + // The argument can be null, in which case you'll need to call str(p) with a + // non-null argument before you can write to the stream. + // + // The destructor of OStringStream doesn't use the std::string. It's OK to destroy + // the std::string before the stream. + explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {} + + std::string* str() { return s_; } + const std::string* str() const { return s_; } + void str(std::string* s) { s_ = s; } + + private: + using Buf = std::basic_streambuf<char>; + + Buf::int_type overflow(int c = Buf::traits_type::eof()) override { + assert(s_); + if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof())) + s_->push_back(static_cast<char>(c)); + return 1; + } + + std::streamsize xsputn(const char* s, std::streamsize n) override { + assert(s_); + s_->append(s, n); + return n; + } + + std::string* s_; +}; + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ diff --git a/absl/strings/internal/ostringstream_test.cc b/absl/strings/internal/ostringstream_test.cc new file mode 100644 index 000000000000..0047ec822e8b --- /dev/null +++ b/absl/strings/internal/ostringstream_test.cc @@ -0,0 +1,103 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/ostringstream.h" + +#include <memory> +#include <ostream> +#include <sstream> +#include <string> +#include <type_traits> + +#include "gtest/gtest.h" + +namespace { + +TEST(OStringStream, IsOStream) { + static_assert( + std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(), + ""); +} + +TEST(OStringStream, ConstructDestroy) { + { + absl::strings_internal::OStringStream strm(nullptr); + EXPECT_EQ(nullptr, strm.str()); + } + { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ(&s, strm.str()); + } + EXPECT_EQ("abc", s); + } + { + std::unique_ptr<std::string> s(new std::string); + absl::strings_internal::OStringStream strm(s.get()); + s.reset(); + } +} + +TEST(OStringStream, Str) { + std::string s1; + absl::strings_internal::OStringStream strm(&s1); + const absl::strings_internal::OStringStream& c_strm(strm); + + static_assert(std::is_same<decltype(strm.str()), std::string*>(), ""); + static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), ""); + + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + strm.str(&s1); + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + std::string s2; + strm.str(&s2); + EXPECT_EQ(&s2, strm.str()); + EXPECT_EQ(&s2, c_strm.str()); + + strm.str(nullptr); + EXPECT_EQ(nullptr, strm.str()); + EXPECT_EQ(nullptr, c_strm.str()); +} + +TEST(OStreamStream, WriteToLValue) { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ("abc", s); + strm << ""; + EXPECT_EQ("abc", s); + strm << 42; + EXPECT_EQ("abc42", s); + strm << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); + } + EXPECT_EQ("abc42xy", s); +} + +TEST(OStreamStream, WriteToRValue) { + std::string s = "abc"; + absl::strings_internal::OStringStream(&s) << ""; + EXPECT_EQ("abc", s); + absl::strings_internal::OStringStream(&s) << 42; + EXPECT_EQ("abc42", s); + absl::strings_internal::OStringStream(&s) << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); +} + +} // namespace diff --git a/absl/strings/internal/resize_uninitialized.h b/absl/strings/internal/resize_uninitialized.h new file mode 100644 index 000000000000..0157ca0245f3 --- /dev/null +++ b/absl/strings/internal/resize_uninitialized.h @@ -0,0 +1,69 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ + +#include <string> +#include <utility> + +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" // for void_t + +namespace absl { +namespace strings_internal { + +// Is a subclass of true_type or false_type, depending on whether or not +// T has a resize_uninitialized member. +template <typename T, typename = void> +struct HasResizeUninitialized : std::false_type {}; +template <typename T> +struct HasResizeUninitialized< + T, absl::void_t<decltype(std::declval<T>().resize_uninitialized(237))>> + : std::true_type {}; + +template <typename string_type> +void ResizeUninit(string_type* s, size_t new_size, std::true_type) { + s->resize_uninitialized(new_size); +} +template <typename string_type> +void ResizeUninit(string_type* s, size_t new_size, std::false_type) { + s->resize(new_size); +} + +// Returns true if the std::string implementation supports a resize where +// the new characters added to the std::string are left untouched. +// +// (A better name might be "STLStringSupportsUninitializedResize", alluding to +// the previous function.) +template <typename string_type> +inline constexpr bool STLStringSupportsNontrashingResize(string_type*) { + return HasResizeUninitialized<string_type>(); +} + +// Like str->resize(new_size), except any new characters added to "*str" as a +// result of resizing may be left uninitialized, rather than being filled with +// '0' bytes. Typically used when code is then going to overwrite the backing +// store of the std::string with known data. Uses a Google extension to std::string. +template <typename string_type, typename = void> +inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { + ResizeUninit(s, new_size, HasResizeUninitialized<string_type>()); +} + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ diff --git a/absl/strings/internal/resize_uninitialized_test.cc b/absl/strings/internal/resize_uninitialized_test.cc new file mode 100644 index 000000000000..ad282efcd9bb --- /dev/null +++ b/absl/strings/internal/resize_uninitialized_test.cc @@ -0,0 +1,68 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/resize_uninitialized.h" + +#include "gtest/gtest.h" + +namespace { + +int resize_call_count = 0; + +struct resizable_string { + void resize(size_t) { resize_call_count += 1; } +}; + +int resize_uninitialized_call_count = 0; + +struct resize_uninitializable_string { + void resize(size_t) { resize_call_count += 1; } + void resize_uninitialized(size_t) { resize_uninitialized_call_count += 1; } +}; + +TEST(ResizeUninit, WithAndWithout) { + resize_call_count = 0; + resize_uninitialized_call_count = 0; + { + resizable_string rs; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + EXPECT_FALSE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rs, 237); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(resize_uninitialized_call_count, 0); + } + + resize_call_count = 0; + resize_uninitialized_call_count = 0; + { + resize_uninitializable_string rus; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + EXPECT_TRUE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rus, 237); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 1); + } +} + +} // namespace diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h new file mode 100644 index 000000000000..e73f1dde403d --- /dev/null +++ b/absl/strings/internal/str_join_internal.h @@ -0,0 +1,314 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Join API that are inlined/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in this file are: +// +// - A handful of default Formatters +// - JoinAlgorithm() overloads +// - JoinRange() overloads +// - JoinTuple() +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_join.h +// +// IWYU pragma: private, include "absl/strings/str_join.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ + +#include <cassert> +#include <iterator> +#include <memory> +#include <string> +#include <utility> + +#include "absl/strings/internal/ostringstream.h" +#include "absl/strings/str_cat.h" + +namespace absl { +namespace strings_internal { + +// +// Formatter objects +// +// The following are implementation classes for standard Formatter objects. The +// factory functions that users will call to create and use these formatters are +// defined and documented in strings/join.h. +// + +// The default formatter. Converts alpha-numeric types to strings. +struct AlphaNumFormatterImpl { + // This template is needed in order to support passing in a dereferenced + // vector<bool>::iterator + template <typename T> + void operator()(std::string* out, const T& t) const { + StrAppend(out, AlphaNum(t)); + } + + void operator()(std::string* out, const AlphaNum& t) const { + StrAppend(out, t); + } +}; + +// A type that's used to overload the JoinAlgorithm() function (defined below) +// for ranges that do not require additional formatting (e.g., a range of +// strings). + +struct NoFormatter : public AlphaNumFormatterImpl {}; + +// Formats types to strings using the << operator. +class StreamFormatterImpl { + public: + // The method isn't const because it mutates state. Making it const will + // render StreamFormatterImpl thread-hostile. + template <typename T> + void operator()(std::string* out, const T& t) { + // The stream is created lazily to avoid paying the relatively high cost + // of its construction when joining an empty range. + if (strm_) { + strm_->clear(); // clear the bad, fail and eof bits in case they were set + strm_->str(out); + } else { + strm_.reset(new strings_internal::OStringStream(out)); + } + *strm_ << t; + } + + private: + std::unique_ptr<strings_internal::OStringStream> strm_; +}; + +// Formats a std::pair<>. The 'first' member is formatted using f1_ and the +// 'second' member is formatted using f2_. sep_ is the separator. +template <typename F1, typename F2> +class PairFormatterImpl { + public: + PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2) + : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {} + + template <typename T> + void operator()(std::string* out, const T& p) { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + template <typename T> + void operator()(std::string* out, const T& p) const { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + private: + F1 f1_; + std::string sep_; + F2 f2_; +}; + +// Wraps another formatter and dereferences the argument to operator() then +// passes the dereferenced argument to the wrapped formatter. This can be +// useful, for example, to join a std::vector<int*>. +template <typename Formatter> +class DereferenceFormatterImpl { + public: + DereferenceFormatterImpl() : f_() {} + explicit DereferenceFormatterImpl(Formatter&& f) + : f_(std::forward<Formatter>(f)) {} + + template <typename T> + void operator()(std::string* out, const T& t) { + f_(out, *t); + } + + template <typename T> + void operator()(std::string* out, const T& t) const { + f_(out, *t); + } + + private: + Formatter f_; +}; + +// DefaultFormatter<T> is a traits class that selects a default Formatter to use +// for the given type T. The ::Type member names the Formatter to use. This is +// used by the strings::Join() functions that do NOT take a Formatter argument, +// in which case a default Formatter must be chosen. +// +// AlphaNumFormatterImpl is the default in the base template, followed by +// specializations for other types. +template <typename ValueType> +struct DefaultFormatter { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<const char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<std::string> { + typedef NoFormatter Type; +}; +template <> +struct DefaultFormatter<absl::string_view> { + typedef NoFormatter Type; +}; +template <typename ValueType> +struct DefaultFormatter<ValueType*> { + typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type> + Type; +}; + +template <typename ValueType> +struct DefaultFormatter<std::unique_ptr<ValueType>> + : public DefaultFormatter<ValueType*> {}; + +// +// JoinAlgorithm() functions +// + +// The main joining algorithm. This simply joins the elements in the given +// iterator range, each separated by the given separator, into an output std::string, +// and formats each element using the provided Formatter object. +template <typename Iterator, typename Formatter> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + Formatter&& f) { + std::string result; + absl::string_view sep(""); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + f(&result, *it); + sep = s; + } + return result; +} + +// No-op placeholder for input iterators which can not be iterated over. +template <typename Iterator> +size_t GetResultSize(Iterator, Iterator, size_t, std::input_iterator_tag) { + return 0; +} + +// Calculates space to reserve, if the iterator supports multiple passes. +template <typename Iterator> +size_t GetResultSize(Iterator it, Iterator end, size_t separator_size, + std::forward_iterator_tag) { + assert(it != end); + size_t length = it->size(); + while (++it != end) { + length += separator_size; + length += it->size(); + } + return length; +} + +// A joining algorithm that's optimized for an iterator range of std::string-like +// objects that do not need any additional formatting. This is to optimize the +// common case of joining, say, a std::vector<std::string> or a +// std::vector<absl::string_view>. +// +// This is an overload of the previous JoinAlgorithm() function. Here the +// Formatter argument is of type NoFormatter. Since NoFormatter is an internal +// type, this overload is only invoked when strings::Join() is called with a +// range of std::string-like objects (e.g., std::string, absl::string_view), and an +// explicit Formatter argument was NOT specified. +// +// The optimization is that the needed space will be reserved in the output +// std::string to avoid the need to resize while appending. To do this, the iterator +// range will be traversed twice: once to calculate the total needed size, and +// then again to copy the elements and delimiters to the output std::string. +template <typename Iterator> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + NoFormatter) { + std::string result; + if (start != end) { + typename std::iterator_traits<Iterator>::iterator_category iterator_tag; + result.reserve(GetResultSize(start, end, s.size(), iterator_tag)); + + // Joins strings + absl::string_view sep("", 0); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + result.append(it->data(), it->size()); + sep = s; + } + } + + return result; +} + +// JoinTupleLoop implements a loop over the elements of a std::tuple, which +// are heterogeneous. The primary template matches the tuple interior case. It +// continues the iteration after appending a separator (for nonzero indices) +// and formatting an element of the tuple. The specialization for the I=N case +// matches the end-of-tuple, and terminates the iteration. +template <size_t I, size_t N> +struct JoinTupleLoop { + template <typename Tup, typename Formatter> + void operator()(std::string* out, const Tup& tup, absl::string_view sep, + Formatter&& fmt) { + if (I > 0) out->append(sep.data(), sep.size()); + fmt(out, std::get<I>(tup)); + JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt); + } +}; +template <size_t N> +struct JoinTupleLoop<N, N> { + template <typename Tup, typename Formatter> + void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {} +}; + +template <typename... T, typename Formatter> +std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep, + Formatter&& fmt) { + std::string result; + JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt); + return result; +} + +template <typename Iterator> +std::string JoinRange(Iterator first, Iterator last, absl::string_view separator) { + // No formatter was explicitly given, so a default must be chosen. + typedef typename std::iterator_traits<Iterator>::value_type ValueType; + typedef typename DefaultFormatter<ValueType>::Type Formatter; + return JoinAlgorithm(first, last, separator, Formatter()); +} + +template <typename Range, typename Formatter> +std::string JoinRange(const Range& range, absl::string_view separator, + Formatter&& fmt) { + using std::begin; + using std::end; + return JoinAlgorithm(begin(range), end(range), separator, fmt); +} + +template <typename Range> +std::string JoinRange(const Range& range, absl::string_view separator) { + using std::begin; + using std::end; + return JoinRange(begin(range), end(range), separator); +} + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h new file mode 100644 index 000000000000..dc31a8ef9090 --- /dev/null +++ b/absl/strings/internal/str_split_internal.h @@ -0,0 +1,439 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Split API that are inline/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in here are +// +// - ConvertibleToStringView +// - SplitIterator<> +// - Splitter<> +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_split.h. +// +// IWYU pragma: private, include "absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ + +#ifdef _GLIBCXX_DEBUG +#include <glibcxx_debug_traits.h> +#endif // _GLIBCXX_DEBUG + +#include <array> +#include <initializer_list> +#include <iterator> +#include <map> +#include <type_traits> +#include <utility> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace strings_internal { + +#ifdef _GLIBCXX_DEBUG +using ::glibcxx_debug_traits::IsStrictlyDebugWrapperBase; +#else // _GLIBCXX_DEBUG +template <typename T> struct IsStrictlyDebugWrapperBase : std::false_type {}; +#endif // _GLIBCXX_DEBUG + +// This class is implicitly constructible from everything that absl::string_view +// is implicitly constructible from. If it's constructed from a temporary +// std::string, the data is moved into a data member so its lifetime matches that of +// the ConvertibleToStringView instance. +class ConvertibleToStringView { + public: + ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) + ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) + : value_(s) {} + + // Matches rvalue strings and moves their data to a member. +ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) + : copy_(std::move(s)), value_(copy_) {} + + ConvertibleToStringView(const ConvertibleToStringView& other) + : copy_(other.copy_), + value_(other.IsSelfReferential() ? copy_ : other.value_) {} + + ConvertibleToStringView(ConvertibleToStringView&& other) { + StealMembers(std::move(other)); + } + + ConvertibleToStringView& operator=(ConvertibleToStringView other) { + StealMembers(std::move(other)); + return *this; + } + + absl::string_view value() const { return value_; } + + private: + // Returns true if ctsp's value refers to its internal copy_ member. + bool IsSelfReferential() const { return value_.data() == copy_.data(); } + + void StealMembers(ConvertibleToStringView&& other) { + if (other.IsSelfReferential()) { + copy_ = std::move(other.copy_); + value_ = copy_; + other.value_ = other.copy_; + } else { + value_ = other.value_; + } + } + + // Holds the data moved from temporary std::string arguments. Declared first so + // that 'value' can refer to 'copy_'. + std::string copy_; + absl::string_view value_; +}; + +// An iterator that enumerates the parts of a std::string from a Splitter. The text +// to be split, the Delimiter, and the Predicate are all taken from the given +// Splitter object. Iterators may only be compared if they refer to the same +// Splitter instance. +// +// This class is NOT part of the public splitting API. +template <typename Splitter> +class SplitIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + + enum State { kInitState, kLastState, kEndState }; + SplitIterator(State state, const Splitter* splitter) + : pos_(0), + state_(state), + splitter_(splitter), + delimiter_(splitter->delimiter()), + predicate_(splitter->predicate()) { + // Hack to maintain backward compatibility. This one block makes it so an + // empty absl::string_view whose .data() happens to be nullptr behaves + // *differently* from an otherwise empty absl::string_view whose .data() is + // not nullptr. This is an undesirable difference in general, but this + // behavior is maintained to avoid breaking existing code that happens to + // depend on this old behavior/bug. Perhaps it will be fixed one day. The + // difference in behavior is as follows: + // Split(absl::string_view(""), '-'); // {""} + // Split(absl::string_view(), '-'); // {} + if (splitter_->text().data() == nullptr) { + state_ = kEndState; + pos_ = splitter_->text().size(); + return; + } + + if (state_ == kEndState) { + pos_ = splitter_->text().size(); + } else { + ++(*this); + } + } + + bool at_end() const { return state_ == kEndState; } + + reference operator*() const { return curr_; } + pointer operator->() const { return &curr_; } + + SplitIterator& operator++() { + do { + if (state_ == kLastState) { + state_ = kEndState; + return *this; + } + const absl::string_view text = splitter_->text(); + const absl::string_view d = delimiter_.Find(text, pos_); + if (d.data() == text.end()) state_ = kLastState; + curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); + pos_ += curr_.size() + d.size(); + } while (!predicate_(curr_)); + return *this; + } + + SplitIterator operator++(int) { + SplitIterator old(*this); + ++(*this); + return old; + } + + friend bool operator==(const SplitIterator& a, const SplitIterator& b) { + return a.state_ == b.state_ && a.pos_ == b.pos_; + } + + friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { + return !(a == b); + } + + private: + size_t pos_; + State state_; + absl::string_view curr_; + const Splitter* splitter_; + typename Splitter::DelimiterType delimiter_; + typename Splitter::PredicateType predicate_; +}; + +// HasMappedType<T>::value is true iff there exists a type T::mapped_type. +template <typename T, typename = void> +struct HasMappedType : std::false_type {}; +template <typename T> +struct HasMappedType<T, absl::void_t<typename T::mapped_type>> + : std::true_type {}; + +// HasValueType<T>::value is true iff there exists a type T::value_type. +template <typename T, typename = void> +struct HasValueType : std::false_type {}; +template <typename T> +struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { +}; + +// HasConstIterator<T>::value is true iff there exists a type T::const_iterator. +template <typename T, typename = void> +struct HasConstIterator : std::false_type {}; +template <typename T> +struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> + : std::true_type {}; + +// IsInitializerList<T>::value is true iff T is an std::initializer_list. More +// details below in Splitter<> where this is used. +std::false_type IsInitializerListDispatch(...); // default: No +template <typename T> +std::true_type IsInitializerListDispatch(std::initializer_list<T>*); +template <typename T> +struct IsInitializerList + : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; + +// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition +// is true for type 'C'. +// +// Restricts conversion to container-like types (by testing for the presence of +// a const_iterator member type) and also to disable conversion to an +// std::initializer_list (which also has a const_iterator). Otherwise, code +// compiled in C++11 will get an error due to ambiguous conversion paths (in +// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> +// or an std::initializer_list<T>). +template <typename C> +struct SplitterIsConvertibleTo + : std::enable_if< + !IsStrictlyDebugWrapperBase<C>::value && + !IsInitializerList<C>::value && + HasValueType<C>::value && + HasConstIterator<C>::value> {}; + +// This class implements the range that is returned by absl::StrSplit(). This +// class has templated conversion operators that allow it to be implicitly +// converted to a variety of types that the caller may have specified on the +// left-hand side of an assignment. +// +// The main interface for interacting with this class is through its implicit +// conversion operators. However, this class may also be used like a container +// in that it has .begin() and .end() member functions. It may also be used +// within a range-for loop. +// +// Output containers can be collections of any type that is constructible from +// an absl::string_view. +// +// An Predicate functor may be supplied. This predicate will be used to filter +// the split strings: only strings for which the predicate returns true will be +// kept. A Predicate object is any unary functor that takes an absl::string_view +// and returns bool. +template <typename Delimiter, typename Predicate> +class Splitter { + public: + using DelimiterType = Delimiter; + using PredicateType = Predicate; + using const_iterator = strings_internal::SplitIterator<Splitter>; + using value_type = typename std::iterator_traits<const_iterator>::value_type; + + Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) + : text_(std::move(input_text)), + delimiter_(std::move(d)), + predicate_(std::move(p)) {} + + absl::string_view text() const { return text_.value(); } + const Delimiter& delimiter() const { return delimiter_; } + const Predicate& predicate() const { return predicate_; } + + // Range functions that iterate the split substrings as absl::string_view + // objects. These methods enable a Splitter to be used in a range-based for + // loop. + const_iterator begin() const { return {const_iterator::kInitState, this}; } + const_iterator end() const { return {const_iterator::kEndState, this}; } + + // An implicit conversion operator that is restricted to only those containers + // that the splitter is convertible to. + template <typename Container, + typename OnlyIf = typename SplitterIsConvertibleTo<Container>::type> + operator Container() const { // NOLINT(runtime/explicit) + return ConvertToContainer<Container, typename Container::value_type, + HasMappedType<Container>::value>()(*this); + } + + // Returns a pair with its .first and .second members set to the first two + // strings returned by the begin() iterator. Either/both of .first and .second + // will be constructed with empty strings if the iterator doesn't have a + // corresponding value. + template <typename First, typename Second> + operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) + absl::string_view first, second; + auto it = begin(); + if (it != end()) { + first = *it; + if (++it != end()) { + second = *it; + } + } + return {First(first), Second(second)}; + } + + private: + // ConvertToContainer is a functor converting a Splitter to the requested + // Container of ValueType. It is specialized below to optimize splitting to + // certain combinations of Container and ValueType. + // + // This base template handles the generic case of storing the split results in + // the requested non-map-like container and converting the split substrings to + // the requested type. + template <typename Container, typename ValueType, bool is_map = false> + struct ConvertToContainer { + Container operator()(const Splitter& splitter) const { + Container c; + auto it = std::inserter(c, c.end()); + for (const auto sp : splitter) { + *it++ = ValueType(sp); + } + return c; + } + }; + + // Partial specialization for a std::vector<absl::string_view>. + // + // Optimized for the common case of splitting to a + // std::vector<absl::string_view>. In this case we first split the results to + // a small array of absl::string_view on the stack, to reduce reallocations. + template <typename A> + struct ConvertToContainer<std::vector<absl::string_view, A>, + absl::string_view, false> { + std::vector<absl::string_view, A> operator()( + const Splitter& splitter) const { + struct raw_view { + const char* data; + size_t size; + operator absl::string_view() const { // NOLINT(runtime/explicit) + return {data, size}; + } + }; + std::vector<absl::string_view, A> v; + std::array<raw_view, 16> ar; + for (auto it = splitter.begin(); !it.at_end();) { + size_t index = 0; + do { + ar[index].data = it->data(); + ar[index].size = it->size(); + ++it; + } while (++index != ar.size() && !it.at_end()); + v.insert(v.end(), ar.begin(), ar.begin() + index); + } + return v; + } + }; + + // Partial specialization for a std::vector<std::string>. + // + // Optimized for the common case of splitting to a std::vector<std::string>. In + // this case we first split the results to a std::vector<absl::string_view> so + // the returned std::vector<std::string> can have space reserved to avoid std::string + // moves. + template <typename A> + struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { + std::vector<std::string, A> operator()(const Splitter& splitter) const { + const std::vector<absl::string_view> v = splitter; + return std::vector<std::string, A>(v.begin(), v.end()); + } + }; + + // Partial specialization for containers of pairs (e.g., maps). + // + // The algorithm is to insert a new pair into the map for each even-numbered + // item, with the even-numbered item as the key with a default-constructed + // value. Each odd-numbered item will then be assigned to the last pair's + // value. + template <typename Container, typename First, typename Second> + struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + Container operator()(const Splitter& splitter) const { + Container m; + typename Container::iterator it; + bool insert = true; + for (const auto sp : splitter) { + if (insert) { + it = Inserter<Container>::Insert(&m, First(sp), Second()); + } else { + it->second = Second(sp); + } + insert = !insert; + } + return m; + } + + // Inserts the key and value into the given map, returning an iterator to + // the inserted item. Specialized for std::map and std::multimap to use + // emplace() and adapt emplace()'s return value. + template <typename Map> + struct Inserter { + using M = Map; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->insert(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::map<Ts...>> { + using M = std::map<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::multimap<Ts...>> { + using M = std::multimap<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)); + } + }; + }; + + ConvertibleToStringView text_; + Delimiter delimiter_; + Predicate predicate_; +}; + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc new file mode 100644 index 000000000000..2415c2ccc45c --- /dev/null +++ b/absl/strings/internal/utf8.cc @@ -0,0 +1,51 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// UTF8 utilities, implemented to reduce dependencies. + +#include "absl/strings/internal/utf8.h" + +namespace absl { +namespace strings_internal { + +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { + if (utf8_char <= 0x7F) { + *buffer = static_cast<char>(utf8_char); + return 1; + } else if (utf8_char <= 0x7FF) { + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xC0 | utf8_char; + return 2; + } else if (utf8_char <= 0xFFFF) { + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xE0 | utf8_char; + return 3; + } else { + buffer[3] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xF0 | utf8_char; + return 4; + } +} + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h new file mode 100644 index 000000000000..705eea7f16ab --- /dev/null +++ b/absl/strings/internal/utf8.h @@ -0,0 +1,52 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// UTF8 utilities, implemented to reduce dependencies. +// +// If you need Unicode specific processing (for example being aware of +// Unicode character boundaries, or knowledge of Unicode casing rules, +// or various forms of equivalence and normalization), take a look at +// files in i18n/utf8. + +#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_ +#define ABSL_STRINGS_INTERNAL_UTF8_H_ + +#include <cstddef> +#include <cstdint> + + +namespace absl { +namespace strings_internal { + +// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes +// out the UTF-8 encoding into buffer, and returns the number of chars +// it wrote. +// +// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings +// are: +// 00 - 7F : 0xxxxxxx +// 80 - 7FF : 110xxxxx 10xxxxxx +// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx +// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +// +// Values greater than 0x10FFFF are not supported and may or may not write +// characters into buffer, however never will more than kMaxEncodedUTF8Size +// bytes be written, regardless of the value of utf8_char. +enum { kMaxEncodedUTF8Size = 4 }; +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_UTF8_H_ diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc new file mode 100644 index 000000000000..4d437427a88a --- /dev/null +++ b/absl/strings/internal/utf8_test.cc @@ -0,0 +1,58 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/utf8.h" + +#include <cctype> +#include <cstdlib> +#include <cstring> +#include <cstdint> + +#include "gtest/gtest.h" + +namespace { + +TEST(EncodeUTF8Char, BasicFunction) { + std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"}, + {0x00A3, u8"\u00A3"}, + {0x00010000, u8"\U00010000"}, + {0x0000FFFF, u8"\U0000FFFF"}, + {0x0010FFFD, u8"\U0010FFFD"}}; + for (auto &test : tests) { + char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}; + char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'}; + char *buf0_written = + &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)]; + char *buf1_written = + &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)]; + int apparent_length = 7; + while (buf0[apparent_length - 1] == '\x00' && + buf1[apparent_length - 1] == '\xFF') { + if (--apparent_length == 0) break; + } + EXPECT_EQ(apparent_length, buf0_written - buf0); + EXPECT_EQ(apparent_length, buf1_written - buf1); + EXPECT_EQ(apparent_length, test.second.length()); + EXPECT_EQ(std::string(buf0, apparent_length), test.second); + EXPECT_EQ(std::string(buf1, apparent_length), test.second); + } + char buf[32] = "Don't Tread On Me"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000), + absl::strings_internal::kMaxEncodedUTF8Size); + char buf2[32] = "Negative is invalid but sane"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1), + absl::strings_internal::kMaxEncodedUTF8Size); +} + +} // namespace diff --git a/absl/strings/match.cc b/absl/strings/match.cc new file mode 100644 index 000000000000..53881bdd3f3c --- /dev/null +++ b/absl/strings/match.cc @@ -0,0 +1,40 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/match.h" + +#include "absl/strings/internal/memutil.h" + +namespace absl { + +namespace { +bool CaseEqual(absl::string_view piece1, absl::string_view piece2) { + return (piece1.size() == piece2.size() && + 0 == strings_internal::memcasecmp(piece1.data(), piece2.data(), + piece1.size())); + // memcasecmp uses ascii_tolower(). +} +} // namespace + +bool StartsWithIgnoreCase(absl::string_view text, absl::string_view preffix) { + return (text.size() >= preffix.size()) && + CaseEqual(text.substr(0, preffix.size()), preffix); +} + +bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) { + return (text.size() >= suffix.size()) && + CaseEqual(text.substr(text.size() - suffix.size()), suffix); +} + +} // namespace absl diff --git a/absl/strings/match.h b/absl/strings/match.h new file mode 100644 index 000000000000..4a5d1c0342cd --- /dev/null +++ b/absl/strings/match.h @@ -0,0 +1,81 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: match.h +// ----------------------------------------------------------------------------- +// +// This file contains simple utilities for performing std::string matching checks. +// All of these function parameters are specified as `absl::string_view`, +// meaning that these functions can accept `std::string`, `absl::string_view` or +// nul-terminated C-style strings. +// +// Examples: +// std::string s = "foo"; +// absl::string_view sv = "f"; +// EXPECT_TRUE(absl::StrContains(s, sv)); +// +// Note: The order of parameters in these functions is designed to mimic the +// order an equivalent member function would exhibit; +// e.g. `s.Contains(x)` ==> `absl::StrContains(s, x). +#ifndef ABSL_STRINGS_MATCH_H_ +#define ABSL_STRINGS_MATCH_H_ + +#include <cstring> + +#include "absl/strings/string_view.h" + +namespace absl { + +// StrContains() +// +// Returns whether a given std::string `s` contains the substring `x`. +inline bool StrContains(absl::string_view s, absl::string_view x) { + return static_cast<absl::string_view::size_type>(s.find(x, 0)) != s.npos; +} + +// StartsWith() +// +// Returns whether a given std::string `s` begins with `x`. +inline bool StartsWith(absl::string_view s, absl::string_view x) { + return x.empty() || + (s.size() >= x.size() && memcmp(s.data(), x.data(), x.size()) == 0); +} + +// EndsWith() +// +// Returns whether a given std::string `s` ends `x`. +inline bool EndsWith(absl::string_view s, absl::string_view x) { + return x.empty() || + (s.size() >= x.size() && + memcmp(s.data() + (s.size() - x.size()), x.data(), x.size()) == 0); +} + +// StartsWithIgnoreCase() +// +// Returns whether a given std::string `text` starts with `starts_with`, ignoring +// case in the comparison. +bool StartsWithIgnoreCase(absl::string_view text, + absl::string_view starts_with); + +// EndsWithIgnoreCase() +// +// Returns whether a given std::string `text` ends with `ends_with`, ignoring case +// in the comparison. +bool EndsWithIgnoreCase(absl::string_view text, absl::string_view ends_with); + +} // namespace absl + +#endif // ABSL_STRINGS_MATCH_H_ diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc new file mode 100644 index 000000000000..d194f0e689bf --- /dev/null +++ b/absl/strings/match_test.cc @@ -0,0 +1,99 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/match.h" + +#include "gtest/gtest.h" + +namespace { + +TEST(MatchTest, StartsWith) { + const std::string s1("123" "\0" "456", 7); + const absl::string_view a("foobar"); + const absl::string_view b(s1); + const absl::string_view e; + EXPECT_TRUE(absl::StartsWith(a, a)); + EXPECT_TRUE(absl::StartsWith(a, "foo")); + EXPECT_TRUE(absl::StartsWith(a, e)); + EXPECT_TRUE(absl::StartsWith(b, s1)); + EXPECT_TRUE(absl::StartsWith(b, b)); + EXPECT_TRUE(absl::StartsWith(b, e)); + EXPECT_TRUE(absl::StartsWith(e, "")); + EXPECT_FALSE(absl::StartsWith(a, b)); + EXPECT_FALSE(absl::StartsWith(b, a)); + EXPECT_FALSE(absl::StartsWith(e, a)); +} + +TEST(MatchTest, EndsWith) { + const std::string s1("123" "\0" "456", 7); + const absl::string_view a("foobar"); + const absl::string_view b(s1); + const absl::string_view e; + EXPECT_TRUE(absl::EndsWith(a, a)); + EXPECT_TRUE(absl::EndsWith(a, "bar")); + EXPECT_TRUE(absl::EndsWith(a, e)); + EXPECT_TRUE(absl::EndsWith(b, s1)); + EXPECT_TRUE(absl::EndsWith(b, b)); + EXPECT_TRUE(absl::EndsWith(b, e)); + EXPECT_TRUE(absl::EndsWith(e, "")); + EXPECT_FALSE(absl::EndsWith(a, b)); + EXPECT_FALSE(absl::EndsWith(b, a)); + EXPECT_FALSE(absl::EndsWith(e, a)); +} + +TEST(MatchTest, Contains) { + absl::string_view a("abcdefg"); + absl::string_view b("abcd"); + absl::string_view c("efg"); + absl::string_view d("gh"); + EXPECT_TRUE(absl::StrContains(a, a)); + EXPECT_TRUE(absl::StrContains(a, b)); + EXPECT_TRUE(absl::StrContains(a, c)); + EXPECT_FALSE(absl::StrContains(a, d)); + EXPECT_TRUE(absl::StrContains("", "")); + EXPECT_TRUE(absl::StrContains("abc", "")); + EXPECT_FALSE(absl::StrContains("", "a")); +} + +TEST(MatchTest, ContainsNull) { + const std::string s = "foo"; + const char* cs = "foo"; + const absl::string_view sv("foo"); + const absl::string_view sv2("foo\0bar", 4); + EXPECT_EQ(s, "foo"); + EXPECT_EQ(sv, "foo"); + EXPECT_NE(sv2, "foo"); + EXPECT_TRUE(absl::EndsWith(s, sv)); + EXPECT_TRUE(absl::StartsWith(cs, sv)); + EXPECT_TRUE(absl::StrContains(cs, sv)); + EXPECT_FALSE(absl::StrContains(cs, sv2)); +} + +TEST(MatchTest, StartsWithIgnoreCase) { + EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "foo")); + EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "Fo")); + EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "")); + EXPECT_FALSE(absl::StartsWithIgnoreCase("foo", "fooo")); + EXPECT_FALSE(absl::StartsWithIgnoreCase("", "fo")); +} + +TEST(MatchTest, EndsWithIgnoreCase) { + EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "foo")); + EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "Oo")); + EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "")); + EXPECT_FALSE(absl::EndsWithIgnoreCase("foo", "fooo")); + EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo")); +} + +} // namespace diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc new file mode 100644 index 000000000000..3b093b98c6f4 --- /dev/null +++ b/absl/strings/numbers.cc @@ -0,0 +1,1288 @@ +// This file contains std::string processing functions related to +// numeric values. + +#include "absl/strings/numbers.h" + +#include <cassert> +#include <cctype> +#include <cfloat> // for DBL_DIG and FLT_DIG +#include <cmath> // for HUGE_VAL +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <limits> +#include <memory> +#include <string> + +#include "absl/base/internal/raw_logging.h" +#include "absl/numeric/int128.h" +#include "absl/strings/ascii.h" +#include "absl/strings/internal/memutil.h" +#include "absl/strings/str_cat.h" + +namespace absl { + +bool SimpleAtof(absl::string_view str, float* value) { + *value = 0.0; + if (str.empty()) return false; + char buf[32]; + std::unique_ptr<char[]> bigbuf; + char* ptr = buf; + if (str.size() > sizeof(buf) - 1) { + bigbuf.reset(new char[str.size() + 1]); + ptr = bigbuf.get(); + } + memcpy(ptr, str.data(), str.size()); + ptr[str.size()] = '\0'; + + char* endptr; + *value = strtof(ptr, &endptr); + if (endptr != ptr) { + while (absl::ascii_isspace(*endptr)) ++endptr; + } + // Ignore range errors from strtod/strtof. + // The values it returns on underflow and + // overflow are the right fallback in a + // robust setting. + return *ptr != '\0' && *endptr == '\0'; +} + +bool SimpleAtod(absl::string_view str, double* value) { + *value = 0.0; + if (str.empty()) return false; + char buf[32]; + std::unique_ptr<char[]> bigbuf; + char* ptr = buf; + if (str.size() > sizeof(buf) - 1) { + bigbuf.reset(new char[str.size() + 1]); + ptr = bigbuf.get(); + } + memcpy(ptr, str.data(), str.size()); + ptr[str.size()] = '\0'; + + char* endptr; + *value = strtod(ptr, &endptr); + if (endptr != ptr) { + while (absl::ascii_isspace(*endptr)) ++endptr; + } + // Ignore range errors from strtod. The values it + // returns on underflow and overflow are the right + // fallback in a robust setting. + return *ptr != '\0' && *endptr == '\0'; +} + +namespace { + +// TODO(rogeeff): replace with the real released thing once we figure out what +// it is. +inline bool CaseEqual(absl::string_view piece1, absl::string_view piece2) { + return (piece1.size() == piece2.size() && + 0 == strings_internal::memcasecmp(piece1.data(), piece2.data(), + piece1.size())); +} + +// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the +// range 0 <= i < 100, and buf must have space for two characters. Example: +// char buf[2]; +// PutTwoDigits(42, buf); +// // buf[0] == '4' +// // buf[1] == '2' +inline void PutTwoDigits(size_t i, char* buf) { + static const char two_ASCII_digits[100][2] = { + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, + {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, + {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, + {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, + {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, + {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, + {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, + {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, + {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, + {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, + {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, + {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, + {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, + {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, + {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, + {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, + {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, + {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, + {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, + {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'} + }; + assert(i < 100); + memcpy(buf, two_ASCII_digits[i], 2); +} + +} // namespace + +bool SimpleAtob(absl::string_view str, bool* value) { + ABSL_RAW_CHECK(value != nullptr, "Output pointer must not be nullptr."); + if (CaseEqual(str, "true") || CaseEqual(str, "t") || + CaseEqual(str, "yes") || CaseEqual(str, "y") || + CaseEqual(str, "1")) { + *value = true; + return true; + } + if (CaseEqual(str, "false") || CaseEqual(str, "f") || + CaseEqual(str, "no") || CaseEqual(str, "n") || + CaseEqual(str, "0")) { + *value = false; + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// FastInt32ToBuffer() +// FastUInt32ToBuffer() +// FastInt64ToBuffer() +// FastUInt64ToBuffer() +// +// Like the Fast*ToBuffer() functions above, these are intended for speed. +// Unlike the Fast*ToBuffer() functions, however, these functions write +// their output to the beginning of the buffer (hence the name, as the +// output is left-aligned). The caller is responsible for ensuring that +// the buffer has enough space to hold the output. +// +// Returns a pointer to the end of the std::string (i.e. the null character +// terminating the std::string). +// ---------------------------------------------------------------------- + +namespace { + +// Used to optimize printing a decimal number's final digit. +const char one_ASCII_final_digits[10][2] { + {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0}, + {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0}, +}; + +} // namespace + +char* numbers_internal::FastUInt32ToBuffer(uint32_t i, char* buffer) { + uint32_t digits; + // The idea of this implementation is to trim the number of divides to as few + // as possible, and also reducing memory stores and branches, by going in + // steps of two digits at a time rather than one whenever possible. + // The huge-number case is first, in the hopes that the compiler will output + // that case in one branch-free block of code, and only output conditional + // branches into it from below. + if (i >= 1000000000) { // >= 1,000,000,000 + digits = i / 100000000; // 100,000,000 + i -= digits * 100000000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt100_000_000: + digits = i / 1000000; // 1,000,000 + i -= digits * 1000000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt1_000_000: + digits = i / 10000; // 10,000 + i -= digits * 10000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt10_000: + digits = i / 100; + i -= digits * 100; + PutTwoDigits(digits, buffer); + buffer += 2; + lt100: + digits = i; + PutTwoDigits(digits, buffer); + buffer += 2; + *buffer = 0; + return buffer; + } + + if (i < 100) { + digits = i; + if (i >= 10) goto lt100; + memcpy(buffer, one_ASCII_final_digits[i], 2); + return buffer + 1; + } + if (i < 10000) { // 10,000 + if (i >= 1000) goto lt10_000; + digits = i / 100; + i -= digits * 100; + *buffer++ = '0' + digits; + goto lt100; + } + if (i < 1000000) { // 1,000,000 + if (i >= 100000) goto lt1_000_000; + digits = i / 10000; // 10,000 + i -= digits * 10000; + *buffer++ = '0' + digits; + goto lt10_000; + } + if (i < 100000000) { // 100,000,000 + if (i >= 10000000) goto lt100_000_000; + digits = i / 1000000; // 1,000,000 + i -= digits * 1000000; + *buffer++ = '0' + digits; + goto lt1_000_000; + } + // we already know that i < 1,000,000,000 + digits = i / 100000000; // 100,000,000 + i -= digits * 100000000; + *buffer++ = '0' + digits; + goto lt100_000_000; +} + +char* numbers_internal::FastInt32ToBuffer(int32_t i, char* buffer) { + uint32_t u = i; + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apprently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + return numbers_internal::FastUInt32ToBuffer(u, buffer); +} + +char* numbers_internal::FastUInt64ToBuffer(uint64_t i, char* buffer) { + uint32_t u32 = static_cast<uint32_t>(i); + if (u32 == i) return numbers_internal::FastUInt32ToBuffer(u32, buffer); + + // Here we know i has at least 10 decimal digits. + uint64_t top_1to11 = i / 1000000000; + u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000); + uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11); + + if (top_1to11_32 == top_1to11) { + buffer = numbers_internal::FastUInt32ToBuffer(top_1to11_32, buffer); + } else { + // top_1to11 has more than 32 bits too; print it in two steps. + uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100); + uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100); + buffer = numbers_internal::FastUInt32ToBuffer(top_8to9, buffer); + PutTwoDigits(mid_2, buffer); + buffer += 2; + } + + // We have only 9 digits now, again the maximum uint32_t can handle fully. + uint32_t digits = u32 / 10000000; // 10,000,000 + u32 -= digits * 10000000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 100000; // 100,000 + u32 -= digits * 100000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 1000; // 1,000 + u32 -= digits * 1000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 10; + u32 -= digits * 10; + PutTwoDigits(digits, buffer); + buffer += 2; + memcpy(buffer, one_ASCII_final_digits[u32], 2); + return buffer + 1; +} + +char* numbers_internal::FastInt64ToBuffer(int64_t i, char* buffer) { + uint64_t u = i; + if (i < 0) { + *buffer++ = '-'; + u = 0 - u; + } + return numbers_internal::FastUInt64ToBuffer(u, buffer); +} + +// Although DBL_DIG is typically 15, DBL_MAX is normally represented with 17 +// digits of precision. When converted to a std::string value with fewer digits +// of precision using strtod(), the result can be bigger than DBL_MAX due to +// a rounding error. Converting this value back to a double will produce an +// Inf which will trigger a SIGFPE if FP exceptions are enabled. We skip +// the precision check for sufficiently large values to avoid the SIGFPE. +static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001; + +char* numbers_internal::RoundTripDoubleToBuffer(double d, char* buffer) { + // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all + // platforms these days. Just in case some system exists where DBL_DIG + // is significantly larger -- and risks overflowing our buffer -- we have + // this assert. + static_assert(DBL_DIG < 20, "DBL_DIG is too big"); + + bool full_precision_needed = true; + if (std::abs(d) <= kDoublePrecisionCheckMax) { + int snprintf_result = snprintf(buffer, numbers_internal::kFastToBufferSize, + "%.*g", DBL_DIG, d); + + // The snprintf should never overflow because the buffer is significantly + // larger than the precision we asked for. + assert(snprintf_result > 0 && + snprintf_result < numbers_internal::kFastToBufferSize); + (void)snprintf_result; + + full_precision_needed = strtod(buffer, nullptr) != d; + } + + if (full_precision_needed) { + int snprintf_result = snprintf(buffer, numbers_internal::kFastToBufferSize, + "%.*g", DBL_DIG + 2, d); + + // Should never overflow; see above. + assert(snprintf_result > 0 && + snprintf_result < numbers_internal::kFastToBufferSize); + (void)snprintf_result; + } + return buffer; +} +// This table is used to quickly calculate the base-ten exponent of a given +// float, and then to provide a multiplier to bring that number into the +// range 1-999,999,999, that is, into uint32_t range. Finally, the exp +// std::string is made available so there is one less int-to-std::string conversion +// to be done. + +struct Spec { + double min_range; + double multiplier; + const char expstr[5]; +}; +const Spec neg_exp_table[] = { + {1.4e-45f, 1e+55, "e-45"}, // + {1e-44f, 1e+54, "e-44"}, // + {1e-43f, 1e+53, "e-43"}, // + {1e-42f, 1e+52, "e-42"}, // + {1e-41f, 1e+51, "e-41"}, // + {1e-40f, 1e+50, "e-40"}, // + {1e-39f, 1e+49, "e-39"}, // + {1e-38f, 1e+48, "e-38"}, // + {1e-37f, 1e+47, "e-37"}, // + {1e-36f, 1e+46, "e-36"}, // + {1e-35f, 1e+45, "e-35"}, // + {1e-34f, 1e+44, "e-34"}, // + {1e-33f, 1e+43, "e-33"}, // + {1e-32f, 1e+42, "e-32"}, // + {1e-31f, 1e+41, "e-31"}, // + {1e-30f, 1e+40, "e-30"}, // + {1e-29f, 1e+39, "e-29"}, // + {1e-28f, 1e+38, "e-28"}, // + {1e-27f, 1e+37, "e-27"}, // + {1e-26f, 1e+36, "e-26"}, // + {1e-25f, 1e+35, "e-25"}, // + {1e-24f, 1e+34, "e-24"}, // + {1e-23f, 1e+33, "e-23"}, // + {1e-22f, 1e+32, "e-22"}, // + {1e-21f, 1e+31, "e-21"}, // + {1e-20f, 1e+30, "e-20"}, // + {1e-19f, 1e+29, "e-19"}, // + {1e-18f, 1e+28, "e-18"}, // + {1e-17f, 1e+27, "e-17"}, // + {1e-16f, 1e+26, "e-16"}, // + {1e-15f, 1e+25, "e-15"}, // + {1e-14f, 1e+24, "e-14"}, // + {1e-13f, 1e+23, "e-13"}, // + {1e-12f, 1e+22, "e-12"}, // + {1e-11f, 1e+21, "e-11"}, // + {1e-10f, 1e+20, "e-10"}, // + {1e-09f, 1e+19, "e-09"}, // + {1e-08f, 1e+18, "e-08"}, // + {1e-07f, 1e+17, "e-07"}, // + {1e-06f, 1e+16, "e-06"}, // + {1e-05f, 1e+15, "e-05"}, // + {1e-04f, 1e+14, "e-04"}, // +}; + +const Spec pos_exp_table[] = { + {1e+08f, 1e+02, "e+08"}, // + {1e+09f, 1e+01, "e+09"}, // + {1e+10f, 1e+00, "e+10"}, // + {1e+11f, 1e-01, "e+11"}, // + {1e+12f, 1e-02, "e+12"}, // + {1e+13f, 1e-03, "e+13"}, // + {1e+14f, 1e-04, "e+14"}, // + {1e+15f, 1e-05, "e+15"}, // + {1e+16f, 1e-06, "e+16"}, // + {1e+17f, 1e-07, "e+17"}, // + {1e+18f, 1e-08, "e+18"}, // + {1e+19f, 1e-09, "e+19"}, // + {1e+20f, 1e-10, "e+20"}, // + {1e+21f, 1e-11, "e+21"}, // + {1e+22f, 1e-12, "e+22"}, // + {1e+23f, 1e-13, "e+23"}, // + {1e+24f, 1e-14, "e+24"}, // + {1e+25f, 1e-15, "e+25"}, // + {1e+26f, 1e-16, "e+26"}, // + {1e+27f, 1e-17, "e+27"}, // + {1e+28f, 1e-18, "e+28"}, // + {1e+29f, 1e-19, "e+29"}, // + {1e+30f, 1e-20, "e+30"}, // + {1e+31f, 1e-21, "e+31"}, // + {1e+32f, 1e-22, "e+32"}, // + {1e+33f, 1e-23, "e+33"}, // + {1e+34f, 1e-24, "e+34"}, // + {1e+35f, 1e-25, "e+35"}, // + {1e+36f, 1e-26, "e+36"}, // + {1e+37f, 1e-27, "e+37"}, // + {1e+38f, 1e-28, "e+38"}, // + {1e+39, 1e-29, "e+39"}, // +}; + +struct ExpCompare { + bool operator()(const Spec& spec, double d) const { + return spec.min_range < d; + } +}; + +// Utility routine(s) for RoundTripFloatToBuffer: +// OutputNecessaryDigits takes two 11-digit numbers, whose integer portion +// represents the fractional part of a floating-point number, and outputs a +// number that is in-between them, with the fewest digits possible. For +// instance, given 12345678900 and 12345876900, it would output "0123457". +// When there are multiple final digits that would satisfy this requirement, +// this routine attempts to use a digit that would represent the average of +// lower_double and upper_double. +// +// Although the routine works using integers, all callers use doubles, so +// for their convenience this routine accepts doubles. +static char* OutputNecessaryDigits(double lower_double, double upper_double, + char* out) { + assert(lower_double > 0); + assert(lower_double < upper_double - 10); + assert(upper_double < 100000000000.0); + + // Narrow the range a bit; without this bias, an input of lower=87654320010.0 + // and upper=87654320100.0 would produce an output of 876543201 + // + // We do this in three steps: first, we lower the upper bound and truncate it + // to an integer. Then, we increase the lower bound by exactly the amount we + // just decreased the upper bound by - at that point, the midpoint is exactly + // where it used to be. Then we truncate the lower bound. + + uint64_t upper64 = upper_double - (1.0 / 1024); + double shrink = upper_double - upper64; + uint64_t lower64 = lower_double + shrink; + + // Theory of operation: we convert the lower number to ascii representation, + // two digits at a time. As we go, we remove the same digits from the upper + // number. When we see the upper number does not share those same digits, we + // know we can stop converting. When we stop, the last digit we output is + // taken from the average of upper and lower values, rounded up. + char buf[2]; + uint32_t lodigits = + static_cast<uint32_t>(lower64 / 1000000000); // 1,000,000,000 + uint64_t mul64 = lodigits * uint64_t{1000000000}; + + PutTwoDigits(lodigits, out); + out += 2; + if (upper64 - mul64 >= 1000000000) { // digit mismatch! + PutTwoDigits(upper64 / 1000000000, buf); + if (out[-2] != buf[0]) { + out[-2] = '0' + (upper64 + lower64 + 10000000000) / 20000000000; + --out; + } else { + PutTwoDigits((upper64 + lower64 + 1000000000) / 2000000000, out - 2); + } + *out = '\0'; + return out; + } + uint32_t lower = static_cast<uint32_t>(lower64 - mul64); + uint32_t upper = static_cast<uint32_t>(upper64 - mul64); + + lodigits = lower / 10000000; // 10,000,000 + uint32_t mul = lodigits * 10000000; + PutTwoDigits(lodigits, out); + out += 2; + if (upper - mul >= 10000000) { // digit mismatch! + PutTwoDigits(upper / 10000000, buf); + if (out[-2] != buf[0]) { + out[-2] = '0' + (upper + lower + 100000000) / 200000000; + --out; + } else { + PutTwoDigits((upper + lower + 10000000) / 20000000, out - 2); + } + *out = '\0'; + return out; + } + lower -= mul; + upper -= mul; + + lodigits = lower / 100000; // 100,000 + mul = lodigits * 100000; + PutTwoDigits(lodigits, out); + out += 2; + if (upper - mul >= 100000) { // digit mismatch! + PutTwoDigits(upper / 100000, buf); + if (out[-2] != buf[0]) { + out[-2] = '0' + (upper + lower + 1000000) / 2000000; + --out; + } else { + PutTwoDigits((upper + lower + 100000) / 200000, out - 2); + } + *out = '\0'; + return out; + } + lower -= mul; + upper -= mul; + + lodigits = lower / 1000; + mul = lodigits * 1000; + PutTwoDigits(lodigits, out); + out += 2; + if (upper - mul >= 1000) { // digit mismatch! + PutTwoDigits(upper / 1000, buf); + if (out[-2] != buf[0]) { + out[-2] = '0' + (upper + lower + 10000) / 20000; + --out; + } else { + PutTwoDigits((upper + lower + 1000) / 2000, out - 2); + } + *out = '\0'; + return out; + } + lower -= mul; + upper -= mul; + + PutTwoDigits(lower / 10, out); + out += 2; + PutTwoDigits(upper / 10, buf); + if (out[-2] != buf[0]) { + out[-2] = '0' + (upper + lower + 100) / 200; + --out; + } else { + PutTwoDigits((upper + lower + 10) / 20, out - 2); + } + *out = '\0'; + return out; +} + +// RoundTripFloatToBuffer converts the given float into a std::string which, if +// passed to strtof, will produce the exact same original float. It does this +// by computing the range of possible doubles which map to the given float, and +// then examining the digits of the doubles in that range. If all the doubles +// in the range start with "2.37", then clearly our float does, too. As soon as +// they diverge, only one more digit is needed. +char* numbers_internal::RoundTripFloatToBuffer(float f, char* buffer) { + static_assert(std::numeric_limits<float>::is_iec559, + "IEEE-754/IEC-559 support only"); + + char* out = buffer; // we write data to out, incrementing as we go, but + // FloatToBuffer always returns the address of the buffer + // passed in. + + if (std::isnan(f)) { + strcpy(out, "nan"); // NOLINT(runtime/printf) + return buffer; + } + if (f == 0) { // +0 and -0 are handled here + if (std::signbit(f)) { + strcpy(out, "-0"); // NOLINT(runtime/printf) + } else { + strcpy(out, "0"); // NOLINT(runtime/printf) + } + return buffer; + } + if (f < 0) { + *out++ = '-'; + f = -f; + } + if (std::isinf(f)) { + strcpy(out, "inf"); // NOLINT(runtime/printf) + return buffer; + } + + double next_lower = nextafterf(f, 0.0f); + // For all doubles in the range lower_bound < f < upper_bound, the + // nearest float is f. + double lower_bound = (f + next_lower) * 0.5; + double upper_bound = f + (f - lower_bound); + // Note: because std::nextafter is slow, we calculate upper_bound + // assuming that it is the same distance from f as lower_bound is. + // For exact powers of two, upper_bound is actually twice as far + // from f as lower_bound is, but this turns out not to matter. + + // Most callers pass floats that are either 0 or within the + // range 0.0001 through 100,000,000, so handle those first, + // since they don't need exponential notation. + const Spec* spec = nullptr; + if (f < 1.0) { + if (f >= 0.0001f) { + // for fractional values, we set up the multiplier at the same + // time as we output the leading "0." / "0.0" / "0.00" / "0.000" + double multiplier = 1e+11; + *out++ = '0'; + *out++ = '.'; + if (f < 0.1f) { + multiplier = 1e+12; + *out++ = '0'; + if (f < 0.01f) { + multiplier = 1e+13; + *out++ = '0'; + if (f < 0.001f) { + multiplier = 1e+14; + *out++ = '0'; + } + } + } + OutputNecessaryDigits(lower_bound * multiplier, upper_bound * multiplier, + out); + return buffer; + } + spec = std::lower_bound(std::begin(neg_exp_table), std::end(neg_exp_table), + double{f}, ExpCompare()); + if (spec == std::end(neg_exp_table)) --spec; + } else if (f < 1e8) { + // Handling non-exponential format greater than 1.0 is similar to the above, + // but instead of 0.0 / 0.00 / 0.000, the prefix is simply the truncated + // integer part of f. + int32_t as_int = f; + out = numbers_internal::FastUInt32ToBuffer(as_int, out); + // Easy: if the integer part is within (lower_bound, upper_bound), then we + // are already done. + if (as_int > lower_bound && as_int < upper_bound) { + return buffer; + } + *out++ = '.'; + OutputNecessaryDigits((lower_bound - as_int) * 1e11, + (upper_bound - as_int) * 1e11, out); + return buffer; + } else { + spec = std::lower_bound(std::begin(pos_exp_table), + std::end(pos_exp_table), + double{f}, ExpCompare()); + if (spec == std::end(pos_exp_table)) --spec; + } + // Exponential notation from here on. "spec" was computed using lower_bound, + // which means it's the first spec from the table where min_range is greater + // or equal to f. + // Unfortunately that's not quite what we want; we want a min_range that is + // less or equal. So first thing, if it was greater, back up one entry. + if (spec->min_range > f) --spec; + + // The digits might be "237000123", but we want "2.37000123", + // so we output the digits one character later, and then move the first + // digit back so we can stick the "." in. + char* start = out; + out = OutputNecessaryDigits(lower_bound * spec->multiplier, + upper_bound * spec->multiplier, start + 1); + start[0] = start[1]; + start[1] = '.'; + + // If it turns out there was only one digit output, then back up over the '.' + if (out == &start[2]) --out; + + // Now add the "e+NN" part. + memcpy(out, spec->expstr, 4); + out[4] = '\0'; + return buffer; +} + +// Returns the number of leading 0 bits in a 64-bit value. +// TODO(jorg): Replace with builtin_clzll if available. +// Are we shipping util/bits in absl? +static inline int CountLeadingZeros64(uint64_t n) { + int zeroes = 60; + if (n >> 32) zeroes -= 32, n >>= 32; + if (n >> 16) zeroes -= 16, n >>= 16; + if (n >> 8) zeroes -= 8, n >>= 8; + if (n >> 4) zeroes -= 4, n >>= 4; + return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0\0"[n] + zeroes; +} + +// Given a 128-bit number expressed as a pair of uint64_t, high half first, +// return that number multiplied by the given 32-bit value. If the result is +// too large to fit in a 128-bit number, divide it by 2 until it fits. +static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num, + uint32_t mul) { + uint64_t bits0_31 = num.second & 0xFFFFFFFF; + uint64_t bits32_63 = num.second >> 32; + uint64_t bits64_95 = num.first & 0xFFFFFFFF; + uint64_t bits96_127 = num.first >> 32; + + // The picture so far: each of these 64-bit values has only the lower 32 bits + // filled in. + // bits96_127: [ 00000000 xxxxxxxx ] + // bits64_95: [ 00000000 xxxxxxxx ] + // bits32_63: [ 00000000 xxxxxxxx ] + // bits0_31: [ 00000000 xxxxxxxx ] + + bits0_31 *= mul; + bits32_63 *= mul; + bits64_95 *= mul; + bits96_127 *= mul; + + // Now the top halves may also have value, though all 64 of their bits will + // never be set at the same time, since they are a result of a 32x32 bit + // multiply. This makes the carry calculation slightly easier. + // bits96_127: [ mmmmmmmm | mmmmmmmm ] + // bits64_95: [ | mmmmmmmm mmmmmmmm | ] + // bits32_63: | [ mmmmmmmm | mmmmmmmm ] + // bits0_31: | [ | mmmmmmmm mmmmmmmm ] + // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ] + + uint64_t bits0_63 = bits0_31 + (bits32_63 << 32); + uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) + + (bits0_63 < bits0_31); + uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95); + if (bits128_up == 0) return {bits64_127, bits0_63}; + + int shift = 64 - CountLeadingZeros64(bits128_up); + uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift)); + uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift)); + return {hi, lo}; +} + +// Compute num * 5 ^ expfive, and return the first 128 bits of the result, +// where the first bit is always a one. So PowFive(1, 0) starts 0b100000, +// PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc. +static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) { + std::pair<uint64_t, uint64_t> result = {num, 0}; + while (expfive >= 13) { + // 5^13 is the highest power of five that will fit in a 32-bit integer. + result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5); + expfive -= 13; + } + constexpr int powers_of_five[13] = { + 1, + 5, + 5 * 5, + 5 * 5 * 5, + 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5}; + result = Mul32(result, powers_of_five[expfive & 15]); + int shift = CountLeadingZeros64(result.first); + if (shift != 0) { + result.first = (result.first << shift) + (result.second >> (64 - shift)); + result.second = (result.second << shift); + } + return result; +} + +struct ExpDigits { + int32_t exponent; + char digits[6]; +}; + +// SplitToSix converts value, a positive double-precision floating-point number, +// into a base-10 exponent and 6 ASCII digits, where the first digit is never +// zero. For example, SplitToSix(1) returns an exponent of zero and a digits +// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between +// two possible representations, e.g. value = 100000.5, then "round to even" is +// performed. +static ExpDigits SplitToSix(const double value) { + ExpDigits exp_dig; + int exp = 5; + double d = value; + // First step: calculate a close approximation of the output, where the + // value d will be between 100,000 and 999,999, representing the digits + // in the output ASCII array, and exp is the base-10 exponent. It would be + // faster to use a table here, and to look up the base-2 exponent of value, + // however value is an IEEE-754 64-bit number, so the table would have 2,000 + // entries, which is not cache-friendly. + if (d >= 999999.5) { + if (d >= 1e+261) exp += 256, d *= 1e-256; + if (d >= 1e+133) exp += 128, d *= 1e-128; + if (d >= 1e+69) exp += 64, d *= 1e-64; + if (d >= 1e+37) exp += 32, d *= 1e-32; + if (d >= 1e+21) exp += 16, d *= 1e-16; + if (d >= 1e+13) exp += 8, d *= 1e-8; + if (d >= 1e+9) exp += 4, d *= 1e-4; + if (d >= 1e+7) exp += 2, d *= 1e-2; + if (d >= 1e+6) exp += 1, d *= 1e-1; + } else { + if (d < 1e-250) exp -= 256, d *= 1e256; + if (d < 1e-122) exp -= 128, d *= 1e128; + if (d < 1e-58) exp -= 64, d *= 1e64; + if (d < 1e-26) exp -= 32, d *= 1e32; + if (d < 1e-10) exp -= 16, d *= 1e16; + if (d < 1e-2) exp -= 8, d *= 1e8; + if (d < 1e+2) exp -= 4, d *= 1e4; + if (d < 1e+4) exp -= 2, d *= 1e2; + if (d < 1e+5) exp -= 1, d *= 1e1; + } + // At this point, d is in the range [99999.5..999999.5) and exp is in the + // range [-324..308]. Since we need to round d up, we want to add a half + // and truncate. + // However, the technique above may have lost some precision, due to its + // repeated multiplication by constants that each may be off by half a bit + // of precision. This only matters if we're close to the edge though. + // Since we'd like to know if the fractional part of d is close to a half, + // we multiply it by 65536 and see if the fractional part is close to 32768. + // (The number doesn't have to be a power of two,but powers of two are faster) + uint64_t d64k = d * 65536; + int dddddd; // A 6-digit decimal integer. + if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) { + // OK, it's fairly likely that precision was lost above, which is + // not a surprise given only 52 mantissa bits are available. Therefore + // redo the calculation using 128-bit numbers. (64 bits are not enough). + + // Start out with digits rounded down; maybe add one below. + dddddd = static_cast<int>(d64k / 65536); + + // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual + // value we're representing, of course, is M.mmm... * 2^exp2. + int exp2; + double m = std::frexp(value, &exp2); + uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0); + // std::frexp returns an m value in the range [0.5, 1.0), however we + // can't multiply it by 2^64 and convert to an integer because some FPUs + // throw an exception when converting an number higher than 2^63 into an + // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter + // since m only has 52 significant bits anyway. + mantissa <<= 1; + exp2 -= 64; // not needed, but nice for debugging + + // OK, we are here to compare: + // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2 + // so we can round up dddddd if appropriate. Those values span the full + // range of 600 orders of magnitude of IEE 64-bit floating-point. + // Fortunately, we already know they are very close, so we don't need to + // track the base-2 exponent of both sides. This greatly simplifies the + // the math since the 2^exp2 calculation is unnecessary and the power-of-10 + // calculation can become a power-of-5 instead. + + std::pair<uint64_t, uint64_t> edge, val; + if (exp >= 6) { + // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa + // Since we're tossing powers of two, 2 * dddddd + 1 is the + // same as dddddd + 0.5 + edge = PowFive(2 * dddddd + 1, exp - 5); + + val.first = mantissa; + val.second = 0; + } else { + // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did + // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to + // mantissa * 5 ^ (5 - exp) + edge = PowFive(2 * dddddd + 1, 0); + + val = PowFive(mantissa, 5 - exp); + } + // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first, + // val.second, edge.first, edge.second); + if (val > edge) { + dddddd++; + } else if (val == edge) { + dddddd += (dddddd & 1); + } + } else { + // Here, we are not close to the edge. + dddddd = static_cast<int>((d64k + 32768) / 65536); + } + if (dddddd == 1000000) { + dddddd = 100000; + exp += 1; + } + exp_dig.exponent = exp; + + int two_digits = dddddd / 10000; + dddddd -= two_digits * 10000; + PutTwoDigits(two_digits, &exp_dig.digits[0]); + + two_digits = dddddd / 100; + dddddd -= two_digits * 100; + PutTwoDigits(two_digits, &exp_dig.digits[2]); + + PutTwoDigits(dddddd, &exp_dig.digits[4]); + return exp_dig; +} + +// Helper function for fast formatting of floating-point. +// The result is the same as "%g", a.k.a. "%.6g". +size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) { + static_assert(std::numeric_limits<float>::is_iec559, + "IEEE-754/IEC-559 support only"); + + char* out = buffer; // we write data to out, incrementing as we go, but + // FloatToBuffer always returns the address of the buffer + // passed in. + + if (std::isnan(d)) { + strcpy(out, "nan"); // NOLINT(runtime/printf) + return 3; + } + if (d == 0) { // +0 and -0 are handled here + if (std::signbit(d)) *out++ = '-'; + *out++ = '0'; + *out = 0; + return out - buffer; + } + if (d < 0) { + *out++ = '-'; + d = -d; + } + if (std::isinf(d)) { + strcpy(out, "inf"); // NOLINT(runtime/printf) + return out + 3 - buffer; + } + + auto exp_dig = SplitToSix(d); + int exp = exp_dig.exponent; + const char* digits = exp_dig.digits; + out[0] = '0'; + out[1] = '.'; + switch (exp) { + case 5: + memcpy(out, &digits[0], 6), out += 6; + *out = 0; + return out - buffer; + case 4: + memcpy(out, &digits[0], 5), out += 5; + if (digits[5] != '0') { + *out++ = '.'; + *out++ = digits[5]; + } + *out = 0; + return out - buffer; + case 3: + memcpy(out, &digits[0], 4), out += 4; + if ((digits[5] | digits[4]) != '0') { + *out++ = '.'; + *out++ = digits[4]; + if (digits[5] != '0') *out++ = digits[5]; + } + *out = 0; + return out - buffer; + case 2: + memcpy(out, &digits[0], 3), out += 3; + *out++ = '.'; + memcpy(out, &digits[3], 3); + out += 3; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case 1: + memcpy(out, &digits[0], 2), out += 2; + *out++ = '.'; + memcpy(out, &digits[2], 4); + out += 4; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case 0: + memcpy(out, &digits[0], 1), out += 1; + *out++ = '.'; + memcpy(out, &digits[1], 5); + out += 5; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case -4: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -3: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -2: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -1: + out += 2; + memcpy(out, &digits[0], 6); + out += 6; + while (out[-1] == '0') --out; + *out = 0; + return out - buffer; + } + assert(exp < -4 || exp >= 6); + out[0] = digits[0]; + assert(out[1] == '.'); + out += 2; + memcpy(out, &digits[1], 5), out += 5; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out++ = 'e'; + if (exp > 0) { + *out++ = '+'; + } else { + *out++ = '-'; + exp = -exp; + } + if (exp > 99) { + int dig1 = exp / 100; + exp -= dig1 * 100; + *out++ = '0' + dig1; + } + PutTwoDigits(exp, out); + out += 2; + *out = 0; + return out - buffer; +} + +namespace { +// Represents integer values of digits. +// Uses 36 to indicate an invalid character since we support +// bases up to 36. +static const int8_t kAsciiToInt[256] = { + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; + +// Parse the sign and optional hex or oct prefix in text. +inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/, + int* base_ptr /*inout*/, + bool* negative_ptr /*output*/) { + if (text->data() == nullptr) { + return false; + } + + const char* start = text->data(); + const char* end = start + text->size(); + int base = *base_ptr; + + // Consume whitespace. + while (start < end && absl::ascii_isspace(start[0])) { + ++start; + } + while (start < end && absl::ascii_isspace(end[-1])) { + --end; + } + if (start >= end) { + return false; + } + + // Consume sign. + *negative_ptr = (start[0] == '-'); + if (*negative_ptr || start[0] == '+') { + ++start; + if (start >= end) { + return false; + } + } + + // Consume base-dependent prefix. + // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 + // base 16: "0x" -> base 16 + // Also validate the base. + if (base == 0) { + if (end - start >= 2 && start[0] == '0' && + (start[1] == 'x' || start[1] == 'X')) { + base = 16; + start += 2; + if (start >= end) { + // "0x" with no digits after is invalid. + return false; + } + } else if (end - start >= 1 && start[0] == '0') { + base = 8; + start += 1; + } else { + base = 10; + } + } else if (base == 16) { + if (end - start >= 2 && start[0] == '0' && + (start[1] == 'x' || start[1] == 'X')) { + start += 2; + if (start >= end) { + // "0x" with no digits after is invalid. + return false; + } + } + } else if (base >= 2 && base <= 36) { + // okay + } else { + return false; + } + *text = absl::string_view(start, end - start); + *base_ptr = base; + return true; +} + +// Consume digits. +// +// The classic loop: +// +// for each digit +// value = value * base + digit +// value *= sign +// +// The classic loop needs overflow checking. It also fails on the most +// negative integer, -2147483648 in 32-bit two's complement representation. +// +// My improved loop: +// +// if (!negative) +// for each digit +// value = value * base +// value = value + digit +// else +// for each digit +// value = value * base +// value = value - digit +// +// Overflow checking becomes simple. + +// Lookup tables per IntType: +// vmax/base and vmin/base are precomputed because division costs at least 8ns. +// TODO(junyer): Doing this per base instead (i.e. an array of structs, not a +// struct of arrays) would probably be better in terms of d-cache for the most +// commonly used bases. +template <typename IntType> +struct LookupTables { + static const IntType kVmaxOverBase[]; + static const IntType kVminOverBase[]; +}; + +// An array initializer macro for X/base where base in [0, 36]. +// However, note that lookups for base in [0, 1] should never happen because +// base has been validated to be in [2, 36] by safe_parse_sign_and_base(). +#define X_OVER_BASE_INITIALIZER(X) \ + { \ + 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \ + X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \ + X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \ + X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \ + X / 35, X / 36, \ + } + +template <typename IntType> +const IntType LookupTables<IntType>::kVmaxOverBase[] = + X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); + +template <typename IntType> +const IntType LookupTables<IntType>::kVminOverBase[] = + X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min()); + +#undef X_OVER_BASE_INITIALIZER + +template <typename IntType> +inline bool safe_parse_positive_int(absl::string_view text, int base, + IntType* value_p) { + IntType value = 0; + const IntType vmax = std::numeric_limits<IntType>::max(); + assert(vmax > 0); + assert(base >= 0); + assert(vmax >= static_cast<IntType>(base)); + const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base]; + const char* start = text.data(); + const char* end = start + text.size(); + // loop over digits + for (; start < end; ++start) { + unsigned char c = static_cast<unsigned char>(start[0]); + int digit = kAsciiToInt[c]; + if (digit >= base) { + *value_p = value; + return false; + } + if (value > vmax_over_base) { + *value_p = vmax; + return false; + } + value *= base; + if (value > vmax - digit) { + *value_p = vmax; + return false; + } + value += digit; + } + *value_p = value; + return true; +} + +template <typename IntType> +inline bool safe_parse_negative_int(absl::string_view text, int base, + IntType* value_p) { + IntType value = 0; + const IntType vmin = std::numeric_limits<IntType>::min(); + assert(vmin < 0); + assert(vmin <= 0 - base); + IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base]; + // 2003 c++ standard [expr.mul] + // "... the sign of the remainder is implementation-defined." + // Although (vmin/base)*base + vmin%base is always vmin. + // 2011 c++ standard tightens the spec but we cannot rely on it. + // TODO(junyer): Handle this in the lookup table generation. + if (vmin % base > 0) { + vmin_over_base += 1; + } + const char* start = text.data(); + const char* end = start + text.size(); + // loop over digits + for (; start < end; ++start) { + unsigned char c = static_cast<unsigned char>(start[0]); + int digit = kAsciiToInt[c]; + if (digit >= base) { + *value_p = value; + return false; + } + if (value < vmin_over_base) { + *value_p = vmin; + return false; + } + value *= base; + if (value < vmin + digit) { + *value_p = vmin; + return false; + } + value -= digit; + } + *value_p = value; + return true; +} + +// Input format based on POSIX.1-2008 strtol +// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html +template <typename IntType> +inline bool safe_int_internal(absl::string_view text, IntType* value_p, + int base) { + *value_p = 0; + bool negative; + if (!safe_parse_sign_and_base(&text, &base, &negative)) { + return false; + } + if (!negative) { + return safe_parse_positive_int(text, base, value_p); + } else { + return safe_parse_negative_int(text, base, value_p); + } +} + +template <typename IntType> +inline bool safe_uint_internal(absl::string_view text, IntType* value_p, + int base) { + *value_p = 0; + bool negative; + if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) { + return false; + } + return safe_parse_positive_int(text, base, value_p); +} +} // anonymous namespace + +namespace numbers_internal { +bool safe_strto32_base(absl::string_view text, int32_t* value, int base) { + return safe_int_internal<int32_t>(text, value, base); +} + +bool safe_strto64_base(absl::string_view text, int64_t* value, int base) { + return safe_int_internal<int64_t>(text, value, base); +} + +bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { + return safe_uint_internal<uint32_t>(text, value, base); +} + +bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) { + return safe_uint_internal<uint64_t>(text, value, base); +} +} // namespace numbers_internal + +} // namespace absl diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h new file mode 100644 index 000000000000..f17dc97b2b1c --- /dev/null +++ b/absl/strings/numbers.h @@ -0,0 +1,173 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: numbers.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for converting strings to numbers. For +// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h, +// which automatically detect and convert most number values appropriately. + +#ifndef ABSL_STRINGS_NUMBERS_H_ +#define ABSL_STRINGS_NUMBERS_H_ + +#include <cstddef> +#include <cstdlib> +#include <cstring> +#include <ctime> +#include <limits> +#include <string> +#include <type_traits> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/numeric/int128.h" +#include "absl/strings/string_view.h" + +namespace absl { + +// SimpleAtoi() +// +// Converts the given std::string into an integer value, returning `true` if +// successful. The std::string must reflect a base-10 integer (optionally followed or +// preceded by ASCII whitespace) whose value falls within the range of the +// integer type, +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view s, int_type* out); + +// SimpleAtof() +// +// Converts the given std::string (optionally followed or preceded by ASCII +// whitespace) into a float, which may be rounded on overflow or underflow. +ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* value); + +// SimpleAtod() +// +// Converts the given std::string (optionally followed or preceded by ASCII +// whitespace) into a double, which may be rounded on overflow or underflow. +ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* value); + +// SimpleAtob() +// +// Converts the given std::string into into a boolean, returning `true` if +// successful. The following case-insensitive strings are interpreted as boolean +// `true`: "true", "t", "yes", "y", "1". The following case-insensitive strings +// are interpreted as boolean `false`: "false", "f", "no", "n", "0". +ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* value); + +} // namespace absl + +// End of public API. Implementation details follow. + +namespace absl { +namespace numbers_internal { + +// safe_strto?() functions for implementing SimpleAtoi() +bool safe_strto32_base(absl::string_view text, int32_t* value, int base); +bool safe_strto64_base(absl::string_view text, int64_t* value, int base); +bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base); +bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base); + +// These functions are intended for speed. All functions take an output buffer +// as an argument and return a pointer to the last byte they wrote, which is the +// terminating '\0'. At most `kFastToBufferSize` bytes are written. +char* FastInt32ToBuffer(int32_t i, char* buffer); +char* FastUInt32ToBuffer(uint32_t i, char* buffer); +char* FastInt64ToBuffer(int64_t i, char* buffer); +char* FastUInt64ToBuffer(uint64_t i, char* buffer); + +static const int kFastToBufferSize = 32; +static const int kSixDigitsToBufferSize = 16; + +char* RoundTripDoubleToBuffer(double d, char* buffer); +char* RoundTripFloatToBuffer(float f, char* buffer); + +// Helper function for fast formatting of floating-point values. +// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six +// significant digits are returned, trailing zeros are removed, and numbers +// outside the range 0.0001-999999 are output using scientific notation +// (1.23456e+06). This routine is heavily optimized. +// Required buffer size is `kSixDigitsToBufferSize`. +size_t SixDigitsToBuffer(double d, char* buffer); + +template <typename int_type> +char* FastIntToBuffer(int_type i, char* buffer) { + static_assert(sizeof(i) <= 64 / 8, + "FastIntToBuffer works only with 64-bit-or-less integers."); + // TODO(jorg): This signed-ness check is used because it works correctly + // with enums, and it also serves to check that int_type is not a pointer. + // If one day something like std::is_signed<enum E> works, switch to it. + if (static_cast<int_type>(1) - 2 < 0) { // Signed + if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit + return numbers_internal::FastInt64ToBuffer(i, buffer); + } else { // 32-bit or less + return numbers_internal::FastInt32ToBuffer(i, buffer); + } + } else { // Unsigned + if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit + return numbers_internal::FastUInt64ToBuffer(i, buffer); + } else { // 32-bit or less + return numbers_internal::FastUInt32ToBuffer(i, buffer); + } + } +} + +} // namespace numbers_internal + +// SimpleAtoi() +// +// Converts a std::string to an integer, using `safe_strto?()` functions for actual +// parsing, returning `true` if successful. The `safe_strto?()` functions apply +// strict checking; the std::string must be a base-10 integer, optionally followed or +// preceded by ASCII whitespace, with a value in the range of the corresponding +// integer type. +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view s, int_type* out) { + static_assert(sizeof(*out) == 4 || sizeof(*out) == 8, + "SimpleAtoi works only with 32-bit or 64-bit integers."); + static_assert(!std::is_floating_point<int_type>::value, + "Use SimpleAtof or SimpleAtod instead."); + bool parsed; + // TODO(jorg): This signed-ness check is used because it works correctly + // with enums, and it also serves to check that int_type is not a pointer. + // If one day something like std::is_signed<enum E> works, switch to it. + if (static_cast<int_type>(1) - 2 < 0) { // Signed + if (sizeof(*out) == 64 / 8) { // 64-bit + int64_t val; + parsed = numbers_internal::safe_strto64_base(s, &val, 10); + *out = static_cast<int_type>(val); + } else { // 32-bit + int32_t val; + parsed = numbers_internal::safe_strto32_base(s, &val, 10); + *out = static_cast<int_type>(val); + } + } else { // Unsigned + if (sizeof(*out) == 64 / 8) { // 64-bit + uint64_t val; + parsed = numbers_internal::safe_strtou64_base(s, &val, 10); + *out = static_cast<int_type>(val); + } else { // 32-bit + uint32_t val; + parsed = numbers_internal::safe_strtou32_base(s, &val, 10); + *out = static_cast<int_type>(val); + } + } + return parsed; +} + +} // namespace absl + +#endif // ABSL_STRINGS_NUMBERS_H_ diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc new file mode 100644 index 000000000000..9b74d67b87c8 --- /dev/null +++ b/absl/strings/numbers_test.cc @@ -0,0 +1,1186 @@ +// This file tests std::string processing functions related to numeric values. + +#include "absl/strings/numbers.h" + +#include <sys/types.h> +#include <algorithm> +#include <cctype> +#include <cfenv> // NOLINT(build/c++11) +#include <cfloat> +#include <cinttypes> +#include <climits> +#include <cmath> +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <limits> +#include <numeric> +#include <random> +#include <set> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/port.h" +#include "absl/strings/str_cat.h" + +#include "absl/strings/internal/numbers_test_common.inc" + +namespace { + +using absl::numbers_internal::FastInt32ToBuffer; +using absl::numbers_internal::FastInt64ToBuffer; +using absl::numbers_internal::FastUInt32ToBuffer; +using absl::numbers_internal::FastUInt64ToBuffer; +using absl::numbers_internal::kFastToBufferSize; +using absl::numbers_internal::kSixDigitsToBufferSize; +using absl::numbers_internal::safe_strto32_base; +using absl::numbers_internal::safe_strto64_base; +using absl::numbers_internal::safe_strtou32_base; +using absl::numbers_internal::safe_strtou64_base; +using absl::numbers_internal::RoundTripFloatToBuffer; +using absl::numbers_internal::SixDigitsToBuffer; +using absl::SimpleAtoi; +using testing::Eq; +using testing::MatchesRegex; + +// Number of floats to test with. +// 10,000,000 is a reasonable default for a test that only takes a few seconds. +// 1,000,000,000+ triggers checking for all possible mantissa values for +// double-precision tests. 2,000,000,000+ triggers checking for every possible +// single-precision float. +#ifdef _MSC_VER +// Use a smaller number on MSVC to avoid test time out (1 min) +const int kFloatNumCases = 5000000; +#else +const int kFloatNumCases = 10000000; +#endif + +// This is a slow, brute-force routine to compute the exact base-10 +// representation of a double-precision floating-point number. It +// is useful for debugging only. +std::string PerfectDtoa(double d) { + if (d == 0) return "0"; + if (d < 0) return "-" + PerfectDtoa(-d); + + // Basic theory: decompose d into mantissa and exp, where + // d = mantissa * 2^exp, and exp is as close to zero as possible. + int64_t mantissa, exp = 0; + while (d >= 1ULL << 63) ++exp, d *= 0.5; + while ((mantissa = d) != d) --exp, d *= 2.0; + + // Then convert mantissa to ASCII, and either double it (if + // exp > 0) or halve it (if exp < 0) repeatedly. "halve it" + // in this case means multiplying it by five and dividing by 10. + constexpr int maxlen = 1100; // worst case is actually 1030 or so. + char buf[maxlen + 5]; + for (int64_t num = mantissa, pos = maxlen; --pos >= 0;) { + buf[pos] = '0' + (num % 10); + num /= 10; + } + char* begin = &buf[0]; + char* end = buf + maxlen; + for (int i = 0; i != exp; i += (exp > 0) ? 1 : -1) { + int carry = 0; + for (char* p = end; --p != begin;) { + int dig = *p - '0'; + dig = dig * (exp > 0 ? 2 : 5) + carry; + carry = dig / 10; + dig %= 10; + *p = '0' + dig; + } + } + if (exp < 0) { + // "dividing by 10" above means we have to add the decimal point. + memmove(end + 1 + exp, end + exp, 1 - exp); + end[exp] = '.'; + ++end; + } + while (*begin == '0' && begin[1] != '.') ++begin; + return {begin, end}; +} + +TEST(ToString, PerfectDtoa) { + EXPECT_THAT(PerfectDtoa(1), Eq("1")); + EXPECT_THAT(PerfectDtoa(0.1), + Eq("0.1000000000000000055511151231257827021181583404541015625")); + EXPECT_THAT(PerfectDtoa(1e24), Eq("999999999999999983222784")); + EXPECT_THAT(PerfectDtoa(5e-324), MatchesRegex("0.0000.*625")); + for (int i = 0; i < 100; ++i) { + for (double multiplier : + {1e-300, 1e-200, 1e-100, 0.1, 1.0, 10.0, 1e100, 1e300}) { + double d = multiplier * i; + std::string s = PerfectDtoa(d); + EXPECT_EQ(d, strtod(s.c_str(), nullptr)); + } + } +} + +void CheckInt32(int32_t x) { + char buffer[kFastInt32ToBufferSize]; + char* actual = FastInt32ToBuffer(x, buffer); + std::string expected = std::to_string(x); + ASSERT_TRUE(expected == actual) + << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input " + << x; +} + +void CheckInt64(int64_t x) { + char buffer[kFastInt64ToBufferSize + 3]; + buffer[0] = '*'; + buffer[23] = '*'; + buffer[24] = '*'; + char* actual = FastInt64ToBuffer(x, &buffer[1]); + std::string expected = std::to_string(x); + ASSERT_TRUE(expected == actual) + << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input " + << x; + ASSERT_EQ(buffer[0], '*'); + ASSERT_EQ(buffer[23], '*'); + ASSERT_EQ(buffer[24], '*'); +} + +void CheckUInt32(uint32_t x) { + char buffer[kFastUInt64ToBufferSize]; + char* actual = FastUInt32ToBuffer(x, buffer); + std::string expected = std::to_string(x); + ASSERT_TRUE(expected == actual) + << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input " + << x; +} + +void CheckUInt64(uint64_t x) { + char buffer[kFastUInt64ToBufferSize + 1]; + char* actual = FastUInt64ToBuffer(x, &buffer[1]); + std::string expected = std::to_string(x); + ASSERT_TRUE(expected == actual) + << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input " + << x; +} + +void CheckHex64(uint64_t v) { + char expected[kFastUInt64ToBufferSize]; + std::string actual = absl::StrCat(absl::Hex(v, absl::kZeroPad16)); + snprintf(expected, sizeof(expected), "%016" PRIx64, static_cast<uint64_t>(v)); + ASSERT_TRUE(expected == actual) + << "Expected \"" << expected << "\", Actual \"" << actual << "\""; +} + +void TestFastPrints() { + for (int i = -100; i <= 100; i++) { + CheckInt32(i); + CheckInt64(i); + } + for (int i = 0; i <= 100; i++) { + CheckUInt32(i); + CheckUInt64(i); + } + // Test min int to make sure that works + CheckInt32(INT_MIN); + CheckInt32(INT_MAX); + CheckInt64(LONG_MIN); + CheckInt64(uint64_t{1000000000}); + CheckInt64(uint64_t{9999999999}); + CheckInt64(uint64_t{100000000000000}); + CheckInt64(uint64_t{999999999999999}); + CheckInt64(uint64_t{1000000000000000000}); + CheckInt64(uint64_t{1199999999999999999}); + CheckInt64(int64_t{-700000000000000000}); + CheckInt64(LONG_MAX); + CheckUInt32(std::numeric_limits<uint32_t>::max()); + CheckUInt64(uint64_t{1000000000}); + CheckUInt64(uint64_t{9999999999}); + CheckUInt64(uint64_t{100000000000000}); + CheckUInt64(uint64_t{999999999999999}); + CheckUInt64(uint64_t{1000000000000000000}); + CheckUInt64(uint64_t{1199999999999999999}); + CheckUInt64(std::numeric_limits<uint64_t>::max()); + + for (int i = 0; i < 10000; i++) { + CheckHex64(i); + } + CheckHex64(uint64_t{0x123456789abcdef0}); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { + std::string s = absl::StrCat(in_value); + int_type x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleAtoi(s, &x)) + << "in_value=" << in_value << " s=" << s << " x=" << x; + EXPECT_EQ(exp_value, x); + x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleAtoi(s.c_str(), &x)); + EXPECT_EQ(exp_value, x); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleAtoiBad(in_val_type in_value) { + std::string s = absl::StrCat(in_value); + int_type x; + EXPECT_FALSE(SimpleAtoi(s, &x)); + EXPECT_FALSE(SimpleAtoi(s.c_str(), &x)); +} + +TEST(NumbersTest, Atoi) { + // SimpleAtoi(absl::string_view, int32_t) + VerifySimpleAtoiGood<int32_t>(0, 0); + VerifySimpleAtoiGood<int32_t>(42, 42); + VerifySimpleAtoiGood<int32_t>(-42, -42); + + VerifySimpleAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + + // SimpleAtoi(absl::string_view, uint32_t) + VerifySimpleAtoiGood<uint32_t>(0, 0); + VerifySimpleAtoiGood<uint32_t>(42, 42); + VerifySimpleAtoiBad<uint32_t>(-42); + + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleAtoi(absl::string_view, int64_t) + VerifySimpleAtoiGood<int64_t>(0, 0); + VerifySimpleAtoiGood<int64_t>(42, 42); + VerifySimpleAtoiGood<int64_t>(-42, -42); + + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleAtoi(absl::string_view, uint64_t) + VerifySimpleAtoiGood<uint64_t>(0, 0); + VerifySimpleAtoiGood<uint64_t>(42, 42); + VerifySimpleAtoiBad<uint64_t>(-42); + + VerifySimpleAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + + // Some other types + VerifySimpleAtoiGood<int>(-42, -42); + VerifySimpleAtoiGood<int32_t>(-42, -42); + VerifySimpleAtoiGood<uint32_t>(42, 42); + VerifySimpleAtoiGood<unsigned int>(42, 42); + VerifySimpleAtoiGood<int64_t>(-42, -42); + VerifySimpleAtoiGood<long>(-42, -42); // NOLINT(runtime/int) + VerifySimpleAtoiGood<uint64_t>(42, 42); + VerifySimpleAtoiGood<size_t>(42, 42); + VerifySimpleAtoiGood<std::string::size_type>(42, 42); +} + +TEST(NumbersTest, Atoenum) { + enum E01 { + E01_zero = 0, + E01_one = 1, + }; + + VerifySimpleAtoiGood<E01>(E01_zero, E01_zero); + VerifySimpleAtoiGood<E01>(E01_one, E01_one); + + enum E_101 { + E_101_minusone = -1, + E_101_zero = 0, + E_101_one = 1, + }; + + VerifySimpleAtoiGood<E_101>(E_101_minusone, E_101_minusone); + VerifySimpleAtoiGood<E_101>(E_101_zero, E_101_zero); + VerifySimpleAtoiGood<E_101>(E_101_one, E_101_one); + + enum E_bigint { + E_bigint_zero = 0, + E_bigint_one = 1, + E_bigint_max31 = static_cast<int32_t>(0x7FFFFFFF), + }; + + VerifySimpleAtoiGood<E_bigint>(E_bigint_zero, E_bigint_zero); + VerifySimpleAtoiGood<E_bigint>(E_bigint_one, E_bigint_one); + VerifySimpleAtoiGood<E_bigint>(E_bigint_max31, E_bigint_max31); + + enum E_fullint { + E_fullint_zero = 0, + E_fullint_one = 1, + E_fullint_max31 = static_cast<int32_t>(0x7FFFFFFF), + E_fullint_min32 = INT32_MIN, + }; + + VerifySimpleAtoiGood<E_fullint>(E_fullint_zero, E_fullint_zero); + VerifySimpleAtoiGood<E_fullint>(E_fullint_one, E_fullint_one); + VerifySimpleAtoiGood<E_fullint>(E_fullint_max31, E_fullint_max31); + VerifySimpleAtoiGood<E_fullint>(E_fullint_min32, E_fullint_min32); + + enum E_biguint { + E_biguint_zero = 0, + E_biguint_one = 1, + E_biguint_max31 = static_cast<uint32_t>(0x7FFFFFFF), + E_biguint_max32 = static_cast<uint32_t>(0xFFFFFFFF), + }; + + VerifySimpleAtoiGood<E_biguint>(E_biguint_zero, E_biguint_zero); + VerifySimpleAtoiGood<E_biguint>(E_biguint_one, E_biguint_one); + VerifySimpleAtoiGood<E_biguint>(E_biguint_max31, E_biguint_max31); + VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32); +} + +TEST(stringtest, safe_strto32_base) { + int32_t value; + EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("0", &value, 16)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto32_base(" \t\n -0x34234324", &value, 16)); + EXPECT_EQ(-0x34234324, value); + + EXPECT_TRUE(safe_strto32_base(" \t\n -34234324", &value, 16)); + EXPECT_EQ(-0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("7654321", &value, 8)); + EXPECT_EQ(07654321, value); + + EXPECT_TRUE(safe_strto32_base("-01234", &value, 8)); + EXPECT_EQ(-01234, value); + + EXPECT_FALSE(safe_strto32_base("1834", &value, 8)); + + // Autodetect base. + EXPECT_TRUE(safe_strto32_base("0", &value, 0)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto32_base("077", &value, 0)); + EXPECT_EQ(077, value); // Octal interpretation + + // Leading zero indicates octal, but then followed by invalid digit. + EXPECT_FALSE(safe_strto32_base("088", &value, 0)); + + // Leading 0x indicated hex, but then followed by invalid digit. + EXPECT_FALSE(safe_strto32_base("0xG", &value, 0)); + + // Base-10 version. + EXPECT_TRUE(safe_strto32_base("34234324", &value, 10)); + EXPECT_EQ(34234324, value); + + EXPECT_TRUE(safe_strto32_base("0", &value, 10)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto32_base(" \t\n -34234324", &value, 10)); + EXPECT_EQ(-34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324 \n\t ", &value, 10)); + EXPECT_EQ(34234324, value); + + // Invalid ints. + EXPECT_FALSE(safe_strto32_base("", &value, 10)); + EXPECT_FALSE(safe_strto32_base(" ", &value, 10)); + EXPECT_FALSE(safe_strto32_base("abc", &value, 10)); + EXPECT_FALSE(safe_strto32_base("34234324a", &value, 10)); + EXPECT_FALSE(safe_strto32_base("34234.3", &value, 10)); + + // Out of bounds. + EXPECT_FALSE(safe_strto32_base("2147483648", &value, 10)); + EXPECT_FALSE(safe_strto32_base("-2147483649", &value, 10)); + + // String version. + EXPECT_TRUE(safe_strto32_base(std::string("0x1234"), &value, 16)); + EXPECT_EQ(0x1234, value); + + // Base-10 std::string version. + EXPECT_TRUE(safe_strto32_base("1234", &value, 10)); + EXPECT_EQ(1234, value); +} + +TEST(stringtest, safe_strto32_range) { + // These tests verify underflow/overflow behaviour. + int32_t value; + EXPECT_FALSE(safe_strto32_base("2147483648", &value, 10)); + EXPECT_EQ(std::numeric_limits<int32_t>::max(), value); + + EXPECT_TRUE(safe_strto32_base("-2147483648", &value, 10)); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), value); + + EXPECT_FALSE(safe_strto32_base("-2147483649", &value, 10)); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), value); +} + +TEST(stringtest, safe_strto64_range) { + // These tests verify underflow/overflow behaviour. + int64_t value; + EXPECT_FALSE(safe_strto64_base("9223372036854775808", &value, 10)); + EXPECT_EQ(std::numeric_limits<int64_t>::max(), value); + + EXPECT_TRUE(safe_strto64_base("-9223372036854775808", &value, 10)); + EXPECT_EQ(std::numeric_limits<int64_t>::min(), value); + + EXPECT_FALSE(safe_strto64_base("-9223372036854775809", &value, 10)); + EXPECT_EQ(std::numeric_limits<int64_t>::min(), value); +} + +TEST(stringtest, safe_strto32_leading_substring) { + // These tests verify this comment in numbers.h: + // On error, returns false, and sets *value to: [...] + // conversion of leading substring if available ("123@@@" -> 123) + // 0 if no leading substring available + int32_t value; + EXPECT_FALSE(safe_strto32_base("04069@@@", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto32_base("04069@@@", &value, 8)); + EXPECT_EQ(0406, value); + + EXPECT_FALSE(safe_strto32_base("04069balloons", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto32_base("04069balloons", &value, 16)); + EXPECT_EQ(0x4069ba, value); + + EXPECT_FALSE(safe_strto32_base("@@@", &value, 10)); + EXPECT_EQ(0, value); // there was no leading substring +} + +TEST(stringtest, safe_strto64_leading_substring) { + // These tests verify this comment in numbers.h: + // On error, returns false, and sets *value to: [...] + // conversion of leading substring if available ("123@@@" -> 123) + // 0 if no leading substring available + int64_t value; + EXPECT_FALSE(safe_strto64_base("04069@@@", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto64_base("04069@@@", &value, 8)); + EXPECT_EQ(0406, value); + + EXPECT_FALSE(safe_strto64_base("04069balloons", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto64_base("04069balloons", &value, 16)); + EXPECT_EQ(0x4069ba, value); + + EXPECT_FALSE(safe_strto64_base("@@@", &value, 10)); + EXPECT_EQ(0, value); // there was no leading substring +} + +TEST(stringtest, safe_strto64_base) { + int64_t value; + EXPECT_TRUE(safe_strto64_base("0x3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base("3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base("0", &value, 16)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto64_base(" \t\n -0x3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{-0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base(" \t\n -3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{-0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base("123456701234567012", &value, 8)); + EXPECT_EQ(int64_t{0123456701234567012}, value); + + EXPECT_TRUE(safe_strto64_base("-017777777777777", &value, 8)); + EXPECT_EQ(int64_t{-017777777777777}, value); + + EXPECT_FALSE(safe_strto64_base("19777777777777", &value, 8)); + + // Autodetect base. + EXPECT_TRUE(safe_strto64_base("0", &value, 0)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto64_base("077", &value, 0)); + EXPECT_EQ(077, value); // Octal interpretation + + // Leading zero indicates octal, but then followed by invalid digit. + EXPECT_FALSE(safe_strto64_base("088", &value, 0)); + + // Leading 0x indicated hex, but then followed by invalid digit. + EXPECT_FALSE(safe_strto64_base("0xG", &value, 0)); + + // Base-10 version. + EXPECT_TRUE(safe_strto64_base("34234324487834466", &value, 10)); + EXPECT_EQ(int64_t{34234324487834466}, value); + + EXPECT_TRUE(safe_strto64_base("0", &value, 10)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto64_base(" \t\n -34234324487834466", &value, 10)); + EXPECT_EQ(int64_t{-34234324487834466}, value); + + EXPECT_TRUE(safe_strto64_base("34234324487834466 \n\t ", &value, 10)); + EXPECT_EQ(int64_t{34234324487834466}, value); + + // Invalid ints. + EXPECT_FALSE(safe_strto64_base("", &value, 10)); + EXPECT_FALSE(safe_strto64_base(" ", &value, 10)); + EXPECT_FALSE(safe_strto64_base("abc", &value, 10)); + EXPECT_FALSE(safe_strto64_base("34234324487834466a", &value, 10)); + EXPECT_FALSE(safe_strto64_base("34234487834466.3", &value, 10)); + + // Out of bounds. + EXPECT_FALSE(safe_strto64_base("9223372036854775808", &value, 10)); + EXPECT_FALSE(safe_strto64_base("-9223372036854775809", &value, 10)); + + // String version. + EXPECT_TRUE(safe_strto64_base(std::string("0x1234"), &value, 16)); + EXPECT_EQ(0x1234, value); + + // Base-10 std::string version. + EXPECT_TRUE(safe_strto64_base("1234", &value, 10)); + EXPECT_EQ(1234, value); +} + +const size_t kNumRandomTests = 10000; + +template <typename IntType> +void test_random_integer_parse_base(bool (*parse_func)(absl::string_view, + IntType* value, + int base)) { + using RandomEngine = std::minstd_rand0; + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<IntType> random_int( + std::numeric_limits<IntType>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + for (size_t i = 0; i < kNumRandomTests; i++) { + IntType value = random_int(rng); + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + EXPECT_FALSE( + parse_func(absl::StrCat(std::numeric_limits<IntType>::max(), value), + &parsed_value, base)); + + // Test underflow + if (std::numeric_limits<IntType>::min() < 0) { + EXPECT_FALSE( + parse_func(absl::StrCat(std::numeric_limits<IntType>::min(), value), + &parsed_value, base)); + } else { + EXPECT_FALSE(parse_func(absl::StrCat("-", value), &parsed_value, base)); + } + } +} + +TEST(stringtest, safe_strto32_random) { + test_random_integer_parse_base<int32_t>(&safe_strto32_base); +} +TEST(stringtest, safe_strto64_random) { + test_random_integer_parse_base<int64_t>(&safe_strto64_base); +} +TEST(stringtest, safe_strtou32_random) { + test_random_integer_parse_base<uint32_t>(&safe_strtou32_base); +} +TEST(stringtest, safe_strtou64_random) { + test_random_integer_parse_base<uint64_t>(&safe_strtou64_base); +} + +TEST(stringtest, safe_strtou32_base) { + for (int i = 0; strtouint32_test_cases[i].str != nullptr; ++i) { + const auto& e = strtouint32_test_cases[i]; + uint32_t value; + EXPECT_EQ(e.expect_ok, safe_strtou32_base(e.str, &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "i=" << i << " str=\"" << e.str + << "\" base=" << e.base; + } + } +} + +TEST(stringtest, safe_strtou32_base_length_delimited) { + for (int i = 0; strtouint32_test_cases[i].str != nullptr; ++i) { + const auto& e = strtouint32_test_cases[i]; + std::string tmp(e.str); + tmp.append("12"); // Adds garbage at the end. + + uint32_t value; + EXPECT_EQ(e.expect_ok, + safe_strtou32_base(absl::string_view(tmp.data(), strlen(e.str)), + &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "i=" << i << " str=" << e.str + << " base=" << e.base; + } + } +} + +TEST(stringtest, safe_strtou64_base) { + for (int i = 0; strtouint64_test_cases[i].str != nullptr; ++i) { + const auto& e = strtouint64_test_cases[i]; + uint64_t value; + EXPECT_EQ(e.expect_ok, safe_strtou64_base(e.str, &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "str=" << e.str << " base=" << e.base; + } + } +} + +TEST(stringtest, safe_strtou64_base_length_delimited) { + for (int i = 0; strtouint64_test_cases[i].str != nullptr; ++i) { + const auto& e = strtouint64_test_cases[i]; + std::string tmp(e.str); + tmp.append("12"); // Adds garbage at the end. + + uint64_t value; + EXPECT_EQ(e.expect_ok, + safe_strtou64_base(absl::string_view(tmp.data(), strlen(e.str)), + &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "str=\"" << e.str << "\" base=" << e.base; + } + } +} + +// feenableexcept() and fedisableexcept() are missing on Mac OS X, MSVC. +#if defined(_MSC_VER) || defined(__APPLE__) +#define ABSL_MISSING_FEENABLEEXCEPT 1 +#define ABSL_MISSING_FEDISABLEEXCEPT 1 +#endif + +class SimpleDtoaTest : public testing::Test { + protected: + void SetUp() override { + // Store the current floating point env & clear away any pending exceptions. + feholdexcept(&fp_env_); +#ifndef ABSL_MISSING_FEENABLEEXCEPT + // Turn on floating point exceptions. + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + } + + void TearDown() override { + // Restore the floating point environment to the original state. + // In theory fedisableexcept is unnecessary; fesetenv will also do it. + // In practice, our toolchains have subtle bugs. +#ifndef ABSL_MISSING_FEDISABLEEXCEPT + fedisableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + fesetenv(&fp_env_); + } + + std::string ToNineDigits(double value) { + char buffer[kFastToBufferSize]; // more than enough for %.9g + snprintf(buffer, sizeof(buffer), "%.9g", value); + return buffer; + } + + fenv_t fp_env_; +}; + +// Run the given runnable functor for "cases" test cases, chosen over the +// available range of float. pi and e and 1/e are seeded, and then all +// available integer powers of 2 and 10 are multiplied against them. In +// addition to trying all those values, we try the next higher and next lower +// float, and then we add additional test cases evenly distributed between them. +// Each test case is passed to runnable as both a positive and negative value. +template <typename R> +void ExhaustiveFloat(uint32_t cases, R&& runnable) { + runnable(0.0f); + runnable(-0.0f); + if (cases >= 2e9) { // more than 2 billion? Might as well run them all. + for (float f = 0; f < std::numeric_limits<float>::max(); ) { + f = nextafterf(f, std::numeric_limits<float>::max()); + runnable(-f); + runnable(f); + } + return; + } + std::set<float> floats = {3.4028234e38f}; + for (float f : {1.0, 3.14159265, 2.718281828, 1 / 2.718281828}) { + for (float testf = f; testf != 0; testf *= 0.1f) floats.insert(testf); + for (float testf = f; testf != 0; testf *= 0.5f) floats.insert(testf); + for (float testf = f; testf < 3e38f / 2; testf *= 2.0f) + floats.insert(testf); + for (float testf = f; testf < 3e38f / 10; testf *= 10) floats.insert(testf); + } + + float last = *floats.begin(); + + runnable(last); + runnable(-last); + int iters_per_float = cases / floats.size(); + if (iters_per_float == 0) iters_per_float = 1; + for (float f : floats) { + if (f == last) continue; + float testf = nextafter(last, std::numeric_limits<float>::max()); + runnable(testf); + runnable(-testf); + last = testf; + if (f == last) continue; + double step = (double{f} - last) / iters_per_float; + for (double d = last + step; d < f; d += step) { + testf = d; + if (testf != last) { + runnable(testf); + runnable(-testf); + last = testf; + } + } + testf = nextafter(f, 0.0f); + if (testf > last) { + runnable(testf); + runnable(-testf); + last = testf; + } + if (f != last) { + runnable(f); + runnable(-f); + last = f; + } + } +} + +TEST_F(SimpleDtoaTest, ExhaustiveFloatToBuffer) { + uint64_t test_count = 0; + std::vector<float> mismatches; + ExhaustiveFloat(kFloatNumCases, [&](float f) { + if (f != f) return; // rule out NaNs + ++test_count; + char fastbuf[kFastToBufferSize]; + RoundTripFloatToBuffer(f, fastbuf); + float round_trip = strtof(fastbuf, nullptr); + if (f != round_trip) { + mismatches.push_back(f); + if (mismatches.size() < 10) { + ABSL_RAW_LOG(ERROR, "%s", + absl::StrCat("Round-trip failure with float. ", "f=", f, + "=", ToNineDigits(f), " fast=", fastbuf, + " strtof=", ToNineDigits(round_trip)) + .c_str()); + } + } + }); + if (!mismatches.empty()) { + EXPECT_EQ(mismatches.size(), 0); + for (size_t i = 0; i < mismatches.size(); ++i) { + if (i > 100) i = mismatches.size() - 1; + float f = mismatches[i]; + std::string msg = + absl::StrCat("Mismatch #", i, " f=", f, " (", ToNineDigits(f), ")"); + char buf[kFastToBufferSize]; + absl::StrAppend(&msg, " fast='", RoundTripFloatToBuffer(f, buf), "'"); + float rt = strtof(buf, nullptr); + absl::StrAppend(&msg, " rt=", ToNineDigits(rt)); + ABSL_RAW_LOG(ERROR, "%s", msg.c_str()); + } + } +} + +TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) { + uint64_t test_count = 0; + std::vector<double> mismatches; + auto checker = [&](double d) { + if (d != d) return; // rule out NaNs + ++test_count; + char sixdigitsbuf[kSixDigitsToBufferSize] = {0}; + SixDigitsToBuffer(d, sixdigitsbuf); + char snprintfbuf[kSixDigitsToBufferSize] = {0}; + snprintf(snprintfbuf, kSixDigitsToBufferSize, "%g", d); + if (strcmp(sixdigitsbuf, snprintfbuf) != 0) { + mismatches.push_back(d); + if (mismatches.size() < 10) { + ABSL_RAW_LOG(ERROR, "%s", + absl::StrCat("Six-digit failure with double. ", "d=", d, + "=", d, " sixdigits=", sixdigitsbuf, + " printf(%g)=", snprintfbuf) + .c_str()); + } + } + }; + // Some quick sanity checks... + checker(5e-324); + checker(1e-308); + checker(1.0); + checker(1.000005); + checker(1.7976931348623157e308); + checker(0.00390625); +#ifndef _MSC_VER + // on MSVC, snprintf() rounds it to 0.00195313. SixDigitsToBuffer() rounds it + // to 0.00195312 (round half to even). + checker(0.001953125); +#endif + checker(0.005859375); + // Some cases where the rounding is very very close + checker(1.089095e-15); + checker(3.274195e-55); + checker(6.534355e-146); + checker(2.920845e+234); + + if (mismatches.empty()) { + test_count = 0; + ExhaustiveFloat(kFloatNumCases, checker); + + test_count = 0; + std::vector<int> digit_testcases{ + 100000, 100001, 100002, 100005, 100010, 100020, 100050, 100100, // misc + 195312, 195313, // 1.953125 is a case where we round down, just barely. + 200000, 500000, 800000, // misc mid-range cases + 585937, 585938, // 5.859375 is a case where we round up, just barely. + 900000, 990000, 999000, 999900, 999990, 999996, 999997, 999998, 999999}; + if (kFloatNumCases >= 1e9) { + // If at least 1 billion test cases were requested, user wants an + // exhaustive test. So let's test all mantissas, too. + constexpr int min_mantissa = 100000, max_mantissa = 999999; + digit_testcases.resize(max_mantissa - min_mantissa + 1); + std::iota(digit_testcases.begin(), digit_testcases.end(), min_mantissa); + } + + for (int exponent = -324; exponent <= 308; ++exponent) { + double powten = pow(10.0, exponent); + if (powten == 0) powten = 5e-324; + if (kFloatNumCases >= 1e9) { + // The exhaustive test takes a very long time, so log progress. + char buf[kSixDigitsToBufferSize]; + ABSL_RAW_LOG( + INFO, "%s", + absl::StrCat("Exp ", exponent, " powten=", powten, "(", + powten, ") (", + std::string(buf, SixDigitsToBuffer(powten, buf)), ")") + .c_str()); + } + for (int digits : digit_testcases) { + if (exponent == 308 && digits >= 179769) break; // don't overflow! + double digiform = (digits + 0.5) * 0.00001; + double testval = digiform * powten; + double pretestval = nextafter(testval, 0); + double posttestval = nextafter(testval, 1.7976931348623157e308); + checker(testval); + checker(pretestval); + checker(posttestval); + } + } + } else { + EXPECT_EQ(mismatches.size(), 0); + for (size_t i = 0; i < mismatches.size(); ++i) { + if (i > 100) i = mismatches.size() - 1; + double d = mismatches[i]; + char sixdigitsbuf[kSixDigitsToBufferSize] = {0}; + SixDigitsToBuffer(d, sixdigitsbuf); + char snprintfbuf[kSixDigitsToBufferSize] = {0}; + snprintf(snprintfbuf, kSixDigitsToBufferSize, "%g", d); + double before = nextafter(d, 0.0); + double after = nextafter(d, 1.7976931348623157e308); + char b1[32], b2[kSixDigitsToBufferSize]; + ABSL_RAW_LOG( + ERROR, "%s", + absl::StrCat( + "Mismatch #", i, " d=", d, " (", ToNineDigits(d), ")", + " sixdigits='", sixdigitsbuf, "'", " snprintf='", snprintfbuf, + "'", " Before.=", PerfectDtoa(before), " ", + (SixDigitsToBuffer(before, b2), b2), + " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", before), b1), + " Perfect=", PerfectDtoa(d), " ", (SixDigitsToBuffer(d, b2), b2), + " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", d), b1), + " After.=.", PerfectDtoa(after), " ", + (SixDigitsToBuffer(after, b2), b2), + " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", after), b1)) + .c_str()); + } + } +} + +TEST(StrToInt32, Partial) { + struct Int32TestLine { + std::string input; + bool status; + int32_t value; + }; + const int32_t int32_min = std::numeric_limits<int32_t>::min(); + const int32_t int32_max = std::numeric_limits<int32_t>::max(); + Int32TestLine int32_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(int32_min, int32_max), false, int32_min}, + {absl::StrCat(int32_max, int32_max), false, int32_max}, + }; + + for (const Int32TestLine& test_line : int32_test_line) { + int32_t value = -2; + bool status = safe_strto32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto32_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToUint32, Partial) { + struct Uint32TestLine { + std::string input; + bool status; + uint32_t value; + }; + const uint32_t uint32_max = std::numeric_limits<uint32_t>::max(); + Uint32TestLine uint32_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(uint32_max, uint32_max), false, uint32_max}, + }; + + for (const Uint32TestLine& test_line : uint32_test_line) { + uint32_t value = 2; + bool status = safe_strtou32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou32_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToInt64, Partial) { + struct Int64TestLine { + std::string input; + bool status; + int64_t value; + }; + const int64_t int64_min = std::numeric_limits<int64_t>::min(); + const int64_t int64_max = std::numeric_limits<int64_t>::max(); + Int64TestLine int64_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(int64_min, int64_max), false, int64_min}, + {absl::StrCat(int64_max, int64_max), false, int64_max}, + }; + + for (const Int64TestLine& test_line : int64_test_line) { + int64_t value = -2; + bool status = safe_strto64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto64_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToUint64, Partial) { + struct Uint64TestLine { + std::string input; + bool status; + uint64_t value; + }; + const uint64_t uint64_max = std::numeric_limits<uint64_t>::max(); + Uint64TestLine uint64_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(uint64_max, uint64_max), false, uint64_max}, + }; + + for (const Uint64TestLine& test_line : uint64_test_line) { + uint64_t value = 2; + bool status = safe_strtou64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou64_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToInt32Base, PrefixOnly) { + struct Int32TestLine { + std::string input; + bool status; + int32_t value; + }; + Int32TestLine int32_test_line[] = { + { "", false, 0 }, + { "-", false, 0 }, + { "-0", true, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + { "-0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Int32TestLine& line : int32_test_line) { + for (const int base : base_array) { + int32_t value = 2; + bool status = safe_strto32_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto32_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto32_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +TEST(StrToUint32Base, PrefixOnly) { + struct Uint32TestLine { + std::string input; + bool status; + uint32_t value; + }; + Uint32TestLine uint32_test_line[] = { + { "", false, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Uint32TestLine& line : uint32_test_line) { + for (const int base : base_array) { + uint32_t value = 2; + bool status = safe_strtou32_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou32_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou32_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +TEST(StrToInt64Base, PrefixOnly) { + struct Int64TestLine { + std::string input; + bool status; + int64_t value; + }; + Int64TestLine int64_test_line[] = { + { "", false, 0 }, + { "-", false, 0 }, + { "-0", true, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + { "-0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Int64TestLine& line : int64_test_line) { + for (const int base : base_array) { + int64_t value = 2; + bool status = safe_strto64_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto64_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto64_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +TEST(StrToUint64Base, PrefixOnly) { + struct Uint64TestLine { + std::string input; + bool status; + uint64_t value; + }; + Uint64TestLine uint64_test_line[] = { + { "", false, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Uint64TestLine& line : uint64_test_line) { + for (const int base : base_array) { + uint64_t value = 2; + bool status = safe_strtou64_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou64_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou64_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +} // namespace diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc new file mode 100644 index 000000000000..0c75655c993f --- /dev/null +++ b/absl/strings/str_cat.cc @@ -0,0 +1,208 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_cat.h" + +#include <cstdarg> +#include <cstdint> +#include <cstdio> +#include <cstring> + +#include "absl/strings/ascii.h" +#include "absl/strings/internal/resize_uninitialized.h" + +namespace absl { + +AlphaNum::AlphaNum(Hex hex) { + char* const end = &digits_[numbers_internal::kFastToBufferSize]; + char* writer = end; + uint64_t value = hex.value; + static const char hexdigits[] = "0123456789abcdef"; + do { + *--writer = hexdigits[value & 0xF]; + value >>= 4; + } while (value != 0); + + char* beg; + if (end - writer < hex.width) { + beg = end - hex.width; + std::fill_n(beg, writer - beg, hex.fill); + } else { + beg = writer; + } + + piece_ = absl::string_view(beg, end - beg); +} + +// ---------------------------------------------------------------------- +// StrCat() +// This merges the given strings or integers, with no delimiter. This +// is designed to be the fastest possible way to construct a std::string out +// of a mix of raw C strings, StringPieces, strings, and integer values. +// ---------------------------------------------------------------------- + +// Append is merely a version of memcpy that returns the address of the byte +// after the area just overwritten. +static char* Append(char* out, const AlphaNum& x) { + // memcpy is allowed to overwrite arbitrary memory, so doing this after the + // call would force an extra fetch of x.size(). + char* after = out + x.size(); + memcpy(out, x.data(), x.size()); + return after; +} + +std::string StrCat(const AlphaNum& a, const AlphaNum& b) { + std::string result; + absl::strings_internal::STLStringResizeUninitialized(&result, + a.size() + b.size()); + char* const begin = &*result.begin(); + char* out = begin; + out = Append(out, a); + out = Append(out, b); + assert(out == begin + result.size()); + return result; +} + +std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { + std::string result; + strings_internal::STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size()); + char* const begin = &*result.begin(); + char* out = begin; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + assert(out == begin + result.size()); + return result; +} + +std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d) { + std::string result; + strings_internal::STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size() + d.size()); + char* const begin = &*result.begin(); + char* out = begin; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + out = Append(out, d); + assert(out == begin + result.size()); + return result; +} + +namespace strings_internal { + +// Do not call directly - these are not part of the public API. +std::string CatPieces(std::initializer_list<absl::string_view> pieces) { + std::string result; + size_t total_size = 0; + for (const absl::string_view piece : pieces) total_size += piece.size(); + strings_internal::STLStringResizeUninitialized(&result, total_size); + + char* const begin = &*result.begin(); + char* out = begin; + for (const absl::string_view piece : pieces) { + const size_t this_size = piece.size(); + memcpy(out, piece.data(), this_size); + out += this_size; + } + assert(out == begin + result.size()); + return result; +} + +// It's possible to call StrAppend with an absl::string_view that is itself a +// fragment of the std::string we're appending to. However the results of this are +// random. Therefore, check for this in debug mode. Use unsigned math so we +// only have to do one comparison. Note, there's an exception case: appending an +// empty std::string is always allowed. +#define ASSERT_NO_OVERLAP(dest, src) \ + assert(((src).size() == 0) || \ + (uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size()))) + +void AppendPieces(std::string* dest, + std::initializer_list<absl::string_view> pieces) { + size_t old_size = dest->size(); + size_t total_size = old_size; + for (const absl::string_view piece : pieces) { + ASSERT_NO_OVERLAP(*dest, piece); + total_size += piece.size(); + } + strings_internal::STLStringResizeUninitialized(dest, total_size); + + char* const begin = &*dest->begin(); + char* out = begin + old_size; + for (const absl::string_view piece : pieces) { + const size_t this_size = piece.size(); + memcpy(out, piece.data(), this_size); + out += this_size; + } + assert(out == begin + dest->size()); +} + +} // namespace strings_internal + +void StrAppend(std::string* dest, const AlphaNum& a) { + ASSERT_NO_OVERLAP(*dest, a); + dest->append(a.data(), a.size()); +} + +void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) { + ASSERT_NO_OVERLAP(*dest, a); + ASSERT_NO_OVERLAP(*dest, b); + std::string::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitialized( + dest, old_size + a.size() + b.size()); + char* const begin = &*dest->begin(); + char* out = begin + old_size; + out = Append(out, a); + out = Append(out, b); + assert(out == begin + dest->size()); +} + +void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c) { + ASSERT_NO_OVERLAP(*dest, a); + ASSERT_NO_OVERLAP(*dest, b); + ASSERT_NO_OVERLAP(*dest, c); + std::string::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitialized( + dest, old_size + a.size() + b.size() + c.size()); + char* const begin = &*dest->begin(); + char* out = begin + old_size; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + assert(out == begin + dest->size()); +} + +void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d) { + ASSERT_NO_OVERLAP(*dest, a); + ASSERT_NO_OVERLAP(*dest, b); + ASSERT_NO_OVERLAP(*dest, c); + ASSERT_NO_OVERLAP(*dest, d); + std::string::size_type old_size = dest->size(); + strings_internal::STLStringResizeUninitialized( + dest, old_size + a.size() + b.size() + c.size() + d.size()); + char* const begin = &*dest->begin(); + char* out = begin + old_size; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + out = Append(out, d); + assert(out == begin + dest->size()); +} + +} // namespace absl diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h new file mode 100644 index 000000000000..5b4c9baacf59 --- /dev/null +++ b/absl/strings/str_cat.h @@ -0,0 +1,348 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_cat.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for efficiently concatenating and appending +// strings: `StrCat()` and `StrAppend()`. Most of the work within these routines +// is actually handled through use of a special AlphaNum type, which was +// designed to be used as a parameter type that efficiently manages conversion +// to strings and avoids copies in the above operations. +// +// Any routine accepting either a std::string or a number may accept `AlphaNum`. +// The basic idea is that by accepting a `const AlphaNum &` as an argument +// to your function, your callers will automagically convert bools, integers, +// and floating point values to strings for you. +// +// NOTE: Use of `AlphaNum` outside of the //absl/strings package is unsupported +// except for the specific case of function parameters of type `AlphaNum` or +// `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a +// stack variable is not supported. +// +// Conversion from 8-bit values is not accepted because, if it were, then an +// attempt to pass ':' instead of ":" might result in a 58 ending up in your +// result. +// +// Bools convert to "0" or "1". +// +// Floating point numbers are formatted with six-digit precision, which is +// the default for "std::cout <<" or printf "%g" (the same as "%.6g"). +// +// +// You can convert to hexadecimal output rather than decimal output using the +// `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to +// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using +// a `PadSpec` enum, so the equivalent of `StringPrintf("%04x", my_int)` is +// `absl::StrCat(absl::Hex(my_int, absl::kZeroPad4))`. +// +// ----------------------------------------------------------------------------- + +#ifndef ABSL_STRINGS_STR_CAT_H_ +#define ABSL_STRINGS_STR_CAT_H_ + +#include <array> +#include <cstdint> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" + +namespace absl { + +namespace strings_internal { +// AlphaNumBuffer allows a way to pass a std::string to StrCat without having to do +// memory allocation. It is simply a pair of a fixed-size character array, and +// a size. Please don't use outside of absl, yet. +template <size_t max_size> +struct AlphaNumBuffer { + std::array<char, max_size> data; + size_t size; +}; + +} // namespace strings_internal + +// Enum that specifies the number of significant digits to return in a `Hex` +// conversion and fill character to use. A `kZeroPad2` value, for example, would +// produce hexadecimal strings such as "0A","0F" and 'kSpacePad5' value would +// produce hexadecimal strings such as " A"," F". +enum PadSpec { + kNoPad = 1, + kZeroPad2, + kZeroPad3, + kZeroPad4, + kZeroPad5, + kZeroPad6, + kZeroPad7, + kZeroPad8, + kZeroPad9, + kZeroPad10, + kZeroPad11, + kZeroPad12, + kZeroPad13, + kZeroPad14, + kZeroPad15, + kZeroPad16, + + kSpacePad2 = kZeroPad2 + 64, + kSpacePad3, + kSpacePad4, + kSpacePad5, + kSpacePad6, + kSpacePad7, + kSpacePad8, + kSpacePad9, + kSpacePad10, + kSpacePad11, + kSpacePad12, + kSpacePad13, + kSpacePad14, + kSpacePad15, + kSpacePad16, +}; + +// ----------------------------------------------------------------------------- +// Hex +// ----------------------------------------------------------------------------- +// +// `Hex` stores a set of hexadecimal std::string conversion parameters for use +// within `AlphaNum` std::string conversions. +struct Hex { + uint64_t value; + uint8_t width; + char fill; + + template <typename Int> + explicit Hex(Int v, PadSpec spec = absl::kNoPad, + typename std::enable_if<sizeof(Int) == 1>::type* = nullptr) + : Hex(spec, static_cast<uint8_t>(v)) {} + template <typename Int> + explicit Hex(Int v, PadSpec spec = absl::kNoPad, + typename std::enable_if<sizeof(Int) == 2>::type* = nullptr) + : Hex(spec, static_cast<uint16_t>(v)) {} + template <typename Int> + explicit Hex(Int v, PadSpec spec = absl::kNoPad, + typename std::enable_if<sizeof(Int) == 4>::type* = nullptr) + : Hex(spec, static_cast<uint32_t>(v)) {} + template <typename Int> + explicit Hex(Int v, PadSpec spec = absl::kNoPad, + typename std::enable_if<sizeof(Int) == 8>::type* = nullptr) + : Hex(spec, static_cast<uint64_t>(v)) {} + + private: + Hex(PadSpec spec, uint64_t v) + : value(v), + width(spec == absl::kNoPad + ? 1 + : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2 + : spec - absl::kZeroPad2 + 2), + fill(spec >= absl::kSpacePad2 ? ' ' : '0') {} +}; + +// ----------------------------------------------------------------------------- +// AlphaNum +// ----------------------------------------------------------------------------- +// +// The `AlphaNum` class acts as the main parameter type for `StrCat()` and +// `StrAppend()`, providing efficient conversion of numeric, boolean, and +// hexadecimal values (through the `Hex` type) into strings. + +class AlphaNum { + public: + // No bool ctor -- bools convert to an integral type. + // A bool ctor would also convert incoming pointers (bletch). + + AlphaNum(int x) // NOLINT(runtime/explicit) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(unsigned int x) // NOLINT(runtime/explicit) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(unsigned long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(long long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + AlphaNum(unsigned long long x) // NOLINT(*) + : piece_(digits_, + numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + + AlphaNum(float f) // NOLINT(runtime/explicit) + : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} + AlphaNum(double f) // NOLINT(runtime/explicit) + : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} + + AlphaNum(Hex hex); // NOLINT(runtime/explicit) + + template <size_t size> + AlphaNum( // NOLINT(runtime/explicit) + const strings_internal::AlphaNumBuffer<size>& buf) + : piece_(&buf.data[0], buf.size) {} + + AlphaNum(const char* c_str) : piece_(c_str) {} // NOLINT(runtime/explicit) + AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit) + template <typename Allocator> + AlphaNum( // NOLINT(runtime/explicit) + const std::basic_string<char, std::char_traits<char>, Allocator>& str) + : piece_(str) {} + + // Use std::string literals ":" instead of character literals ':'. + AlphaNum(char c) = delete; // NOLINT(runtime/explicit) + + AlphaNum(const AlphaNum&) = delete; + AlphaNum& operator=(const AlphaNum&) = delete; + + absl::string_view::size_type size() const { return piece_.size(); } + const char* data() const { return piece_.data(); } + absl::string_view Piece() const { return piece_; } + + // Normal enums are already handled by the integer formatters. + // This overload matches only scoped enums. + template <typename T, + typename = typename std::enable_if< + std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + AlphaNum(T e) // NOLINT(runtime/explicit) + : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {} + + private: + absl::string_view piece_; + char digits_[numbers_internal::kFastToBufferSize]; +}; + +// ----------------------------------------------------------------------------- +// StrCat() +// ----------------------------------------------------------------------------- +// +// Merges given strings or numbers, using no delimiter(s). +// +// `StrCat()` is designed to be the fastest possible way to construct a std::string +// out of a mix of raw C strings, string_views, strings, bool values, +// and numeric values. +// +// Don't use `StrCat()` for user-visible strings. The localization process +// works poorly on strings built up out of fragments. +// +// For clarity and performance, don't use `StrCat()` when appending to a +// std::string. Use `StrAppend()` instead. In particular, avoid using any of these +// (anti-)patterns: +// +// str.append(StrCat(...)) +// str += StrCat(...) +// str = StrCat(str, ...) +// +// The last case is the worst, with a potential to change a loop +// from a linear time operation with O(1) dynamic allocations into a +// quadratic time operation with O(n) dynamic allocations. +// +// See `StrAppend()` below for more information. + +namespace strings_internal { + +// Do not call directly - this is not part of the public API. +std::string CatPieces(std::initializer_list<absl::string_view> pieces); +void AppendPieces(std::string* dest, + std::initializer_list<absl::string_view> pieces); + +} // namespace strings_internal + +ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); } + +ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) { + return std::string(a.data(), a.size()); +} + +ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b); +ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c); +ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d); + +// Support 5 or more arguments +template <typename... AV> +ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, + const AV&... args) { + return strings_internal::CatPieces( + {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(), + static_cast<const AlphaNum&>(args).Piece()...}); +} + +// ----------------------------------------------------------------------------- +// StrAppend() +// ----------------------------------------------------------------------------- +// +// Appends a std::string or set of strings to an existing std::string, in a similar +// fashion to `StrCat()`. +// +// WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the +// a, b, c, parameters be a reference into str. For speed, `StrAppend()` does +// not try to check each of its input arguments to be sure that they are not +// a subset of the std::string being appended to. That is, while this will work: +// +// std::string s = "foo"; +// s += s; +// +// This output is undefined: +// +// std::string s = "foo"; +// StrAppend(&s, s); +// +// This output is undefined as well, since `absl::string_view` does not own its +// data: +// +// std::string s = "foobar"; +// absl::string_view p = s; +// StrAppend(&s, p); + +inline void StrAppend(std::string*) {} +void StrAppend(std::string* dest, const AlphaNum& a); +void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b); +void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c); +void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d); + +// Support 5 or more arguments +template <typename... AV> +inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, const AlphaNum& e, + const AV&... args) { + strings_internal::AppendPieces( + dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(), + static_cast<const AlphaNum&>(args).Piece()...}); +} + +// Helper function for the future StrCat default floating-point format, %.6g +// This is fast. +inline strings_internal::AlphaNumBuffer< + numbers_internal::kSixDigitsToBufferSize> +SixDigits(double d) { + strings_internal::AlphaNumBuffer<numbers_internal::kSixDigitsToBufferSize> + result; + result.size = numbers_internal::SixDigitsToBuffer(d, &result.data[0]); + return result; +} + +} // namespace absl + +#endif // ABSL_STRINGS_STR_CAT_H_ diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc new file mode 100644 index 000000000000..293d1943b16e --- /dev/null +++ b/absl/strings/str_cat_test.cc @@ -0,0 +1,462 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Unit tests for all str_cat.h functions + +#include "absl/strings/str_cat.h" + +#include <cstdint> +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/substitute.h" + +namespace { + +// Test absl::StrCat of ints and longs of various sizes and signdedness. +TEST(StrCat, Ints) { + const short s = -1; // NOLINT(runtime/int) + const uint16_t us = 2; + const int i = -3; + const unsigned int ui = 4; + const long l = -5; // NOLINT(runtime/int) + const unsigned long ul = 6; // NOLINT(runtime/int) + const long long ll = -7; // NOLINT(runtime/int) + const unsigned long long ull = 8; // NOLINT(runtime/int) + const ptrdiff_t ptrdiff = -9; + const size_t size = 10; + const intptr_t intptr = -12; + const uintptr_t uintptr = 13; + std::string answer; + answer = absl::StrCat(s, us); + EXPECT_EQ(answer, "-12"); + answer = absl::StrCat(i, ui); + EXPECT_EQ(answer, "-34"); + answer = absl::StrCat(l, ul); + EXPECT_EQ(answer, "-56"); + answer = absl::StrCat(ll, ull); + EXPECT_EQ(answer, "-78"); + answer = absl::StrCat(ptrdiff, size); + EXPECT_EQ(answer, "-910"); + answer = absl::StrCat(ptrdiff, intptr); + EXPECT_EQ(answer, "-9-12"); + answer = absl::StrCat(uintptr, 0); + EXPECT_EQ(answer, "130"); +} + +TEST(StrCat, Enums) { + enum SmallNumbers { One = 1, Ten = 10 } e = Ten; + EXPECT_EQ("10", absl::StrCat(e)); + EXPECT_EQ("-5", absl::StrCat(SmallNumbers(-5))); + + enum class Option { Boxers = 1, Briefs = -1 }; + + EXPECT_EQ("-1", absl::StrCat(Option::Briefs)); + + enum class Airplane : uint64_t { + Airbus = 1, + Boeing = 1000, + Canary = 10000000000 // too big for "int" + }; + + EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary)); + + enum class TwoGig : int32_t { + TwoToTheZero = 1, + TwoToTheSixteenth = 1 << 16, + TwoToTheThirtyFirst = INT32_MIN + }; + EXPECT_EQ("65536", absl::StrCat(TwoGig::TwoToTheSixteenth)); + EXPECT_EQ("-2147483648", absl::StrCat(TwoGig::TwoToTheThirtyFirst)); + EXPECT_EQ("-1", absl::StrCat(static_cast<TwoGig>(-1))); + + enum class FourGig : uint32_t { + TwoToTheZero = 1, + TwoToTheSixteenth = 1 << 16, + TwoToTheThirtyFirst = 1U << 31 // too big for "int" + }; + EXPECT_EQ("65536", absl::StrCat(FourGig::TwoToTheSixteenth)); + EXPECT_EQ("2147483648", absl::StrCat(FourGig::TwoToTheThirtyFirst)); + EXPECT_EQ("4294967295", absl::StrCat(static_cast<FourGig>(-1))); + + EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary)); +} + +TEST(StrCat, Basics) { + std::string result; + + std::string strs[] = { + "Hello", + "Cruel", + "World" + }; + + std::string stdstrs[] = { + "std::Hello", + "std::Cruel", + "std::World" + }; + + absl::string_view pieces[] = {"Hello", "Cruel", "World"}; + + const char* c_strs[] = { + "Hello", + "Cruel", + "World" + }; + + int32_t i32s[] = {'H', 'C', 'W'}; + uint64_t ui64s[] = {12345678910LL, 10987654321LL}; + + EXPECT_EQ(absl::StrCat(), ""); + + result = absl::StrCat(false, true, 2, 3); + EXPECT_EQ(result, "0123"); + + result = absl::StrCat(-1); + EXPECT_EQ(result, "-1"); + + result = absl::StrCat(absl::SixDigits(0.5)); + EXPECT_EQ(result, "0.5"); + + result = absl::StrCat(strs[1], pieces[2]); + EXPECT_EQ(result, "CruelWorld"); + + result = absl::StrCat(stdstrs[1], " ", stdstrs[2]); + EXPECT_EQ(result, "std::Cruel std::World"); + + result = absl::StrCat(strs[0], ", ", pieces[2]); + EXPECT_EQ(result, "Hello, World"); + + result = absl::StrCat(strs[0], ", ", strs[1], " ", strs[2], "!"); + EXPECT_EQ(result, "Hello, Cruel World!"); + + result = absl::StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]); + EXPECT_EQ(result, "Hello, Cruel World"); + + result = absl::StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]); + EXPECT_EQ(result, "Hello, Cruel World"); + + result = absl::StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!"); + EXPECT_EQ(result, "ASCII 72, 67 87!"); + + result = absl::StrCat(ui64s[0], ", ", ui64s[1], "!"); + EXPECT_EQ(result, "12345678910, 10987654321!"); + + std::string one = "1"; // Actually, it's the size of this std::string that we want; a + // 64-bit build distinguishes between size_t and uint64_t, + // even though they're both unsigned 64-bit values. + result = absl::StrCat("And a ", one.size(), " and a ", + &result[2] - &result[0], " and a ", one, " 2 3 4", "!"); + EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!"); + + // result = absl::StrCat("Single chars won't compile", '!'); + // result = absl::StrCat("Neither will nullptrs", nullptr); + result = + absl::StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0); + EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33"); + + float f = 100000.5; + result = absl::StrCat("A hundred K and a half is ", absl::SixDigits(f)); + EXPECT_EQ(result, "A hundred K and a half is 100000"); + + f = 100001.5; + result = + absl::StrCat("A hundred K and one and a half is ", absl::SixDigits(f)); + EXPECT_EQ(result, "A hundred K and one and a half is 100002"); + + double d = 100000.5; + d *= d; + result = + absl::StrCat("A hundred K and a half squared is ", absl::SixDigits(d)); + EXPECT_EQ(result, "A hundred K and a half squared is 1.00001e+10"); + + result = absl::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888, + 999999999); + EXPECT_EQ(result, "12333444455555666666777777788888888999999999"); +} + +// A minimal allocator that uses malloc(). +template <typename T> +struct Mallocator { + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + size_type max_size() const { + return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type); + } + template <typename U> + struct rebind { + typedef Mallocator<U> other; + }; + Mallocator() = default; + + T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); } + void deallocate(T* p, size_t) { std::free(p); } +}; +template <typename T, typename U> +bool operator==(const Mallocator<T>&, const Mallocator<U>&) { + return true; +} +template <typename T, typename U> +bool operator!=(const Mallocator<T>&, const Mallocator<U>&) { + return false; +} + +TEST(StrCat, CustomAllocator) { + using mstring = + std::basic_string<char, std::char_traits<char>, Mallocator<char>>; + const mstring str1("PARACHUTE OFF A BLIMP INTO MOSCONE!!"); + + const mstring str2("Read this book about coffee tables"); + + std::string result = absl::StrCat(str1, str2); + EXPECT_EQ(result, + "PARACHUTE OFF A BLIMP INTO MOSCONE!!" + "Read this book about coffee tables"); +} + +TEST(StrCat, MaxArgs) { + std::string result; + // Test 10 up to 26 arguments, the current maximum + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a"); + EXPECT_EQ(result, "123456789a"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b"); + EXPECT_EQ(result, "123456789ab"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c"); + EXPECT_EQ(result, "123456789abc"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d"); + EXPECT_EQ(result, "123456789abcd"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e"); + EXPECT_EQ(result, "123456789abcde"); + result = + absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f"); + EXPECT_EQ(result, "123456789abcdef"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g"); + EXPECT_EQ(result, "123456789abcdefg"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h"); + EXPECT_EQ(result, "123456789abcdefgh"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i"); + EXPECT_EQ(result, "123456789abcdefghi"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j"); + EXPECT_EQ(result, "123456789abcdefghij"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k"); + EXPECT_EQ(result, "123456789abcdefghijk"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l"); + EXPECT_EQ(result, "123456789abcdefghijkl"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m"); + EXPECT_EQ(result, "123456789abcdefghijklm"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n"); + EXPECT_EQ(result, "123456789abcdefghijklmn"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o"); + EXPECT_EQ(result, "123456789abcdefghijklmno"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p"); + EXPECT_EQ(result, "123456789abcdefghijklmnop"); + result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q"); + EXPECT_EQ(result, "123456789abcdefghijklmnopq"); + // No limit thanks to C++11's variadic templates + result = absl::StrCat( + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g", "h", + "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", + "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"); + EXPECT_EQ(result, + "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); +} + +TEST(StrAppend, Basics) { + std::string result = "existing text"; + + std::string strs[] = { + "Hello", + "Cruel", + "World" + }; + + absl::string_view pieces[] = {"Hello", "Cruel", "World"}; + + const char* c_strs[] = { + "Hello", + "Cruel", + "World" + }; + + int32_t i32s[] = {'H', 'C', 'W'}; + uint64_t ui64s[] = {12345678910LL, 10987654321LL}; + + std::string::size_type old_size = result.size(); + absl::StrAppend(&result); + EXPECT_EQ(result.size(), old_size); + + old_size = result.size(); + absl::StrAppend(&result, strs[0]); + EXPECT_EQ(result.substr(old_size), "Hello"); + + old_size = result.size(); + absl::StrAppend(&result, strs[1], pieces[2]); + EXPECT_EQ(result.substr(old_size), "CruelWorld"); + + old_size = result.size(); + absl::StrAppend(&result, strs[0], ", ", pieces[2]); + EXPECT_EQ(result.substr(old_size), "Hello, World"); + + old_size = result.size(); + absl::StrAppend(&result, strs[0], ", ", strs[1], " ", strs[2], "!"); + EXPECT_EQ(result.substr(old_size), "Hello, Cruel World!"); + + old_size = result.size(); + absl::StrAppend(&result, pieces[0], ", ", pieces[1], " ", pieces[2]); + EXPECT_EQ(result.substr(old_size), "Hello, Cruel World"); + + old_size = result.size(); + absl::StrAppend(&result, c_strs[0], ", ", c_strs[1], " ", c_strs[2]); + EXPECT_EQ(result.substr(old_size), "Hello, Cruel World"); + + old_size = result.size(); + absl::StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!"); + EXPECT_EQ(result.substr(old_size), "ASCII 72, 67 87!"); + + old_size = result.size(); + absl::StrAppend(&result, ui64s[0], ", ", ui64s[1], "!"); + EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!"); + + std::string one = "1"; // Actually, it's the size of this std::string that we want; a + // 64-bit build distinguishes between size_t and uint64_t, + // even though they're both unsigned 64-bit values. + old_size = result.size(); + absl::StrAppend(&result, "And a ", one.size(), " and a ", + &result[2] - &result[0], " and a ", one, " 2 3 4", "!"); + EXPECT_EQ(result.substr(old_size), "And a 1 and a 2 and a 1 2 3 4!"); + + // result = absl::StrCat("Single chars won't compile", '!'); + // result = absl::StrCat("Neither will nullptrs", nullptr); + old_size = result.size(); + absl::StrAppend(&result, + "To output a char by ASCII/numeric value, use +: ", '!' + 0); + EXPECT_EQ(result.substr(old_size), + "To output a char by ASCII/numeric value, use +: 33"); + + // Test 9 arguments, the old maximum + old_size = result.size(); + absl::StrAppend(&result, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888, + 9); + EXPECT_EQ(result.substr(old_size), "1223334444555556666667777777888888889"); + + // No limit thanks to C++11's variadic templates + old_size = result.size(); + absl::StrAppend( + &result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", // + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", // + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", // + "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", // + "No limit thanks to C++11's variadic templates"); + EXPECT_EQ(result.substr(old_size), + "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "No limit thanks to C++11's variadic templates"); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST(StrAppend, Death) { + std::string s = "self"; + // on linux it's "assertion", on mac it's "Assertion", + // on chromiumos it's "Assertion ... failed". + EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s.c_str() + 1), "ssertion.*failed"); + EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s), "ssertion.*failed"); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST(StrAppend, EmptyString) { + std::string s = ""; + absl::StrAppend(&s, s); + EXPECT_EQ(s, ""); +} + +template <typename IntType> +void CheckHex(IntType v, const char* nopad_format, const char* zeropad_format, + const char* spacepad_format) { + char expected[256]; + + std::string actual = absl::StrCat(absl::Hex(v, absl::kNoPad)); + snprintf(expected, sizeof(expected), nopad_format, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + + for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad16; ++spec) { + std::string actual = + absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec))); + snprintf(expected, sizeof(expected), zeropad_format, + spec - absl::kZeroPad2 + 2, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + } + + for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad16; ++spec) { + std::string actual = + absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec))); + snprintf(expected, sizeof(expected), spacepad_format, + spec - absl::kSpacePad2 + 2, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + } +} + +void CheckHex64(uint64_t v) { + unsigned long long llv = v; // NOLINT(runtime/int) + + CheckHex(llv, "%llx", "%0*llx", "%*llx"); +} + +template <typename Int32Type> +void CheckHex32(Int32Type v) { + CheckHex(v, "%x", "%0*x", "%*x"); +} + +void TestFastPrints() { + // Test min int to make sure that works + for (int i = 0; i < 10000; i++) { + CheckHex64(i); + CheckHex32(static_cast<uint32_t>(i)); + CheckHex32(i); + CheckHex32(-i); + } + + CheckHex64(uint64_t{0x123456789abcdef0}); + CheckHex32(0x12345678U); + + int8_t minus_one_8bit = -1; + EXPECT_EQ("ff", absl::StrCat(absl::Hex(minus_one_8bit))); + + int16_t minus_one_16bit = -1; + EXPECT_EQ("ffff", absl::StrCat(absl::Hex(minus_one_16bit))); +} + +TEST(Numbers, TestFunctionsMovedOverFromNumbersMain) { + TestFastPrints(); +} + +} // namespace diff --git a/absl/strings/str_join.h b/absl/strings/str_join.h new file mode 100644 index 000000000000..82a3cac2dcbb --- /dev/null +++ b/absl/strings/str_join.h @@ -0,0 +1,288 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_join.h +// ----------------------------------------------------------------------------- +// +// This header file contains functions for joining a range of elements and +// returning the result as a std::string. StrJoin operations are specified by passing +// a range, a separator std::string to use between the elements joined, and an +// optional Formatter responsible for converting each argument in the range to a +// std::string. If omitted, a default `AlphaNumFormatter()` is called on the elements +// to be joined, using the same formatting that `absl::StrCat()` uses. This +// package defines a number of default formatters, and you can define your own +// implementations. +// +// Ranges are specified by passing a container with `std::begin()` and +// `std::end()` iterators, container-specific `begin()` and `end()` iterators, a +// brace-initialized `std::initializer_list`, or a `std::tuple` of heterogeneous +// objects. The separator std::string is specified as an `absl::string_view`. +// +// Because the default formatter uses the `absl::AlphaNum` class, +// `absl::StrJoin()`, like `absl::StrCat()`, will work out-of-the-box on +// collections of strings, ints, floats, doubles, etc. +// +// Example: +// +// std::vector<std::string> v = {"foo", "bar", "baz"}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// See comments on the `absl::StrJoin()` function for more examples. + +#ifndef ABSL_STRINGS_STR_JOIN_H_ +#define ABSL_STRINGS_STR_JOIN_H_ + +#include <cstdio> +#include <cstring> +#include <initializer_list> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> + +#include "absl/base/macros.h" +#include "absl/strings/internal/str_join_internal.h" +#include "absl/strings/string_view.h" + +namespace absl { + +// ----------------------------------------------------------------------------- +// Concept: Formatter +// ----------------------------------------------------------------------------- +// +// A Formatter is a function object that is responsible for formatting its +// argument as a std::string and appending it to a given output std::string. Formatters +// may be implemented as function objects, lambdas, or normal functions. You may +// provide your own Formatter to enable `absl::StrJoin()` to work with arbitrary +// types. +// +// The following is an example of a custom Formatter that simply uses +// `std::to_string()` to format an integer as a std::string. +// +// struct MyFormatter { +// void operator()(std::string* out, int i) const { +// out->append(std::to_string(i)); +// } +// }; +// +// You would use the above formatter by passing an instance of it as the final +// argument to `absl::StrJoin()`: +// +// std::vector<int> v = {1, 2, 3, 4}; +// std::string s = absl::StrJoin(v, "-", MyFormatter()); +// EXPECT_EQ("1-2-3-4", s); +// +// The following standard formatters are provided within this file: +// +// - `AlphaNumFormatter()` (the default) +// - `StreamFormatter()` +// - `PairFormatter()` +// - `DereferenceFormatter()` + +// AlphaNumFormatter() +// +// Default formatter used if none is specified. Uses `absl::AlphaNum` to convert +// numeric arguments to strings. +inline strings_internal::AlphaNumFormatterImpl AlphaNumFormatter() { + return strings_internal::AlphaNumFormatterImpl(); +} + +// StreamFormatter() +// +// Formats its argument using the << operator. +inline strings_internal::StreamFormatterImpl StreamFormatter() { + return strings_internal::StreamFormatterImpl(); +} + +// Function Template: PairFormatter(Formatter, absl::string_view, Formatter) +// +// Formats a `std::pair` by putting a given separator between the pair's +// `.first` and `.second` members. This formatter allows you to specify +// custom Formatters for both the first and second member of each pair. +template <typename FirstFormatter, typename SecondFormatter> +inline strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter> +PairFormatter(FirstFormatter f1, absl::string_view sep, SecondFormatter f2) { + return strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>( + std::move(f1), sep, std::move(f2)); +} + +// Function overload of PairFormatter() for using a default +// `AlphaNumFormatter()` for each Formatter in the pair. +inline strings_internal::PairFormatterImpl< + strings_internal::AlphaNumFormatterImpl, + strings_internal::AlphaNumFormatterImpl> +PairFormatter(absl::string_view sep) { + return PairFormatter(AlphaNumFormatter(), sep, AlphaNumFormatter()); +} + +// Function Template: DereferenceFormatter(Formatter) +// +// Formats its argument by dereferencing it and then applying the given +// formatter. This formatter is useful for formatting a container of +// pointer-to-T. This pattern often shows up when joining repeated fields in +// protocol buffers. +template <typename Formatter> +strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter( + Formatter&& f) { + return strings_internal::DereferenceFormatterImpl<Formatter>( + std::forward<Formatter>(f)); +} + +// Function overload of `DererefenceFormatter()` for using a default +// `AlphaNumFormatter()`. +inline strings_internal::DereferenceFormatterImpl< + strings_internal::AlphaNumFormatterImpl> +DereferenceFormatter() { + return strings_internal::DereferenceFormatterImpl< + strings_internal::AlphaNumFormatterImpl>(AlphaNumFormatter()); +} + +// ----------------------------------------------------------------------------- +// StrJoin() +// ----------------------------------------------------------------------------- +// +// Joins a range of elements and returns the result as a std::string. +// `absl::StrJoin()` takes a range, a separator std::string to use between the +// elements joined, and an optional Formatter responsible for converting each +// argument in the range to a std::string. +// +// If omitted, the default `AlphaNumFormatter()` is called on the elements to be +// joined. +// +// Example 1: +// // Joins a collection of strings. This pattern also works with a collection +// // of `asbl::string_view` or even `const char*`. +// std::vector<std::string> v = {"foo", "bar", "baz"}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// Example 2: +// // Joins the values in the given `std::initializer_list<>` specified using +// // brace initialization. This pattern also works with an initializer_list +// // of ints or `absl::string_view` -- any `AlphaNum`-compatible type. +// std::string s = absl::StrJoin({"foo", "bar", "baz"}, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// Example 3: +// // Joins a collection of ints. This pattern also works with floats, +// // doubles, int64s -- any `StrCat()`-compatible type. +// std::vector<int> v = {1, 2, 3, -4}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3--4", s); +// +// Example 4: +// // Joins a collection of pointer-to-int. By default, pointers are +// // dereferenced and the pointee is formatted using the default format for +// // that type; such dereferencing occurs for all levels of indirection, so +// // this pattern works just as well for `std::vector<int**>` as for +// // `std::vector<int*>`. +// int x = 1, y = 2, z = 3; +// std::vector<int*> v = {&x, &y, &z}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3", s); +// +// Example 5: +// // Dereferencing of `std::unique_ptr<>` is also supported: +// std::vector<std::unique_ptr<int>> v +// v.emplace_back(new int(1)); +// v.emplace_back(new int(2)); +// v.emplace_back(new int(3)); +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3", s); +// +// Example 6: +// // Joins a `std::map`, with each key-value pair separated by an equals +// // sign. This pattern would also work with, say, a +// // `std::vector<std::pair<>>`. +// std::map<std::string, int> m = { +// std::make_pair("a", 1), +// std::make_pair("b", 2), +// std::make_pair("c", 3)}; +// std::string s = absl::StrJoin(m, ",", strings::PairFormatter("=")); +// EXPECT_EQ("a=1,b=2,c=3", s); +// +// Example 7: +// // These examples show how `absl::StrJoin()` handles a few common edge +// // cases: +// std::vector<std::string> v_empty; +// EXPECT_EQ("", absl::StrJoin(v_empty, "-")); +// +// std::vector<std::string> v_one_item = {"foo"}; +// EXPECT_EQ("foo", absl::StrJoin(v_one_item, "-")); +// +// std::vector<std::string> v_empty_string = {""}; +// EXPECT_EQ("", absl::StrJoin(v_empty_string, "-")); +// +// std::vector<std::string> v_one_item_empty_string = {"a", ""}; +// EXPECT_EQ("a-", absl::StrJoin(v_one_item_empty_string, "-")); +// +// std::vector<std::string> v_two_empty_string = {"", ""}; +// EXPECT_EQ("-", absl::StrJoin(v_two_empty_string, "-")); +// +// Example 8: +// // Joins a `std::tuple<T...>` of heterogeneous types, converting each to +// // a std::string using the `absl::AlphaNum` class. +// std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-"); +// EXPECT_EQ("123-abc-0.456", s); + +template <typename Iterator, typename Formatter> +std::string StrJoin(Iterator start, Iterator end, absl::string_view sep, + Formatter&& fmt) { + return strings_internal::JoinAlgorithm(start, end, sep, fmt); +} + +template <typename Range, typename Formatter> +std::string StrJoin(const Range& range, absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinRange(range, separator, fmt); +} + +template <typename T, typename Formatter> +std::string StrJoin(std::initializer_list<T> il, absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinRange(il, separator, fmt); +} + +template <typename... T, typename Formatter> +std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinAlgorithm(value, separator, fmt); +} + +template <typename Iterator> +std::string StrJoin(Iterator start, Iterator end, absl::string_view separator) { + return strings_internal::JoinRange(start, end, separator); +} + +template <typename Range> +std::string StrJoin(const Range& range, absl::string_view separator) { + return strings_internal::JoinRange(range, separator); +} + +template <typename T> +std::string StrJoin(std::initializer_list<T> il, absl::string_view separator) { + return strings_internal::JoinRange(il, separator); +} + +template <typename... T> +std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator) { + return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter()); +} + +} // namespace absl + +#endif // ABSL_STRINGS_STR_JOIN_H_ diff --git a/absl/strings/str_join_test.cc b/absl/strings/str_join_test.cc new file mode 100644 index 000000000000..7c2ed09b91d6 --- /dev/null +++ b/absl/strings/str_join_test.cc @@ -0,0 +1,474 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Unit tests for all join.h functions + +#include "absl/strings/str_join.h" + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <initializer_list> +#include <map> +#include <ostream> +#include <random> +#include <set> +#include <tuple> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" + +namespace { + +TEST(StrJoin, APIExamples) { + { + // Collection of strings + std::vector<std::string> v = {"foo", "bar", "baz"}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of absl::string_view + std::vector<absl::string_view> v = {"foo", "bar", "baz"}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of const char* + std::vector<const char*> v = {"foo", "bar", "baz"}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of non-const char* + std::string a = "foo", b = "bar", c = "baz"; + std::vector<char*> v = {&a[0], &b[0], &c[0]}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of ints + std::vector<int> v = {1, 2, 3, -4}; + EXPECT_EQ("1-2-3--4", absl::StrJoin(v, "-")); + } + + { + // Literals passed as a std::initializer_list + std::string s = absl::StrJoin({"a", "b", "c"}, "-"); + EXPECT_EQ("a-b-c", s); + } + { + // Join a std::tuple<T...>. + std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-"); + EXPECT_EQ("123-abc-0.456", s); + } + + { + // Collection of unique_ptrs + std::vector<std::unique_ptr<int>> v; + v.emplace_back(new int(1)); + v.emplace_back(new int(2)); + v.emplace_back(new int(3)); + EXPECT_EQ("1-2-3", absl::StrJoin(v, "-")); + } + + { + // Array of ints + const int a[] = {1, 2, 3, -4}; + EXPECT_EQ("1-2-3--4", absl::StrJoin(a, a + ABSL_ARRAYSIZE(a), "-")); + } + + { + // Collection of pointers + int x = 1, y = 2, z = 3; + std::vector<int*> v = {&x, &y, &z}; + EXPECT_EQ("1-2-3", absl::StrJoin(v, "-")); + } + + { + // Collection of pointers to pointers + int x = 1, y = 2, z = 3; + int *px = &x, *py = &y, *pz = &z; + std::vector<int**> v = {&px, &py, &pz}; + EXPECT_EQ("1-2-3", absl::StrJoin(v, "-")); + } + + { + // Collection of pointers to std::string + std::string a("a"), b("b"); + std::vector<std::string*> v = {&a, &b}; + EXPECT_EQ("a-b", absl::StrJoin(v, "-")); + } + + { + // A std::map, which is a collection of std::pair<>s. + std::map<std::string, int> m = { {"a", 1}, {"b", 2}, {"c", 3} }; + EXPECT_EQ("a=1,b=2,c=3", absl::StrJoin(m, ",", absl::PairFormatter("="))); + } + + { + // Shows absl::StrSplit and absl::StrJoin working together. This example is + // equivalent to s/=/-/g. + const std::string s = "a=b=c=d"; + EXPECT_EQ("a-b-c-d", absl::StrJoin(absl::StrSplit(s, "="), "-")); + } + + // + // A few examples of edge cases + // + + { + // Empty range yields an empty std::string. + std::vector<std::string> v; + EXPECT_EQ("", absl::StrJoin(v, "-")); + } + + { + // A range of 1 element gives a std::string with that element but no separator. + std::vector<std::string> v = {"foo"}; + EXPECT_EQ("foo", absl::StrJoin(v, "-")); + } + + { + // A range with a single empty std::string element + std::vector<std::string> v = {""}; + EXPECT_EQ("", absl::StrJoin(v, "-")); + } + + { + // A range with 2 elements, one of which is an empty std::string + std::vector<std::string> v = {"a", ""}; + EXPECT_EQ("a-", absl::StrJoin(v, "-")); + } + + { + // A range with 2 empty elements. + std::vector<std::string> v = {"", ""}; + EXPECT_EQ("-", absl::StrJoin(v, "-")); + } + + { + // A std::vector of bool. + std::vector<bool> v = {true, false, true}; + EXPECT_EQ("1-0-1", absl::StrJoin(v, "-")); + } +} + +TEST(StrJoin, CustomFormatter) { + std::vector<std::string> v{"One", "Two", "Three"}; + { + std::string joined = absl::StrJoin(v, "", [](std::string* out, const std::string& in) { + absl::StrAppend(out, "(", in, ")"); + }); + EXPECT_EQ("(One)(Two)(Three)", joined); + } + { + class ImmovableFormatter { + public: + void operator()(std::string* out, const std::string& in) { + absl::StrAppend(out, "(", in, ")"); + } + ImmovableFormatter() {} + ImmovableFormatter(const ImmovableFormatter&) = delete; + }; + EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", ImmovableFormatter())); + } + { + class OverloadedFormatter { + public: + void operator()(std::string* out, const std::string& in) { + absl::StrAppend(out, "(", in, ")"); + } + void operator()(std::string* out, const std::string& in) const { + absl::StrAppend(out, "[", in, "]"); + } + }; + EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", OverloadedFormatter())); + const OverloadedFormatter fmt = {}; + EXPECT_EQ("[One][Two][Three]", absl::StrJoin(v, "", fmt)); + } +} + +// +// Tests the Formatters +// + +TEST(AlphaNumFormatter, FormatterAPI) { + // Not an exhaustive test. See strings/strcat_test.h for the exhaustive test + // of what AlphaNum can convert. + auto f = absl::AlphaNumFormatter(); + std::string s; + f(&s, "Testing: "); + f(&s, static_cast<int>(1)); + f(&s, static_cast<int16_t>(2)); + f(&s, static_cast<int64_t>(3)); + f(&s, static_cast<float>(4)); + f(&s, static_cast<double>(5)); + f(&s, static_cast<unsigned>(6)); + f(&s, static_cast<size_t>(7)); + f(&s, absl::string_view(" OK")); + EXPECT_EQ("Testing: 1234567 OK", s); +} + +// Make sure people who are mistakenly using std::vector<bool> even though +// they're not memory-constrained can use absl::AlphaNumFormatter(). +TEST(AlphaNumFormatter, VectorOfBool) { + auto f = absl::AlphaNumFormatter(); + std::string s; + std::vector<bool> v = {true, false, true}; + f(&s, *v.cbegin()); + f(&s, *v.begin()); + f(&s, v[1]); + EXPECT_EQ("110", s); +} + +TEST(AlphaNumFormatter, AlphaNum) { + auto f = absl::AlphaNumFormatter(); + std::string s; + f(&s, absl::AlphaNum("hello")); + EXPECT_EQ("hello", s); +} + +struct StreamableType { + std::string contents; +}; +inline std::ostream& operator<<(std::ostream& os, const StreamableType& t) { + os << "Streamable:" << t.contents; + return os; +} + +TEST(StreamFormatter, FormatterAPI) { + auto f = absl::StreamFormatter(); + std::string s; + f(&s, "Testing: "); + f(&s, static_cast<int>(1)); + f(&s, static_cast<int16_t>(2)); + f(&s, static_cast<int64_t>(3)); + f(&s, static_cast<float>(4)); + f(&s, static_cast<double>(5)); + f(&s, static_cast<unsigned>(6)); + f(&s, static_cast<size_t>(7)); + f(&s, absl::string_view(" OK ")); + StreamableType streamable = {"object"}; + f(&s, streamable); + EXPECT_EQ("Testing: 1234567 OK Streamable:object", s); +} + +// A dummy formatter that wraps each element in parens. Used in some tests +// below. +struct TestingParenFormatter { + template <typename T> + void operator()(std::string* s, const T& t) { + absl::StrAppend(s, "(", t, ")"); + } +}; + +TEST(PairFormatter, FormatterAPI) { + { + // Tests default PairFormatter(sep) that uses AlphaNumFormatter for the + // 'first' and 'second' members. + const auto f = absl::PairFormatter("="); + std::string s; + f(&s, std::make_pair("a", "b")); + f(&s, std::make_pair(1, 2)); + EXPECT_EQ("a=b1=2", s); + } + + { + // Tests using a custom formatter for the 'first' and 'second' members. + auto f = absl::PairFormatter(TestingParenFormatter(), "=", + TestingParenFormatter()); + std::string s; + f(&s, std::make_pair("a", "b")); + f(&s, std::make_pair(1, 2)); + EXPECT_EQ("(a)=(b)(1)=(2)", s); + } +} + +TEST(DereferenceFormatter, FormatterAPI) { + { + // Tests wrapping the default AlphaNumFormatter. + const absl::strings_internal::DereferenceFormatterImpl< + absl::strings_internal::AlphaNumFormatterImpl> + f; + int x = 1, y = 2, z = 3; + std::string s; + f(&s, &x); + f(&s, &y); + f(&s, &z); + EXPECT_EQ("123", s); + } + + { + // Tests wrapping std::string's default formatter. + absl::strings_internal::DereferenceFormatterImpl< + absl::strings_internal::DefaultFormatter<std::string>::Type> + f; + + std::string x = "x"; + std::string y = "y"; + std::string z = "z"; + std::string s; + f(&s, &x); + f(&s, &y); + f(&s, &z); + EXPECT_EQ(s, "xyz"); + } + + { + // Tests wrapping a custom formatter. + auto f = absl::DereferenceFormatter(TestingParenFormatter()); + int x = 1, y = 2, z = 3; + std::string s; + f(&s, &x); + f(&s, &y); + f(&s, &z); + EXPECT_EQ("(1)(2)(3)", s); + } + + { + absl::strings_internal::DereferenceFormatterImpl< + absl::strings_internal::AlphaNumFormatterImpl> + f; + auto x = std::unique_ptr<int>(new int(1)); + auto y = std::unique_ptr<int>(new int(2)); + auto z = std::unique_ptr<int>(new int(3)); + std::string s; + f(&s, x); + f(&s, y); + f(&s, z); + EXPECT_EQ("123", s); + } +} + +// +// Tests the interfaces for the 4 public Join function overloads. The semantics +// of the algorithm is covered in the above APIExamples test. +// +TEST(StrJoin, PublicAPIOverloads) { + std::vector<std::string> v = {"a", "b", "c"}; + + // Iterators + formatter + EXPECT_EQ("a-b-c", + absl::StrJoin(v.begin(), v.end(), "-", absl::AlphaNumFormatter())); + // Range + formatter + EXPECT_EQ("a-b-c", absl::StrJoin(v, "-", absl::AlphaNumFormatter())); + // Iterators, no formatter + EXPECT_EQ("a-b-c", absl::StrJoin(v.begin(), v.end(), "-")); + // Range, no formatter + EXPECT_EQ("a-b-c", absl::StrJoin(v, "-")); +} + +TEST(StrJoin, Array) { + const absl::string_view a[] = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); +} + +TEST(StrJoin, InitializerList) { + { EXPECT_EQ("a-b-c", absl::StrJoin({"a", "b", "c"}, "-")); } + + { + auto a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + std::initializer_list<const char*> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + std::initializer_list<std::string> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + std::initializer_list<absl::string_view> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + // Tests initializer_list with a non-default formatter + auto a = {"a", "b", "c"}; + TestingParenFormatter f; + EXPECT_EQ("(a)-(b)-(c)", absl::StrJoin(a, "-", f)); + } + + { + // initializer_list of ints + EXPECT_EQ("1-2-3", absl::StrJoin({1, 2, 3}, "-")); + } + + { + // Tests initializer_list of ints with a non-default formatter + auto a = {1, 2, 3}; + TestingParenFormatter f; + EXPECT_EQ("(1)-(2)-(3)", absl::StrJoin(a, "-", f)); + } +} + +TEST(StrJoin, Tuple) { + EXPECT_EQ("", absl::StrJoin(std::make_tuple(), "-")); + EXPECT_EQ("hello", absl::StrJoin(std::make_tuple("hello"), "-")); + + int x(10); + std::string y("hello"); + double z(3.14); + EXPECT_EQ("10-hello-3.14", absl::StrJoin(std::make_tuple(x, y, z), "-")); + + // Faster! Faster!! + EXPECT_EQ("10-hello-3.14", + absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-")); + + struct TestFormatter { + char buffer[128]; + void operator()(std::string* out, int v) { + snprintf(buffer, sizeof(buffer), "%#.8x", v); + out->append(buffer); + } + void operator()(std::string* out, double v) { + snprintf(buffer, sizeof(buffer), "%#.0f", v); + out->append(buffer); + } + void operator()(std::string* out, const std::string& v) { + snprintf(buffer, sizeof(buffer), "%.4s", v.c_str()); + out->append(buffer); + } + }; + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(x, y, z), "-", TestFormatter())); + EXPECT_EQ( + "0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-", TestFormatter())); + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(&x, &y, &z), "-", + absl::DereferenceFormatter(TestFormatter()))); + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), + absl::make_unique<std::string>(y), + absl::make_unique<double>(z)), + "-", absl::DereferenceFormatter(TestFormatter()))); + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), &y, &z), + "-", absl::DereferenceFormatter(TestFormatter()))); +} + +} // namespace diff --git a/absl/strings/str_replace.cc b/absl/strings/str_replace.cc new file mode 100644 index 000000000000..69efa3571388 --- /dev/null +++ b/absl/strings/str_replace.cc @@ -0,0 +1,79 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_replace.h" + +#include "absl/strings/str_cat.h" + +namespace absl { +namespace strings_internal { + +using FixedMapping = + std::initializer_list<std::pair<absl::string_view, absl::string_view>>; + +// Applies the ViableSubstitutions in subs_ptr to the absl::string_view s, and +// stores the result in *result_ptr. Returns the number of substitutions that +// occurred. +int ApplySubstitutions( + absl::string_view s, + std::vector<strings_internal::ViableSubstitution>* subs_ptr, + std::string* result_ptr) { + auto& subs = *subs_ptr; + int substitutions = 0; + size_t pos = 0; + while (!subs.empty()) { + auto& sub = subs.back(); + if (sub.offset >= pos) { + if (pos <= s.size()) { + StrAppend(result_ptr, s.substr(pos, sub.offset - pos), sub.replacement); + } + pos = sub.offset + sub.old.size(); + substitutions += 1; + } + sub.offset = s.find(sub.old, pos); + if (sub.offset == s.npos) { + subs.pop_back(); + } else { + // Insertion sort to ensure the last ViableSubstitution continues to be + // before all the others. + size_t index = subs.size(); + while (--index && subs[index - 1].OccursBefore(subs[index])) { + std::swap(subs[index], subs[index - 1]); + } + } + } + result_ptr->append(s.data() + pos, s.size() - pos); + return substitutions; +} + +} // namespace strings_internal + +// We can implement this in terms of the generic StrReplaceAll, but +// we must specify the template overload because C++ cannot deduce the type +// of an initializer_list parameter to a function, and also if we don't specify +// the type, we just call ourselves. +// +// Note that we implement them here, rather than in the header, so that they +// aren't inlined. + +std::string StrReplaceAll(absl::string_view s, + strings_internal::FixedMapping replacements) { + return StrReplaceAll<strings_internal::FixedMapping>(s, replacements); +} + +int StrReplaceAll(strings_internal::FixedMapping replacements, std::string* target) { + return StrReplaceAll<strings_internal::FixedMapping>(replacements, target); +} + +} // namespace absl diff --git a/absl/strings/str_replace.h b/absl/strings/str_replace.h new file mode 100644 index 000000000000..f4d9bb9545d6 --- /dev/null +++ b/absl/strings/str_replace.h @@ -0,0 +1,213 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_replace.h +// ----------------------------------------------------------------------------- +// +// This file defines `absl::StrReplaceAll()`, a general-purpose std::string +// replacement function designed for large, arbitrary text substitutions, +// especially on strings which you are receiving from some other system for +// further processing (e.g. processing regular expressions, escaping HTML +// entities, etc. `StrReplaceAll` is designed to be efficient even when only +// one substitution is being performed, or when substitution is rare. +// +// If the std::string being modified is known at compile-time, and the substitutions +// vary, `absl::Substitute()` may be a better choice. +// +// Example: +// +// std::string html_escaped = absl::StrReplaceAll(user_input, { +// {"&", "&"}, +// {"<", "<"}, +// {">", ">"}, +// {"\"", """}, +// {"'", "'"}}); +#ifndef ABSL_STRINGS_STR_REPLACE_H_ +#define ABSL_STRINGS_STR_REPLACE_H_ + +#include <string> +#include <utility> +#include <vector> + +#include "absl/base/attributes.h" +#include "absl/strings/string_view.h" + +namespace absl { + +// StrReplaceAll() +// +// Replaces character sequences within a given std::string with replacements provided +// within an initializer list of key/value pairs. Candidate replacements are +// considered in order as they occur within the std::string, with earlier matches +// taking precedence, and longer matches taking precedence for candidates +// starting at the same position in the std::string. Once a substitution is made, the +// replaced text is not considered for any further substitutions. +// +// Example: +// +// std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!", +// {{"$count", absl::StrCat(5)}, +// {"$who", "Bob"}, +// {"#Noun", "Apples"}}); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +ABSL_MUST_USE_RESULT std::string StrReplaceAll( + absl::string_view s, + std::initializer_list<std::pair<absl::string_view, absl::string_view>> + replacements); + +// Overload of `StrReplaceAll()` to accept a container of key/value replacement +// pairs (typically either an associative map or a `std::vector` of `std::pair` +// elements). A vector of pairs is generally more efficient. +// +// Examples: +// +// std::map<const absl::string_view, const absl::string_view> replacements; +// replacements["$who"] = "Bob"; +// replacements["$count"] = "5"; +// replacements["#Noun"] = "Apples"; +// std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!", +// replacements); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +// +// // A std::vector of std::pair elements can be more efficient. +// std::vector<std::pair<const absl::string_view, std::string>> replacements; +// replacements.push_back({"&", "&"}); +// replacements.push_back({"<", "<"}); +// replacements.push_back({">", ">"}); +// std::string s = absl::StrReplaceAll("if (ptr < &foo)", +// replacements); +// EXPECT_EQ("if (ptr < &foo)", s); +template <typename StrToStrMapping> +std::string StrReplaceAll(absl::string_view s, const StrToStrMapping& replacements); + +// Overload of `StrReplaceAll()` to replace character sequences within a given +// output std::string *in place* with replacements provided within an initializer +// list of key/value pairs, returning the number of substitutions that occurred. +// +// Example: +// +// std::string s = std::string("$who bought $count #Noun. Thanks $who!"); +// int count; +// count = absl::StrReplaceAll({{"$count", absl::StrCat(5)}, +// {"$who", "Bob"}, +// {"#Noun", "Apples"}}, &s); +// EXPECT_EQ(count, 4); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +int StrReplaceAll( + std::initializer_list<std::pair<absl::string_view, absl::string_view>> + replacements, + std::string* target); + +// Overload of `StrReplaceAll()` to replace patterns within a given output +// std::string *in place* with replacements provided within a container of key/value +// pairs. +// +// Example: +// +// std::string s = std::string("if (ptr < &foo)"); +// int count = absl::StrReplaceAll({{"&", "&"}, +// {"<", "<"}, +// {">", ">"}}, &s); +// EXPECT_EQ(count, 2); +// EXPECT_EQ("if (ptr < &foo)", s); +template <typename StrToStrMapping> +int StrReplaceAll(const StrToStrMapping& replacements, std::string* target); + +// Implementation details only, past this point. +namespace strings_internal { + +struct ViableSubstitution { + absl::string_view old; + absl::string_view replacement; + size_t offset; + + ViableSubstitution(absl::string_view old_str, + absl::string_view replacement_str, size_t offset_val) + : old(old_str), replacement(replacement_str), offset(offset_val) {} + + // One substitution occurs "before" another (takes priority) if either + // it has the lowest offset, or it has the same offset but a larger size. + bool OccursBefore(const ViableSubstitution& y) const { + if (offset != y.offset) return offset < y.offset; + return old.size() > y.old.size(); + } +}; + +// Build a vector of ViableSubstitutions based on the given list of +// replacements. subs can be implemented as a priority_queue. However, it turns +// out that most callers have small enough a list of substitutions that the +// overhead of such a queue isn't worth it. +template <typename StrToStrMapping> +std::vector<ViableSubstitution> FindSubstitutions( + absl::string_view s, const StrToStrMapping& replacements) { + std::vector<ViableSubstitution> subs; + subs.reserve(replacements.size()); + + for (const auto& rep : replacements) { + using std::get; + absl::string_view old(get<0>(rep)); + + size_t pos = s.find(old); + if (pos == s.npos) continue; + + // Ignore attempts to replace "". This condition is almost never true, + // but above condition is frequently true. That's why we test for this + // now and not before. + if (old.empty()) continue; + + subs.emplace_back(old, get<1>(rep), pos); + + // Insertion sort to ensure the last ViableSubstitution comes before + // all the others. + size_t index = subs.size(); + while (--index && subs[index - 1].OccursBefore(subs[index])) { + std::swap(subs[index], subs[index - 1]); + } + } + return subs; +} + +int ApplySubstitutions(absl::string_view s, + std::vector<ViableSubstitution>* subs_ptr, + std::string* result_ptr); + +} // namespace strings_internal + +template <typename StrToStrMapping> +std::string StrReplaceAll(absl::string_view s, const StrToStrMapping& replacements) { + auto subs = strings_internal::FindSubstitutions(s, replacements); + std::string result; + result.reserve(s.size()); + strings_internal::ApplySubstitutions(s, &subs, &result); + return result; +} + +template <typename StrToStrMapping> +int StrReplaceAll(const StrToStrMapping& replacements, std::string* target) { + auto subs = strings_internal::FindSubstitutions(*target, replacements); + if (subs.empty()) return 0; + + std::string result; + result.reserve(target->size()); + int substitutions = + strings_internal::ApplySubstitutions(*target, &subs, &result); + target->swap(result); + return substitutions; +} + +} // namespace absl + +#endif // ABSL_STRINGS_STR_REPLACE_H_ diff --git a/absl/strings/str_replace_test.cc b/absl/strings/str_replace_test.cc new file mode 100644 index 000000000000..f49c7e1c32cf --- /dev/null +++ b/absl/strings/str_replace_test.cc @@ -0,0 +1,340 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_replace.h" + +#include <list> +#include <tuple> + +#include "gtest/gtest.h" +#include "absl/strings/str_split.h" +#include "absl/strings/str_cat.h" + +TEST(StrReplaceAll, OneReplacement) { + std::string s; + + // Empty std::string. + s = absl::StrReplaceAll(s, {{"", ""}}); + EXPECT_EQ(s, ""); + s = absl::StrReplaceAll(s, {{"x", ""}}); + EXPECT_EQ(s, ""); + s = absl::StrReplaceAll(s, {{"", "y"}}); + EXPECT_EQ(s, ""); + s = absl::StrReplaceAll(s, {{"x", "y"}}); + EXPECT_EQ(s, ""); + + // Empty substring. + s = absl::StrReplaceAll("abc", {{"", ""}}); + EXPECT_EQ(s, "abc"); + s = absl::StrReplaceAll("abc", {{"", "y"}}); + EXPECT_EQ(s, "abc"); + s = absl::StrReplaceAll("abc", {{"x", ""}}); + EXPECT_EQ(s, "abc"); + + // Substring not found. + s = absl::StrReplaceAll("abc", {{"xyz", "123"}}); + EXPECT_EQ(s, "abc"); + + // Replace entire std::string. + s = absl::StrReplaceAll("abc", {{"abc", "xyz"}}); + EXPECT_EQ(s, "xyz"); + + // Replace once at the start. + s = absl::StrReplaceAll("abc", {{"a", "x"}}); + EXPECT_EQ(s, "xbc"); + + // Replace once in the middle. + s = absl::StrReplaceAll("abc", {{"b", "x"}}); + EXPECT_EQ(s, "axc"); + + // Replace once at the end. + s = absl::StrReplaceAll("abc", {{"c", "x"}}); + EXPECT_EQ(s, "abx"); + + // Replace multiple times with varying lengths of original/replacement. + s = absl::StrReplaceAll("ababa", {{"a", "xxx"}}); + EXPECT_EQ(s, "xxxbxxxbxxx"); + + s = absl::StrReplaceAll("ababa", {{"b", "xxx"}}); + EXPECT_EQ(s, "axxxaxxxa"); + + s = absl::StrReplaceAll("aaabaaabaaa", {{"aaa", "x"}}); + EXPECT_EQ(s, "xbxbx"); + + s = absl::StrReplaceAll("abbbabbba", {{"bbb", "x"}}); + EXPECT_EQ(s, "axaxa"); + + // Overlapping matches are replaced greedily. + s = absl::StrReplaceAll("aaa", {{"aa", "x"}}); + EXPECT_EQ(s, "xa"); + + // The replacements are not recursive. + s = absl::StrReplaceAll("aaa", {{"aa", "a"}}); + EXPECT_EQ(s, "aa"); +} + +TEST(StrReplaceAll, ManyReplacements) { + std::string s; + + // Empty std::string. + s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}); + EXPECT_EQ(s, ""); + + // Empty substring. + s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}}); + EXPECT_EQ(s, "abc"); + + // Replace entire std::string, one char at a time + s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}}); + EXPECT_EQ(s, "xyz"); + s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}}); + EXPECT_EQ(s, "xyz"); + + // Replace once at the start (longer matches take precedence) + s = absl::StrReplaceAll("abc", {{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}); + EXPECT_EQ(s, "xyz"); + + // Replace once in the middle. + s = absl::StrReplaceAll( + "Abc!", {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}); + EXPECT_EQ(s, "Ayz!"); + + // Replace once at the end. + s = absl::StrReplaceAll( + "Abc!", + {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}); + EXPECT_EQ(s, "Ayz?"); + + // Replace multiple times with varying lengths of original/replacement. + s = absl::StrReplaceAll("ababa", {{"a", "xxx"}, {"b", "XXXX"}}); + EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx"); + + // Overlapping matches are replaced greedily. + s = absl::StrReplaceAll("aaa", {{"aa", "x"}, {"a", "X"}}); + EXPECT_EQ(s, "xX"); + s = absl::StrReplaceAll("aaa", {{"a", "X"}, {"aa", "x"}}); + EXPECT_EQ(s, "xX"); + + // Two well-known sentences + s = absl::StrReplaceAll("the quick brown fox jumped over the lazy dogs", + { + {"brown", "box"}, + {"dogs", "jugs"}, + {"fox", "with"}, + {"jumped", "five"}, + {"over", "dozen"}, + {"quick", "my"}, + {"the", "pack"}, + {"the lazy", "liquor"}, + }); + EXPECT_EQ(s, "pack my box with five dozen liquor jugs"); +} + +TEST(StrReplaceAll, ManyReplacementsInMap) { + std::map<const char *, const char *> replacements; + replacements["$who"] = "Bob"; + replacements["$count"] = "5"; + replacements["#Noun"] = "Apples"; + std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!", + replacements); + EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +} + +TEST(StrReplaceAll, ReplacementsInPlace) { + std::string s = std::string("$who bought $count #Noun. Thanks $who!"); + int count; + count = absl::StrReplaceAll({{"$count", absl::StrCat(5)}, + {"$who", "Bob"}, + {"#Noun", "Apples"}}, &s); + EXPECT_EQ(count, 4); + EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +} + +TEST(StrReplaceAll, ReplacementsInPlaceInMap) { + std::string s = std::string("$who bought $count #Noun. Thanks $who!"); + std::map<absl::string_view, absl::string_view> replacements; + replacements["$who"] = "Bob"; + replacements["$count"] = "5"; + replacements["#Noun"] = "Apples"; + int count; + count = absl::StrReplaceAll(replacements, &s); + EXPECT_EQ(count, 4); + EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +} + +struct Cont { + Cont() {} + explicit Cont(absl::string_view src) : data(src) {} + + absl::string_view data; +}; + +template <int index> +absl::string_view get(const Cont& c) { + auto splitter = absl::StrSplit(c.data, ':'); + auto it = splitter.begin(); + for (int i = 0; i < index; ++i) ++it; + + return *it; +} + +TEST(StrReplaceAll, VariableNumber) { + std::string s; + { + std::vector<std::pair<std::string, std::string>> replacements; + + s = "abc"; + EXPECT_EQ(0, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("abc", s); + + s = "abc"; + replacements.push_back({"a", "A"}); + EXPECT_EQ(1, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("Abc", s); + + s = "abc"; + replacements.push_back({"b", "B"}); + EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("ABc", s); + + s = "abc"; + replacements.push_back({"d", "D"}); + EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("ABc", s); + + EXPECT_EQ("ABcABc", absl::StrReplaceAll("abcabc", replacements)); + } + + { + std::map<const char*, const char*> replacements; + replacements["aa"] = "x"; + replacements["a"] = "X"; + s = "aaa"; + EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("xX", s); + + EXPECT_EQ("xxX", absl::StrReplaceAll("aaaaa", replacements)); + } + + { + std::list<std::pair<absl::string_view, absl::string_view>> replacements = { + {"a", "x"}, {"b", "y"}, {"c", "z"}}; + + std::string s = absl::StrReplaceAll("abc", replacements); + EXPECT_EQ(s, "xyz"); + } + + { + using X = std::tuple<absl::string_view, std::string, int>; + std::vector<X> replacements(3); + replacements[0] = X{"a", "x", 1}; + replacements[1] = X{"b", "y", 0}; + replacements[2] = X{"c", "z", -1}; + + std::string s = absl::StrReplaceAll("abc", replacements); + EXPECT_EQ(s, "xyz"); + } + + { + std::vector<Cont> replacements(3); + replacements[0] = Cont{"a:x"}; + replacements[1] = Cont{"b:y"}; + replacements[2] = Cont{"c:z"}; + + std::string s = absl::StrReplaceAll("abc", replacements); + EXPECT_EQ(s, "xyz"); + } +} + +// Same as above, but using the in-place variant of absl::StrReplaceAll, +// that returns the # of replacements performed. +TEST(StrReplaceAll, Inplace) { + std::string s; + int reps; + + // Empty std::string. + s = ""; + reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s); + EXPECT_EQ(reps, 0); + EXPECT_EQ(s, ""); + + // Empty substring. + s = "abc"; + reps = absl::StrReplaceAll({{"", ""}, {"", "y"}, {"x", ""}}, &s); + EXPECT_EQ(reps, 0); + EXPECT_EQ(s, "abc"); + + // Replace entire std::string, one char at a time + s = "abc"; + reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s); + EXPECT_EQ(reps, 3); + EXPECT_EQ(s, "xyz"); + s = "zxy"; + reps = absl::StrReplaceAll({{"z", "x"}, {"x", "y"}, {"y", "z"}}, &s); + EXPECT_EQ(reps, 3); + EXPECT_EQ(s, "xyz"); + + // Replace once at the start (longer matches take precedence) + s = "abc"; + reps = absl::StrReplaceAll({{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}, &s); + EXPECT_EQ(reps, 1); + EXPECT_EQ(s, "xyz"); + + // Replace once in the middle. + s = "Abc!"; + reps = absl::StrReplaceAll( + {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}, &s); + EXPECT_EQ(reps, 1); + EXPECT_EQ(s, "Ayz!"); + + // Replace once at the end. + s = "Abc!"; + reps = absl::StrReplaceAll( + {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}, &s); + EXPECT_EQ(reps, 1); + EXPECT_EQ(s, "Ayz?"); + + // Replace multiple times with varying lengths of original/replacement. + s = "ababa"; + reps = absl::StrReplaceAll({{"a", "xxx"}, {"b", "XXXX"}}, &s); + EXPECT_EQ(reps, 5); + EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx"); + + // Overlapping matches are replaced greedily. + s = "aaa"; + reps = absl::StrReplaceAll({{"aa", "x"}, {"a", "X"}}, &s); + EXPECT_EQ(reps, 2); + EXPECT_EQ(s, "xX"); + s = "aaa"; + reps = absl::StrReplaceAll({{"a", "X"}, {"aa", "x"}}, &s); + EXPECT_EQ(reps, 2); + EXPECT_EQ(s, "xX"); + + // Two well-known sentences + s = "the quick brown fox jumped over the lazy dogs"; + reps = absl::StrReplaceAll( + { + {"brown", "box"}, + {"dogs", "jugs"}, + {"fox", "with"}, + {"jumped", "five"}, + {"over", "dozen"}, + {"quick", "my"}, + {"the", "pack"}, + {"the lazy", "liquor"}, + }, + &s); + EXPECT_EQ(reps, 8); + EXPECT_EQ(s, "pack my box with five dozen liquor jugs"); +} diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc new file mode 100644 index 000000000000..910a67cf2840 --- /dev/null +++ b/absl/strings/str_split.cc @@ -0,0 +1,133 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_split.h" + +#include <cassert> +#include <cstdlib> +#include <cstring> +#include <iterator> +#include <limits> + +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/ascii.h" + +namespace absl { + +namespace { + +// This GenericFind() template function encapsulates the finding algorithm +// shared between the ByString and ByAnyChar delimiters. The FindPolicy +// template parameter allows each delimiter to customize the actual find +// function to use and the length of the found delimiter. For example, the +// Literal delimiter will ultimately use absl::string_view::find(), and the +// AnyOf delimiter will use absl::string_view::find_first_of(). +template <typename FindPolicy> +absl::string_view GenericFind(absl::string_view text, + absl::string_view delimiter, size_t pos, + FindPolicy find_policy) { + if (delimiter.empty() && text.length() > 0) { + // Special case for empty std::string delimiters: always return a zero-length + // absl::string_view referring to the item at position 1 past pos. + return absl::string_view(text.begin() + pos + 1, 0); + } + size_t found_pos = absl::string_view::npos; + absl::string_view found(text.end(), 0); // By default, not found + found_pos = find_policy.Find(text, delimiter, pos); + if (found_pos != absl::string_view::npos) { + found = absl::string_view(text.data() + found_pos, + find_policy.Length(delimiter)); + } + return found; +} + +// Finds using absl::string_view::find(), therefore the length of the found +// delimiter is delimiter.length(). +struct LiteralPolicy { + size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + return text.find(delimiter, pos); + } + size_t Length(absl::string_view delimiter) { return delimiter.length(); } +}; + +// Finds using absl::string_view::find_first_of(), therefore the length of the +// found delimiter is 1. +struct AnyOfPolicy { + size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + return text.find_first_of(delimiter, pos); + } + size_t Length(absl::string_view /* delimiter */) { return 1; } +}; + +} // namespace + +// +// ByString +// + +ByString::ByString(absl::string_view sp) : delimiter_(sp) {} + +absl::string_view ByString::Find(absl::string_view text, size_t pos) const { + if (delimiter_.length() == 1) { + // Much faster to call find on a single character than on an + // absl::string_view. + size_t found_pos = text.find(delimiter_[0], pos); + if (found_pos == absl::string_view::npos) + return absl::string_view(text.end(), 0); + return text.substr(found_pos, 1); + } + return GenericFind(text, delimiter_, pos, LiteralPolicy()); +} + +// +// ByChar +// + +absl::string_view ByChar::Find(absl::string_view text, size_t pos) const { + size_t found_pos = text.find(c_, pos); + if (found_pos == absl::string_view::npos) + return absl::string_view(text.end(), 0); + return text.substr(found_pos, 1); +} + +// +// ByAnyChar +// + +ByAnyChar::ByAnyChar(absl::string_view sp) : delimiters_(sp) {} + +absl::string_view ByAnyChar::Find(absl::string_view text, size_t pos) const { + return GenericFind(text, delimiters_, pos, AnyOfPolicy()); +} + +// +// ByLength +// +ByLength::ByLength(ptrdiff_t length) : length_(length) { + ABSL_RAW_CHECK(length > 0, ""); +} + +absl::string_view ByLength::Find(absl::string_view text, + size_t pos) const { + pos = std::min(pos, text.size()); // truncate `pos` + absl::string_view substr = text.substr(pos); + // If the std::string is shorter than the chunk size we say we + // "can't find the delimiter" so this will be the last chunk. + if (substr.length() <= static_cast<size_t>(length_)) + return absl::string_view(text.end(), 0); + + return absl::string_view(substr.begin() + length_, 0); +} + +} // namespace absl diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h new file mode 100644 index 000000000000..a7b48b18916b --- /dev/null +++ b/absl/strings/str_split.h @@ -0,0 +1,511 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_split.h +// ----------------------------------------------------------------------------- +// +// This file contains functions for splitting strings. It defines the main +// `StrSplit()` function, several delimiters for determining the boundaries on +// which to split the std::string, and predicates for filtering delimited results. +// `StrSplit()` adapts the returned collection to the type specified by the +// caller. +// +// Example: +// +// // Splits the given std::string on commas. Returns the results in a +// // vector of strings. +// std::vector<std::string> v = absl::StrSplit("a,b,c", ','); +// // Can also use "," +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// See StrSplit() below for more information. +#ifndef ABSL_STRINGS_STR_SPLIT_H_ +#define ABSL_STRINGS_STR_SPLIT_H_ + +#include <algorithm> +#include <cstddef> +#include <map> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/str_split_internal.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" + +namespace absl { + +//------------------------------------------------------------------------------ +// Delimiters +//------------------------------------------------------------------------------ +// +// `StrSplit()` uses delimiters to define the boundaries between elements in the +// provided input. Several `Delimiter` types are defined below. If a std::string +// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of +// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it +// were passed a `ByString` delimiter. +// +// A `Delimiter` is an object with a `Find()` function that knows how to find +// the first occurrence of itself in a given `absl::string_view`. +// +// The following `Delimiter` types are available for use within `StrSplit()`: +// +// - `ByString` (default for std::string arguments) +// - `ByChar` (default for a char argument) +// - `ByAnyChar` +// - `ByLength` +// - `MaxSplits` +// +// +// A Delimiter's Find() member function will be passed the input text that is to +// be split and the position to begin searching for the next delimiter in the +// input text. The returned absl::string_view should refer to the next +// occurrence (after pos) of the represented delimiter; this returned +// absl::string_view represents the next location where the input std::string should +// be broken. The returned absl::string_view may be zero-length if the Delimiter +// does not represent a part of the std::string (e.g., a fixed-length delimiter). If +// no delimiter is found in the given text, a zero-length absl::string_view +// referring to text.end() should be returned (e.g., +// absl::string_view(text.end(), 0)). It is important that the returned +// absl::string_view always be within the bounds of input text given as an +// argument--it must not refer to a std::string that is physically located outside of +// the given std::string. +// +// The following example is a simple Delimiter object that is created with a +// single char and will look for that char in the text passed to the Find() +// function: +// +// struct SimpleDelimiter { +// const char c_; +// explicit SimpleDelimiter(char c) : c_(c) {} +// absl::string_view Find(absl::string_view text, size_t pos) { +// auto found = text.find(c_, pos); +// if (found == absl::string_view::npos) +// return absl::string_view(text.end(), 0); +// +// return absl::string_view(text, found, 1); +// } +// }; + +// ByString +// +// A sub-std::string delimiter. If `StrSplit()` is passed a std::string in place of a +// `Delimiter` object, the std::string will be implicitly converted into a +// `ByString` delimiter. +// +// Example: +// +// // Because a std::string literal is converted to an `absl::ByString`, +// // the following two splits are equivalent. +// +// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", "); +// +// using absl::ByString; +// std::vector<std::string> v2 = absl::StrSplit("a, b, c", +// ByString(", ")); +// // v[0] == "a", v[1] == "b", v[3] == "c" +class ByString { + public: + explicit ByString(absl::string_view sp); + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + const std::string delimiter_; +}; + +// ByChar +// +// A single character delimiter. `ByChar` is functionally equivalent to a +// 1-char std::string within a `ByString` delimiter, but slightly more +// efficient. +// +// Example: +// +// // Because a char literal is converted to a absl::ByChar, +// // the following two splits are equivalent. +// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); +// using absl::ByChar; +// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(',')); +// // v[0] == "a", v[1] == "b", v[3] == "c" +// +// `ByChar` is also the default delimiter if a single character is given +// as the delimiter to `StrSplit()`. For example, the following calls are +// equivalent: +// +// std::vector<std::string> v = absl::StrSplit("a-b", '-'); +// +// using absl::ByChar; +// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-')); +// +class ByChar { + public: + explicit ByChar(char c) : c_(c) {} + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + char c_; +}; + +// ByAnyChar +// +// A delimiter that will match any of the given byte-sized characters within +// its provided std::string. +// +// Note: this delimiter works with single-byte std::string data, but does not work +// with variable-width encodings, such as UTF-8. +// +// Example: +// +// using absl::ByAnyChar; +// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); +// // v[0] == "a", v[1] == "b", v[3] == "c" +// +// If `ByAnyChar` is given the empty std::string, it behaves exactly like +// `ByString` and matches each individual character in the input std::string. +// +class ByAnyChar { + public: + explicit ByAnyChar(absl::string_view sp); + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + const std::string delimiters_; +}; + +// ByLength +// +// A delimiter for splitting into equal-length strings. The length argument to +// the constructor must be greater than 0. +// +// Note: this delimiter works with single-byte std::string data, but does not work +// with variable-width encodings, such as UTF-8. +// +// Example: +// +// using absl::ByLength; +// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3)); + +// // v[0] == "123", v[1] == "456", v[2] == "789" +// +// Note that the std::string does not have to be a multiple of the fixed split +// length. In such a case, the last substring will be shorter. +// +// using absl::ByLength; +// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2)); +// +// // v[0] == "12", v[1] == "35", v[2] == "5" +class ByLength { + public: + explicit ByLength(ptrdiff_t length); + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + const ptrdiff_t length_; +}; + +namespace strings_internal { + +// A traits-like metafunction for selecting the default Delimiter object type +// for a particular Delimiter type. The base case simply exposes type Delimiter +// itself as the delimiter's Type. However, there are specializations for +// std::string-like objects that map them to the ByString delimiter object. +// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept +// std::string-like objects (e.g., ',') as delimiter arguments but they will be +// treated as if a ByString delimiter was given. +template <typename Delimiter> +struct SelectDelimiter { + using type = Delimiter; +}; + +template <> +struct SelectDelimiter<char> { + using type = ByChar; +}; +template <> +struct SelectDelimiter<char*> { + using type = ByString; +}; +template <> +struct SelectDelimiter<const char*> { + using type = ByString; +}; +template <> +struct SelectDelimiter<absl::string_view> { + using type = ByString; +}; +template <> +struct SelectDelimiter<std::string> { + using type = ByString; +}; + +// Wraps another delimiter and sets a max number of matches for that delimiter. +template <typename Delimiter> +class MaxSplitsImpl { + public: + MaxSplitsImpl(Delimiter delimiter, int limit) + : delimiter_(delimiter), limit_(limit), count_(0) {} + absl::string_view Find(absl::string_view text, size_t pos) { + if (count_++ == limit_) { + return absl::string_view(text.end(), 0); // No more matches. + } + return delimiter_.Find(text, pos); + } + + private: + Delimiter delimiter_; + const int limit_; + int count_; +}; + +} // namespace strings_internal + +// MaxSplits() +// +// A delimiter that limits the number of matches which can occur to the passed +// `limit`. The last element in the returned collection will contain all +// remaining unsplit pieces, which may contain instances of the delimiter. +// The collection will contain at most `limit` + 1 elements. +// Example: +// +// using absl::MaxSplits; +// std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1)); +// +// // v[0] == "a", v[1] == "b,c" +template <typename Delimiter> +inline strings_internal::MaxSplitsImpl< + typename strings_internal::SelectDelimiter<Delimiter>::type> +MaxSplits(Delimiter delimiter, int limit) { + typedef + typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType; + return strings_internal::MaxSplitsImpl<DelimiterType>( + DelimiterType(delimiter), limit); +} + +//------------------------------------------------------------------------------ +// Predicates +//------------------------------------------------------------------------------ +// +// Predicates filter the results of a `StrSplit()` by determining whether or not +// a resultant element is included in the result set. A predicate may be passed +// as an optional third argument to the `StrSplit()` function. +// +// Predicates are unary functions (or functors) that take a single +// `absl::string_view` argument and return a bool indicating whether the +// argument should be included (`true`) or excluded (`false`). +// +// Predicates are useful when filtering out empty substrings. By default, empty +// substrings may be returned by `StrSplit()`, which is similar to the way split +// functions work in other programming languages. + +// AllowEmpty() +// +// Always returns `true`, indicating that all strings--including empty +// strings--should be included in the split output. This predicate is not +// strictly needed because this is the default behavior of `StrSplit()`; +// however, it might be useful at some call sites to make the intent explicit. +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty()); +// +// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] = "b", v[4] == "" +struct AllowEmpty { + bool operator()(absl::string_view) const { return true; } +}; + +// SkipEmpty() +// +// Returns `false` if the given `absl::string_view` is empty, indicating that +// `StrSplit()` should omit the empty std::string. +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty()); +// +// // v[0] == "a", v[1] == "b" +// +// Note: `SkipEmpty()` does not consider a std::string containing only whitespace +// to be empty. To skip such whitespace as well, use the `SkipWhitespace()` +// predicate. +struct SkipEmpty { + bool operator()(absl::string_view sp) const { return !sp.empty(); } +}; + +// SkipWhitespace() +// +// Returns `false` if the given `absl::string_view` is empty *or* contains only +// whitespace, indicating that `StrSplit()` should omit the std::string. +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", +// ',', SkipWhitespace()); +// // v[0] == " a ", v[1] == "b" +// +// // SkipEmpty() would return whitespace elements +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty()); +// // v[0] == " a ", v[1] == " ", v[2] == "b" +struct SkipWhitespace { + bool operator()(absl::string_view sp) const { + sp = absl::StripAsciiWhitespace(sp); + return !sp.empty(); + } +}; + +//------------------------------------------------------------------------------ +// StrSplit() +//------------------------------------------------------------------------------ + +// StrSplit() +// +// Splits a given `std::string` based on the provided `Delimiter` object, +// returning the elements within the type specified by the caller. Optionally, +// you may also pass a `Predicate` to `StrSplit()` indicating whether to include +// or exclude the resulting element within the final result set. (See the +// overviews for Delimiters and Predicates above.) +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ','); +// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" +// +// You can also provide an explicit `Delimiter` object: +// +// Example: +// +// using absl::ByAnyChar; +// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); +// // v[0] == "a", v[1] == "b", v[3] == "c" +// +// See above for more information on delimiters. +// +// By default, empty strings are included in the result set. You can optionally +// include a third `Predicate` argument to apply a test for whether the +// resultant element should be included in the result set: +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", +// ',', SkipWhitespace()); +// // v[0] == "a", v[1] == "b" +// +// See above for more information on predicates. +// +//------------------------------------------------------------------------------ +// StrSplit() Return Types +//------------------------------------------------------------------------------ +// +// The `StrSplit()` function adapts the returned collection to the collection +// specified by the caller (e.g. `std::vector` above). The returned collections +// may contain `string`, `absl::string_view` (in which case the original std::string +// being split must ensure that it outlives the collection), or any object that +// can be explicitly created from an `absl::string_view`. This behavior works +// for: +// +// 1) All standard STL containers including `std::vector`, `std::list`, +// `std::deque`, `std::set`,`std::multiset`, 'std::map`, and `std::multimap` +// 2) `std::pair` (which is not actually a container). See below. +// +// Example: +// +// // The results are returned as `absl::string_view` objects. Note that we +// // have to ensure that the input std::string outlives any results. +// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); +// +// // Stores results in a std::set<std::string>, which also performs +// // de-duplication and orders the elements in ascending order. +// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ','); +// // v[0] == "a", v[1] == "b", v[2] = "c" +// +// // `StrSplit()` can be used within a range-based for loop, in which case +// // each element will be of type `absl::string_view`. +// std::vector<std::string> v; +// for (const auto sv : absl::StrSplit("a,b,c", ',')) { +// if (sv != "b") v.emplace_back(sv); +// } +// // v[0] == "a", v[1] == "c" +// +// // Stores results in a map. The map implementation assumes that the input +// // is provided as a series of key/value pairs. For example, the 0th element +// // resulting from the split will be stored as a key to the 1st element. If +// // an odd number of elements are resolved, the last element is paired with +// // a default-constructed value (e.g., empty std::string). +// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ','); +// // m["a"] == "b", m["c"] == "" // last component value equals "" +// +// Splitting to `std::pair` is an interesting case because it can hold only two +// elements and is not a collection type. When splitting to a `std::pair` the +// first two split strings become the `std::pair` `.first` and `.second` +// members, respectively. The remaining split substrings are discarded. If there +// are less than two split substrings, the empty std::string is used for the +// corresponding +// `std::pair` member. +// +// Example: +// +// // Stores first two split strings as the members in a std::pair. +// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); +// // p.first == "a", p.second == "b" // "c" is omitted. +// +// The `StrSplit()` function can be used multiple times to perform more +// complicated splitting logic, such as intelligently parsing key-value pairs. +// +// Example: +// +// // The input std::string "a=b=c,d=e,f=,g" becomes +// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" } +// std::map<std::string, std::string> m; +// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { +// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); +// } +// EXPECT_EQ("b=c", m.find("a")->second); +// EXPECT_EQ("e", m.find("d")->second); +// EXPECT_EQ("", m.find("f")->second); +// EXPECT_EQ("", m.find("g")->second); +// +// WARNING: Due to a legacy bug that is maintained for backward compatibility, +// splitting the following empty string_views produces different results: +// +// absl::StrSplit(absl::string_view(""), '-'); // {""} +// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""} +// +// Try not to depend on this distinction because the bug may one day be fixed. +template <typename Delimiter> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty> +StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty>( + std::move(text), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename Predicate> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate> +StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, + Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate>( + std::move(text), DelimiterType(d), std::move(p)); +} + +} // namespace absl + +#endif // ABSL_STRINGS_STR_SPLIT_H_ diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc new file mode 100644 index 000000000000..a95a0fbd3596 --- /dev/null +++ b/absl/strings/str_split_test.cc @@ -0,0 +1,896 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_split.h" + +#include <climits> +#include <cstdlib> +#include <cstring> +#include <deque> +#include <limits> +#include <list> +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/numbers.h" + +namespace { + +using ::testing::ElementsAre; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +// This tests the overall split API, which is made up of the absl::StrSplit() +// function and the Delimiter objects in the absl:: namespace. +// This TEST macro is outside of any namespace to require full specification of +// namespaces just like callers will need to use. +TEST(Split, APIExamples) { + { + // Passes std::string delimiter. Assumes the default of Literal. + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByString; + v = absl::StrSplit("a,b,c", ByString(",")); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")), + ElementsAre("a", "b", "c")); + } + + { + // Same as above, but using a single character as the delimiter. + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByChar; + v = absl::StrSplit("a,b,c", ByChar(',')); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Same as above, but using std::string + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByChar; + v = absl::StrSplit("a,b,c", ByChar(',')); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses the Literal std::string "=>" as the delimiter. + const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // The substrings are returned as string_views, eliminating copying. + std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Leading and trailing empty substrings. + std::vector<std::string> v = absl::StrSplit(",a,b,c,", ','); + EXPECT_THAT(v, ElementsAre("", "a", "b", "c", "")); + } + + { + // Splits on a delimiter that is not found. + std::vector<std::string> v = absl::StrSplit("abc", ','); + EXPECT_THAT(v, ElementsAre("abc")); + } + + { + // Splits the input std::string into individual characters by using an empty + // std::string as the delimiter. + std::vector<std::string> v = absl::StrSplit("abc", ""); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Splits std::string data with embedded NUL characters, using NUL as the + // delimiter. A simple delimiter of "\0" doesn't work because strlen() will + // say that's the empty std::string when constructing the absl::string_view + // delimiter. Instead, a non-empty std::string containing NUL can be used as the + // delimiter. + std::string embedded_nulls("a\0b\0c", 5); + std::string null_delim("\0", 1); + std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Stores first two split strings as the members in a std::pair. + std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + // "c" is omitted because std::pair can hold only two elements. + } + + { + // Results stored in std::set<std::string> + std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses a non-const char* delimiter. + char a[] = ","; + char* d = a + 0; + std::vector<std::string> v = absl::StrSplit("a,b,c", d); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Results split using either of , or ; + using absl::ByAnyChar; + std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;")); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses the SkipWhitespace predicate. + using absl::SkipWhitespace; + std::vector<std::string> v = absl::StrSplit("a, ,,b,", ',', SkipWhitespace()); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + // Uses the ByLength delimiter. + using absl::ByLength; + std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3)); + EXPECT_THAT(v, ElementsAre("abc", "def", "g")); + } + + { + // Results stored in a std::map. + std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); + EXPECT_EQ(2, m.size()); + EXPECT_EQ("3", m["a"]); + EXPECT_EQ("2", m["b"]); + } + + { + // Results stored in a std::multimap. + std::multimap<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); + EXPECT_EQ(3, m.size()); + auto it = m.find("a"); + EXPECT_EQ("1", it->second); + ++it; + EXPECT_EQ("3", it->second); + it = m.find("b"); + EXPECT_EQ("2", it->second); + } + + { + // Demonstrates use in a range-based for loop in C++11. + std::string s = "x,x,x,x,x,x,x"; + for (absl::string_view sp : absl::StrSplit(s, ',')) { + EXPECT_EQ("x", sp); + } + } + + { + // Demonstrates use with a Predicate in a range-based for loop. + using absl::SkipWhitespace; + std::string s = " ,x,,x,,x,x,x,,"; + for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) { + EXPECT_EQ("x", sp); + } + } + + { + // Demonstrates a "smart" split to std::map using two separate calls to + // absl::StrSplit. One call to split the records, and another call to split + // the keys and values. This also uses the Limit delimiter so that the + // std::string "a=b=c" will split to "a" -> "b=c". + std::map<std::string, std::string> m; + for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { + m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); + } + EXPECT_EQ("b=c", m.find("a")->second); + EXPECT_EQ("e", m.find("d")->second); + EXPECT_EQ("", m.find("f")->second); + EXPECT_EQ("", m.find("g")->second); + } +} + +// +// Tests for SplitIterator +// + +TEST(SplitIterator, Basics) { + auto splitter = absl::StrSplit("a,b", ','); + auto it = splitter.begin(); + auto end = splitter.end(); + + EXPECT_NE(it, end); + EXPECT_EQ("a", *it); // tests dereference + ++it; // tests preincrement + EXPECT_NE(it, end); + EXPECT_EQ("b", std::string(it->data(), it->size())); // tests dereference as ptr + it++; // tests postincrement + EXPECT_EQ(it, end); +} + +// Simple Predicate to skip a particular std::string. +class Skip { + public: + explicit Skip(const std::string& s) : s_(s) {} + bool operator()(absl::string_view sp) { return sp != s_; } + + private: + std::string s_; +}; + +TEST(SplitIterator, Predicate) { + auto splitter = absl::StrSplit("a,b,c", ',', Skip("b")); + auto it = splitter.begin(); + auto end = splitter.end(); + + EXPECT_NE(it, end); + EXPECT_EQ("a", *it); // tests dereference + ++it; // tests preincrement -- "b" should be skipped here. + EXPECT_NE(it, end); + EXPECT_EQ("c", std::string(it->data(), it->size())); // tests dereference as ptr + it++; // tests postincrement + EXPECT_EQ(it, end); +} + +TEST(SplitIterator, EdgeCases) { + // Expected input and output, assuming a delimiter of ',' + struct { + std::string in; + std::vector<std::string> expect; + } specs[] = { + {"", {""}}, + {"foo", {"foo"}}, + {",", {"", ""}}, + {",foo", {"", "foo"}}, + {"foo,", {"foo", ""}}, + {",foo,", {"", "foo", ""}}, + {"foo,bar", {"foo", "bar"}}, + }; + + for (const auto& spec : specs) { + SCOPED_TRACE(spec.in); + auto splitter = absl::StrSplit(spec.in, ','); + auto it = splitter.begin(); + auto end = splitter.end(); + for (const auto& expected : spec.expect) { + EXPECT_NE(it, end); + EXPECT_EQ(expected, *it++); + } + EXPECT_EQ(it, end); + } +} + +TEST(Splitter, Const) { + const auto splitter = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(splitter, ElementsAre("a", "b", "c")); +} + +TEST(Split, EmptyAndNull) { + // Attention: Splitting a null absl::string_view is different than splitting + // an empty absl::string_view even though both string_views are considered + // equal. This behavior is likely surprising and undesirable. However, to + // maintain backward compatibility, there is a small "hack" in + // str_split_internal.h that preserves this behavior. If that behavior is ever + // changed/fixed, this test will need to be updated. + EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre("")); + EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre()); +} + +TEST(SplitIterator, EqualityAsEndCondition) { + auto splitter = absl::StrSplit("a,b,c", ','); + auto it = splitter.begin(); + auto it2 = it; + + // Increments it2 twice to point to "c" in the input text. + ++it2; + ++it2; + EXPECT_EQ("c", *it2); + + // This test uses a non-end SplitIterator as the terminating condition in a + // for loop. This relies on SplitIterator equality for non-end SplitIterators + // working correctly. At this point it2 points to "c", and we use that as the + // "end" condition in this test. + std::vector<absl::string_view> v; + for (; it != it2; ++it) { + v.push_back(*it); + } + EXPECT_THAT(v, ElementsAre("a", "b")); +} + +// +// Tests for Splitter +// + +TEST(Splitter, RangeIterators) { + auto splitter = absl::StrSplit("a,b,c", ','); + std::vector<absl::string_view> output; + for (const absl::string_view p : splitter) { + output.push_back(p); + } + EXPECT_THAT(output, ElementsAre("a", "b", "c")); +} + +// Some template functions for use in testing conversion operators +template <typename ContainerType, typename Splitter> +void TestConversionOperator(const Splitter& splitter) { + ContainerType output = splitter; + EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d")); +} + +template <typename MapType, typename Splitter> +void TestMapConversionOperator(const Splitter& splitter) { + MapType m = splitter; + EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d"))); +} + +template <typename FirstType, typename SecondType, typename Splitter> +void TestPairConversionOperator(const Splitter& splitter) { + std::pair<FirstType, SecondType> p = splitter; + EXPECT_EQ(p, (std::pair<FirstType, SecondType>("a", "b"))); +} + +TEST(Splitter, ConversionOperator) { + auto splitter = absl::StrSplit("a,b,c,d", ','); + + TestConversionOperator<std::vector<absl::string_view>>(splitter); + TestConversionOperator<std::vector<std::string>>(splitter); + TestConversionOperator<std::list<absl::string_view>>(splitter); + TestConversionOperator<std::list<std::string>>(splitter); + TestConversionOperator<std::deque<absl::string_view>>(splitter); + TestConversionOperator<std::deque<std::string>>(splitter); + TestConversionOperator<std::set<absl::string_view>>(splitter); + TestConversionOperator<std::set<std::string>>(splitter); + TestConversionOperator<std::multiset<absl::string_view>>(splitter); + TestConversionOperator<std::multiset<std::string>>(splitter); + TestConversionOperator<std::unordered_set<std::string>>(splitter); + + // Tests conversion to map-like objects. + + TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>( + splitter); + TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<std::map<std::string, std::string>>(splitter); + TestMapConversionOperator< + std::multimap<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(splitter); + TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); + TestMapConversionOperator<std::unordered_map<std::string, std::string>>(splitter); + + // Tests conversion to std::pair + + TestPairConversionOperator<absl::string_view, absl::string_view>(splitter); + TestPairConversionOperator<absl::string_view, std::string>(splitter); + TestPairConversionOperator<std::string, absl::string_view>(splitter); + TestPairConversionOperator<std::string, std::string>(splitter); +} + +// A few additional tests for conversion to std::pair. This conversion is +// different from others because a std::pair always has exactly two elements: +// .first and .second. The split has to work even when the split has +// less-than, equal-to, and more-than 2 strings. +TEST(Splitter, ToPair) { + { + // Empty std::string + std::pair<std::string, std::string> p = absl::StrSplit("", ','); + EXPECT_EQ("", p.first); + EXPECT_EQ("", p.second); + } + + { + // Only first + std::pair<std::string, std::string> p = absl::StrSplit("a", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("", p.second); + } + + { + // Only second + std::pair<std::string, std::string> p = absl::StrSplit(",b", ','); + EXPECT_EQ("", p.first); + EXPECT_EQ("b", p.second); + } + + { + // First and second. + std::pair<std::string, std::string> p = absl::StrSplit("a,b", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + } + + { + // First and second and then more stuff that will be ignored. + std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + // "c" is omitted. + } +} + +TEST(Splitter, Predicates) { + static const char kTestChars[] = ",a, ,b,"; + using absl::AllowEmpty; + using absl::SkipEmpty; + using absl::SkipWhitespace; + + { + // No predicate. Does not skip empties. + auto splitter = absl::StrSplit(kTestChars, ','); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("", "a", " ", "b", "")); + } + + { + // Allows empty strings. Same behavior as no predicate at all. + auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty()); + std::vector<std::string> v_allowempty = splitter; + EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", "")); + + // Ensures AllowEmpty equals the behavior with no predicate. + auto splitter_nopredicate = absl::StrSplit(kTestChars, ','); + std::vector<std::string> v_nopredicate = splitter_nopredicate; + EXPECT_EQ(v_allowempty, v_nopredicate); + } + + { + // Skips empty strings. + auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty()); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("a", " ", "b")); + } + + { + // Skips empty and all-whitespace strings. + auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace()); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("a", "b")); + } +} + +// +// Tests for StrSplit() +// + +TEST(Split, Basics) { + { + // Doesn't really do anything useful because the return value is ignored, + // but it should work. + absl::StrSplit("a,b,c", ','); + } + + { + std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Ensures that assignment works. This requires a little extra work with + // C++11 because of overloads with initializer_list. + std::vector<std::string> v; + v = absl::StrSplit("a,b,c", ','); + + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + std::map<std::string, std::string> m; + m = absl::StrSplit("a,b,c", ','); + EXPECT_EQ(2, m.size()); + std::unordered_map<std::string, std::string> hm; + hm = absl::StrSplit("a,b,c", ','); + EXPECT_EQ(2, hm.size()); + } +} + +absl::string_view ReturnStringView() { return "Hello World"; } +const char* ReturnConstCharP() { return "Hello World"; } +char* ReturnCharP() { return const_cast<char*>("Hello World"); } + +TEST(Split, AcceptsCertainTemporaries) { + std::vector<std::string> v; + v = absl::StrSplit(ReturnStringView(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); + v = absl::StrSplit(ReturnConstCharP(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); + v = absl::StrSplit(ReturnCharP(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); +} + +TEST(Split, Temporary) { + // Use a std::string longer than the small-std::string-optimization length, so that when + // the temporary is destroyed, if the splitter keeps a reference to the + // std::string's contents, it'll reference freed memory instead of just dead + // on-stack memory. + const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; + EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) + << "Input should be larger than fits on the stack."; + + // This happens more often in C++11 as part of a range-based for loop. + auto splitter = absl::StrSplit(std::string(input), ','); + std::string expected = "a"; + for (absl::string_view letter : splitter) { + EXPECT_EQ(expected, letter); + ++expected[0]; + } + EXPECT_EQ("v", expected); + + // This happens more often in C++11 as part of a range-based for loop. + auto std_splitter = absl::StrSplit(std::string(input), ','); + expected = "a"; + for (absl::string_view letter : std_splitter) { + EXPECT_EQ(expected, letter); + ++expected[0]; + } + EXPECT_EQ("v", expected); +} + +template <typename T> +static std::unique_ptr<T> CopyToHeap(const T& value) { + return std::unique_ptr<T>(new T(value)); +} + +TEST(Split, LvalueCaptureIsCopyable) { + std::string input = "a,b"; + auto heap_splitter = CopyToHeap(absl::StrSplit(input, ',')); + auto stack_splitter = *heap_splitter; + heap_splitter.reset(); + std::vector<std::string> result = stack_splitter; + EXPECT_THAT(result, testing::ElementsAre("a", "b")); +} + +TEST(Split, TemporaryCaptureIsCopyable) { + auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ',')); + auto stack_splitter = *heap_splitter; + heap_splitter.reset(); + std::vector<std::string> result = stack_splitter; + EXPECT_THAT(result, testing::ElementsAre("a", "b")); +} + +TEST(Split, SplitterIsCopyableAndMoveable) { + auto a = absl::StrSplit("foo", '-'); + + // Ensures that the following expressions compile. + auto b = a; // Copy construct + auto c = std::move(a); // Move construct + b = c; // Copy assign + c = std::move(b); // Move assign + + EXPECT_THAT(c, ElementsAre("foo")); +} + +TEST(Split, StringDelimiter) { + { + std::vector<absl::string_view> v = absl::StrSplit("a,b", ','); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(",")); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<absl::string_view> v = + absl::StrSplit("a,b", absl::string_view(",")); + EXPECT_THAT(v, ElementsAre("a", "b")); + } +} + +TEST(Split, UTF8) { + // Tests splitting utf8 strings and utf8 delimiters. + { + // A utf8 input std::string with an ascii delimiter. + std::vector<absl::string_view> v = absl::StrSplit("a,ฮบแฝนฯฮผฮต", ','); + EXPECT_THAT(v, ElementsAre("a", "ฮบแฝนฯฮผฮต")); + } + + { + // A utf8 input std::string and a utf8 delimiter. + std::vector<absl::string_view> v = absl::StrSplit("a,ฮบแฝนฯฮผฮต,b", ",ฮบแฝนฯฮผฮต,"); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + // A utf8 input std::string and ByAnyChar with ascii chars. + std::vector<absl::string_view> v = + absl::StrSplit("Foo hรคllo thไธre", absl::ByAnyChar(" \t")); + EXPECT_THAT(v, ElementsAre("Foo", "hรคllo", "thไธre")); + } +} + +TEST(Split, EmptyStringDelimiter) { + { + std::vector<std::string> v = absl::StrSplit("", ""); + EXPECT_THAT(v, ElementsAre("")); + } + + { + std::vector<std::string> v = absl::StrSplit("a", ""); + EXPECT_THAT(v, ElementsAre("a")); + } + + { + std::vector<std::string> v = absl::StrSplit("ab", ""); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<std::string> v = absl::StrSplit("a b", ""); + EXPECT_THAT(v, ElementsAre("a", " ", "b")); + } +} + +TEST(Split, SubstrDelimiter) { + std::vector<absl::string_view> results; + absl::string_view delim("//"); + + results = absl::StrSplit("", delim); + EXPECT_THAT(results, ElementsAre("")); + + results = absl::StrSplit("//", delim); + EXPECT_THAT(results, ElementsAre("", "")); + + results = absl::StrSplit("ab", delim); + EXPECT_THAT(results, ElementsAre("ab")); + + results = absl::StrSplit("ab//", delim); + EXPECT_THAT(results, ElementsAre("ab", "")); + + results = absl::StrSplit("ab/", delim); + EXPECT_THAT(results, ElementsAre("ab/")); + + results = absl::StrSplit("a/b", delim); + EXPECT_THAT(results, ElementsAre("a/b")); + + results = absl::StrSplit("a//b", delim); + EXPECT_THAT(results, ElementsAre("a", "b")); + + results = absl::StrSplit("a///b", delim); + EXPECT_THAT(results, ElementsAre("a", "/b")); + + results = absl::StrSplit("a////b", delim); + EXPECT_THAT(results, ElementsAre("a", "", "b")); +} + +TEST(Split, EmptyResults) { + std::vector<absl::string_view> results; + + results = absl::StrSplit("", '#'); + EXPECT_THAT(results, ElementsAre("")); + + results = absl::StrSplit("#", '#'); + EXPECT_THAT(results, ElementsAre("", "")); + + results = absl::StrSplit("#cd", '#'); + EXPECT_THAT(results, ElementsAre("", "cd")); + + results = absl::StrSplit("ab#cd#", '#'); + EXPECT_THAT(results, ElementsAre("ab", "cd", "")); + + results = absl::StrSplit("ab##cd", '#'); + EXPECT_THAT(results, ElementsAre("ab", "", "cd")); + + results = absl::StrSplit("ab##", '#'); + EXPECT_THAT(results, ElementsAre("ab", "", "")); + + results = absl::StrSplit("ab#ab#", '#'); + EXPECT_THAT(results, ElementsAre("ab", "ab", "")); + + results = absl::StrSplit("aaaa", 'a'); + EXPECT_THAT(results, ElementsAre("", "", "", "", "")); + + results = absl::StrSplit("", '#', absl::SkipEmpty()); + EXPECT_THAT(results, ElementsAre()); +} + +template <typename Delimiter> +static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d, + size_t starting_pos, int expected_pos) { + absl::string_view found = d.Find(text, starting_pos); + return found.data() != text.end() && + expected_pos == found.data() - text.data(); +} + +// Helper function for testing Delimiter objects. Returns true if the given +// Delimiter is found in the given std::string at the given position. This function +// tests two cases: +// 1. The actual text given, staring at position 0 +// 2. The text given with leading padding that should be ignored +template <typename Delimiter> +static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) { + const std::string leading_text = ",x,y,z,"; + return IsFoundAtStartingPos(text, d, 0, expected_pos) && + IsFoundAtStartingPos(leading_text + std::string(text), d, + leading_text.length(), + expected_pos + leading_text.length()); +} + +// +// Tests for Literal +// + +// Tests using any delimiter that represents a single comma. +template <typename Delimiter> +void TestComma(Delimiter d) { + EXPECT_TRUE(IsFoundAt(",", d, 0)); + EXPECT_TRUE(IsFoundAt("a,", d, 1)); + EXPECT_TRUE(IsFoundAt(",b", d, 0)); + EXPECT_TRUE(IsFoundAt("a,b", d, 1)); + EXPECT_TRUE(IsFoundAt("a,b,", d, 1)); + EXPECT_TRUE(IsFoundAt("a,b,c", d, 1)); + EXPECT_FALSE(IsFoundAt("", d, -1)); + EXPECT_FALSE(IsFoundAt(" ", d, -1)); + EXPECT_FALSE(IsFoundAt("a", d, -1)); + EXPECT_FALSE(IsFoundAt("a b c", d, -1)); + EXPECT_FALSE(IsFoundAt("a;b;c", d, -1)); + EXPECT_FALSE(IsFoundAt(";", d, -1)); +} + +TEST(Delimiter, Literal) { + using absl::ByString; + TestComma(ByString(",")); + + // Works as named variable. + ByString comma_string(","); + TestComma(comma_string); + + // The first occurrence of empty std::string ("") in a std::string is at position 0. + // There is a test below that demonstrates this for absl::string_view::find(). + // If the ByString delimiter returned position 0 for this, there would + // be an infinite loop in the SplitIterator code. To avoid this, empty std::string + // is a special case in that it always returns the item at position 1. + absl::string_view abc("abc"); + EXPECT_EQ(0, abc.find("")); // "" is found at position 0 + ByString empty(""); + EXPECT_FALSE(IsFoundAt("", empty, 0)); + EXPECT_FALSE(IsFoundAt("a", empty, 0)); + EXPECT_TRUE(IsFoundAt("ab", empty, 1)); + EXPECT_TRUE(IsFoundAt("abc", empty, 1)); +} + +TEST(Split, ByChar) { + using absl::ByChar; + TestComma(ByChar(',')); + + // Works as named variable. + ByChar comma_char(','); + TestComma(comma_char); +} + +// +// Tests for ByAnyChar +// + +TEST(Delimiter, ByAnyChar) { + using absl::ByAnyChar; + ByAnyChar one_delim(","); + // Found + EXPECT_TRUE(IsFoundAt(",", one_delim, 0)); + EXPECT_TRUE(IsFoundAt("a,", one_delim, 1)); + EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1)); + EXPECT_TRUE(IsFoundAt(",b", one_delim, 0)); + // Not found + EXPECT_FALSE(IsFoundAt("", one_delim, -1)); + EXPECT_FALSE(IsFoundAt(" ", one_delim, -1)); + EXPECT_FALSE(IsFoundAt("a", one_delim, -1)); + EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1)); + EXPECT_FALSE(IsFoundAt(";", one_delim, -1)); + + ByAnyChar two_delims(",;"); + // Found + EXPECT_TRUE(IsFoundAt(",", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(",;", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";,", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0)); + EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1)); + // Not found + EXPECT_FALSE(IsFoundAt("", two_delims, -1)); + EXPECT_FALSE(IsFoundAt(" ", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("a", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); + + // ByAnyChar behaves just like ByString when given a delimiter of empty + // std::string. That is, it always returns a zero-length absl::string_view + // referring to the item at position 1, not position 0. + ByAnyChar empty(""); + EXPECT_FALSE(IsFoundAt("", empty, 0)); + EXPECT_FALSE(IsFoundAt("a", empty, 0)); + EXPECT_TRUE(IsFoundAt("ab", empty, 1)); + EXPECT_TRUE(IsFoundAt("abc", empty, 1)); +} + +// +// Tests for ByLength +// + +TEST(Delimiter, ByLength) { + using absl::ByLength; + + ByLength four_char_delim(4); + + // Found + EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4)); + EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4)); + EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4)); + // Not found + EXPECT_FALSE(IsFoundAt("", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0)); +} + +// Allocates too much memory for TSan and MSan. +#if !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER) +TEST(Split, WorksWithLargeStrings) { + if (sizeof(size_t) > 4 && !RunningOnValgrind()) { + std::string s(1ULL << 31, 'x'); + s.push_back('-'); // 2G + 1 byte + std::vector<absl::string_view> v = absl::StrSplit(s, '-'); + EXPECT_EQ(2, v.size()); + // The first element will contain 2G of 'x's. + // testing::StartsWith is too slow with a 2G std::string. + EXPECT_EQ('x', v[0][0]); + EXPECT_EQ('x', v[0][1]); + EXPECT_EQ('x', v[0][3]); + EXPECT_EQ("", v[1]); + } +} +#endif // THREAD_SANITIZER + +TEST(SplitInternalTest, TypeTraits) { + EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasMappedType<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasValueType<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasConstIterator<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value); + EXPECT_TRUE((absl::strings_internal::IsInitializerList< + std::initializer_list<int>>::value)); +} + +} // namespace diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc new file mode 100644 index 000000000000..4d4ba6c1741f --- /dev/null +++ b/absl/strings/string_view.cc @@ -0,0 +1,248 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/string_view.h" + +#ifndef ABSL_HAVE_STD_STRING_VIEW + +#include <algorithm> +#include <climits> +#include <cstring> +#include <ostream> +#include <string> + +#include "absl/strings/internal/memutil.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/match.h" + +namespace absl { + +namespace { +void WritePadding(std::ostream& o, size_t pad) { + char fill_buf[32]; + memset(fill_buf, o.fill(), sizeof(fill_buf)); + while (pad) { + size_t n = std::min(pad, sizeof(fill_buf)); + o.write(fill_buf, n); + pad -= n; + } +} + +class LookupTable { + public: + // For each character in wanted, sets the index corresponding + // to the ASCII code of that character. This is used by + // the find_.*_of methods below to tell whether or not a character is in + // the lookup table in constant time. + explicit LookupTable(string_view wanted) { + for (char c : wanted) { + table_[Index(c)] = true; + } + } + bool operator[](char c) const { return table_[Index(c)]; } + + private: + static unsigned char Index(char c) { return static_cast<unsigned char>(c); } + bool table_[UCHAR_MAX + 1] = {}; +}; + +} // namespace + +std::ostream& operator<<(std::ostream& o, string_view piece) { + std::ostream::sentry sentry(o); + if (sentry) { + size_t lpad = 0; + size_t rpad = 0; + if (static_cast<size_t>(o.width()) > piece.size()) { + size_t pad = o.width() - piece.size(); + if ((o.flags() & o.adjustfield) == o.left) { + rpad = pad; + } else { + lpad = pad; + } + } + if (lpad) WritePadding(o, lpad); + o.write(piece.data(), piece.size()); + if (rpad) WritePadding(o, rpad); + o.width(0); + } + return o; +} + +string_view::size_type string_view::copy(char* buf, size_type n, + size_type pos) const { + size_type ulen = length_; + assert(pos <= ulen); + size_type rlen = std::min(ulen - pos, n); + if (rlen > 0) { + const char* start = ptr_ + pos; + std::copy(start, start + rlen, buf); + } + return rlen; +} + +string_view::size_type string_view::find(string_view s, size_type pos) const + noexcept { + if (empty() || pos > length_) { + if (empty() && pos == 0 && s.empty()) return 0; + return npos; + } + const char* result = + strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); + return result ? result - ptr_ : npos; +} + +string_view::size_type string_view::find(char c, size_type pos) const noexcept { + if (empty() || pos >= length_) { + return npos; + } + const char* result = + static_cast<const char*>(memchr(ptr_ + pos, c, length_ - pos)); + return result != nullptr ? result - ptr_ : npos; +} + +string_view::size_type string_view::rfind(string_view s, size_type pos) const + noexcept { + if (length_ < s.length_) return npos; + if (s.empty()) return std::min(length_, pos); + const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_; + const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); + return result != last ? result - ptr_ : npos; +} + +// Search range is [0..pos] inclusive. If pos == npos, search everything. +string_view::size_type string_view::rfind(char c, size_type pos) const + noexcept { + // Note: memrchr() is not available on Windows. + if (empty()) return npos; + for (size_type i = std::min(pos, length_ - 1);; --i) { + if (ptr_[i] == c) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_first_of(string_view s, + size_type pos) const + noexcept { + if (empty() || s.empty()) { + return npos; + } + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = pos; i < length_; ++i) { + if (tbl[ptr_[i]]) { + return i; + } + } + return npos; +} + +string_view::size_type string_view::find_first_not_of(string_view s, + size_type pos) const + noexcept { + if (empty()) return npos; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = pos; i < length_; ++i) { + if (!tbl[ptr_[i]]) { + return i; + } + } + return npos; +} + +string_view::size_type string_view::find_first_not_of(char c, + size_type pos) const + noexcept { + if (empty()) return npos; + for (; pos < length_; ++pos) { + if (ptr_[pos] != c) { + return pos; + } + } + return npos; +} + +string_view::size_type string_view::find_last_of(string_view s, + size_type pos) const noexcept { + if (empty() || s.empty()) return npos; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = std::min(pos, length_ - 1);; --i) { + if (tbl[ptr_[i]]) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_last_not_of(string_view s, + size_type pos) const + noexcept { + if (empty()) return npos; + size_type i = std::min(pos, length_ - 1); + if (s.empty()) return i; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (;; --i) { + if (!tbl[ptr_[i]]) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_last_not_of(char c, + size_type pos) const + noexcept { + if (empty()) return npos; + size_type i = std::min(pos, length_ - 1); + for (;; --i) { + if (ptr_[i] != c) { + return i; + } + if (i == 0) break; + } + return npos; +} + +// MSVC has non-standard behavior that implicitly creates definitions for static +// const members. These implicit definitions conflict with explicit out-of-class +// member definitions that are required by the C++ standard, resulting in +// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks +// MSVC to choose only one definition for the symbol it decorates. See details +// at http://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx +#ifdef _MSC_VER +#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany) +#else +#define ABSL_STRING_VIEW_SELECTANY +#endif + +ABSL_STRING_VIEW_SELECTANY +constexpr string_view::size_type string_view::npos; +ABSL_STRING_VIEW_SELECTANY +constexpr string_view::size_type string_view::kMaxSize; + +} // namespace absl + +#endif // ABSL_HAVE_STD_STRING_VIEW diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h new file mode 100644 index 000000000000..e2609f174f93 --- /dev/null +++ b/absl/strings/string_view.h @@ -0,0 +1,572 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: string_view.h +// ----------------------------------------------------------------------------- +// +// This file contains the definition of the `absl::string_view` class. A +// `string_view` points to a contiguous span of characters, often part or all of +// another `std::string`, double-quoted std::string literal, character array, or even +// another `string_view`. +// +// This `absl::string_view` abstraction is designed to be a drop-in +// replacement for the C++17 `std::string_view` abstraction. +#ifndef ABSL_STRINGS_STRING_VIEW_H_ +#define ABSL_STRINGS_STRING_VIEW_H_ + +#include <algorithm> +#include "absl/base/config.h" + +#ifdef ABSL_HAVE_STD_STRING_VIEW + +#include <string_view> + +namespace absl { +using std::string_view; +}; + +#else // ABSL_HAVE_STD_STRING_VIEW + +#include <cassert> +#include <cstddef> +#include <cstring> +#include <iosfwd> +#include <iterator> +#include <limits> +#include <string> + +#include "absl/base/internal/throw_delegate.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { + +// absl::string_view +// +// A `string_view` provides a lightweight view into the std::string data provided by +// a `std::string`, double-quoted std::string literal, character array, or even +// another `string_view`. A `string_view` does *not* own the std::string to which it +// points, and that data cannot be modified through the view. +// +// You can use `string_view` as a function or method parameter anywhere a +// parameter can receive a double-quoted std::string literal, `const char*`, +// `std::string`, or another `absl::string_view` argument with no need to copy +// the std::string data. Systematic use of `string_view` within function arguments +// reduces data copies and `strlen()` calls. +// +// Because of its small size, prefer passing `string_view` by value: +// +// void MyFunction(absl::string_view arg); +// +// If circumstances require, you may also pass one by const reference: +// +// void MyFunction(const absl::string_view& arg); // not preferred +// +// Passing by value generates slightly smaller code for many architectures. +// +// In either case, the source data of the `string_view` must outlive the +// `string_view` itself. +// +// A `string_view` is also suitable for local variables if you know that the +// lifetime of the underlying object is longer than the lifetime of your +// `string_view` variable. However, beware of binding a `string_view` to a +// temporary value: +// +// // BAD use of string_view: lifetime problem +// absl::string_view sv = obj.ReturnAString(); +// +// // GOOD use of string_view: str outlives sv +// std::string str = obj.ReturnAString(); +// absl::string_view sv = str; +// +// Due to lifetime issues, a `string_view` is sometimes a poor choice for a +// return value and usually a poor choice for a data member. If you do use a +// `string_view` this way, it is your responsibility to ensure that the object +// pointed to by the `string_view` outlives the `string_view`. +// +// A `string_view` may represent a whole std::string or just part of a std::string. For +// example, when splitting a std::string, `std::vector<absl::string_view>` is a +// natural data type for the output. +// +// +// When constructed from a source which is nul-terminated, the `string_view` +// itself will not include the nul-terminator unless a specific size (including +// the nul) is passed to the constructor. As a result, common idioms that work +// on nul-terminated strings do not work on `string_view` objects. If you write +// code that scans a `string_view`, you must check its length rather than test +// for nul, for example. Note, however, that nuls may still be embedded within +// a `string_view` explicitly. +// +// You may create a null `string_view` in two ways: +// +// absl::string_view sv(); +// absl::string_view sv(nullptr, 0); +// +// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and +// `sv.empty() == true`. Also, if you create a `string_view` with a non-null +// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to +// signal an undefined value that is different from other `string_view` values +// in a similar fashion to how `const char* p1 = nullptr;` is different from +// `const char* p2 = "";`. However, in practice, it is not recommended to rely +// on this behavior. +// +// Be careful not to confuse a null `string_view` with an empty one. A null +// `string_view` is an empty `string_view`, but some empty `string_view`s are +// not null. Prefer checking for emptiness over checking for null. +// +// There are many ways to create an empty string_view: +// +// const char* nullcp = nullptr; +// // string_view.size() will return 0 in all cases. +// absl::string_view(); +// absl::string_view(nullcp, 0); +// absl::string_view(""); +// absl::string_view("", 0); +// absl::string_view("abcdef", 0); +// absl::string_view("abcdef" + 6, 0); +// +// All empty `string_view` objects whether null or not, are equal: +// +// absl::string_view() == absl::string_view("", 0) +// absl::string_view(nullptr, 0) == absl:: string_view("abcdef"+6, 0) +class string_view { + public: + using traits_type = std::char_traits<char>; + using value_type = char; + using pointer = char*; + using const_pointer = const char*; + using reference = char&; + using const_reference = const char&; + using const_iterator = const char*; + using iterator = const_iterator; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + using reverse_iterator = const_reverse_iterator; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + + static constexpr size_type npos = static_cast<size_type>(-1); + + // Null `string_view` constructor + constexpr string_view() noexcept : ptr_(nullptr), length_(0) {} + + // Implicit constructors + + template <typename Allocator> + string_view( // NOLINT(runtime/explicit) + const std::basic_string<char, std::char_traits<char>, Allocator>& + str) noexcept + : ptr_(str.data()), length_(str.size()) {} + + // Implicit constructor of a `string_view` from nul-terminated `str`. When + // accepting possibly null strings, use `absl::NullSafeStringView(str)` + // instead (see below). + constexpr string_view(const char* str) // NOLINT(runtime/explicit) + : ptr_(str), length_(StrLenInternal(str)) {} + + // Implicit consructor of a `string_view` from a `const char*` and length + constexpr string_view(const char* data, size_type len) + : ptr_(data), length_(CheckLengthInternal(len)) {} + + // NOTE(b/36227513): harmlessly omitted to work around gdb bug. + // constexpr string_view(const string_view&) noexcept = default; + // string_view& operator=(const string_view&) noexcept = default; + + // Iterators + + // string_view::begin() + // + // Returns an iterator pointing to the first character at the beginning of the + // `string_view`, or `end()` if the `string_view` is empty. + constexpr const_iterator begin() const noexcept { return ptr_; } + + // string_view::end() + // + // Returns an iterator pointing just beyond the last character at the end of + // the `string_view`. This iterator acts as a placeholder; attempting to + // access it results in undefined behavior. + constexpr const_iterator end() const noexcept { return ptr_ + length_; } + + // string_view::cbegin() + // + // Returns a const iterator pointing to the first character at the beginning + // of the `string_view`, or `end()` if the `string_view` is empty. + constexpr const_iterator cbegin() const noexcept { return begin(); } + + // string_view::cend() + // + // Returns a const iterator pointing just beyond the last character at the end + // of the `string_view`. This pointer acts as a placeholder; attempting to + // access its element results in undefined behavior. + constexpr const_iterator cend() const noexcept { return end(); } + + // string_view::rbegin() + // + // Returns a reverse iterator pointing to the last character at the end of the + // `string_view`, or `rend()` if the `string_view` is empty. + const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + + // string_view::rend() + // + // Returns a reverse iterator pointing just before the first character at the + // beginning of the `string_view`. This pointer acts as a placeholder; + // attempting to access its element results in undefined behavior. + const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } + + // string_view::crbegin() + // + // Returns a const reverse iterator pointing to the last character at the end + // of the `string_view`, or `crend()` if the `string_view` is empty. + const_reverse_iterator crbegin() const noexcept { return rbegin(); } + + // string_view::crend() + // + // Returns a const reverse iterator pointing just before the first character + // at the beginning of the `string_view`. This pointer acts as a placeholder; + // attempting to access its element results in undefined behavior. + const_reverse_iterator crend() const noexcept { return rend(); } + + // Capacity Utilities + + // string_view::size() + // + // Returns the number of characters in the `string_view`. + constexpr size_type size() const noexcept { + return length_; + } + + // string_view::length() + // + // Returns the number of characters in the `string_view`. Alias for `size()`. + constexpr size_type length() const noexcept { return size(); } + + // string_view::max_size() + // + // Returns the maximum number of characters the `string_view` can hold. + constexpr size_type max_size() const noexcept { return kMaxSize; } + + // string_view::empty() + // + // Checks if the `string_view` is empty (refers to no characters). + constexpr bool empty() const noexcept { return length_ == 0; } + + // std::string:view::operator[] + // + // Returns the ith element of an `string_view` using the array operator. + // Note that this operator does not perform any bounds checking. + constexpr const_reference operator[](size_type i) const { return ptr_[i]; } + + // string_view::front() + // + // Returns the first element of a `string_view`. + constexpr const_reference front() const { return ptr_[0]; } + + // string_view::back() + // + // Returns the last element of a `string_view`. + constexpr const_reference back() const { return ptr_[size() - 1]; } + + // string_view::data() + // + // Returns a pointer to the underlying character array (which is of course + // stored elsewhere). Note that `string_view::data()` may contain embedded nul + // characters, but the returned buffer may or may not be nul-terminated; + // therefore, do not pass `data()` to a routine that expects a nul-terminated + // std::string. + constexpr const_pointer data() const noexcept { return ptr_; } + + // Modifiers + + // string_view::remove_prefix() + // + // Removes the first `n` characters from the `string_view`, returning a + // pointer to the new first character. Note that the underlying std::string is not + // changed, only the view. + void remove_prefix(size_type n) { + assert(n <= length_); + ptr_ += n; + length_ -= n; + } + + // string_view::remove_suffix() + // + // Removes the last `n` characters from the `string_view`. Note that the + // underlying std::string is not changed, only the view. + void remove_suffix(size_type n) { + assert(n <= length_); + length_ -= n; + } + + // string_view::swap() + // + // Swaps this `string_view` with another `string_view`. + void swap(string_view& s) noexcept { + auto t = *this; + *this = s; + s = t; + } + + // Explicit conversion operators + + // Supports conversion to both `std::basic_string` where available. + template <typename A> + explicit operator std::basic_string<char, traits_type, A>() const { + if (!data()) return {}; + return std::basic_string<char, traits_type, A>(data(), size()); + } + + // string_view::copy() + // + // Copies the contents of the `string_view` at offset `pos` and length `n` + // into `buf`. + size_type copy(char* buf, size_type n, size_type pos = 0) const; + + // string_view::substr() + // + // Returns a "substring" of the `string_view` (at offset `post` and length + // `n`) as another std::string views. This function throws `std::out_of_bounds` if + // `pos > size'. + string_view substr(size_type pos, size_type n = npos) const { + if (ABSL_PREDICT_FALSE(pos > length_)) + base_internal::ThrowStdOutOfRange("absl::string_view::substr"); + n = std::min(n, length_ - pos); + return string_view(ptr_ + pos, n); + } + + // string_view::compare() + // + // Performs a lexicographical comparison between the `string_view` and + // another `absl::string_view), returning -1 if `this` is less than, 0 if + // `this` is equal to, and 1 if `this` is greater than the passed std::string + // view. Note that in the case of data equality, a further comparison is made + // on the respective sizes of the two `string_view`s to determine which is + // smaller, equal, or greater. + int compare(string_view x) const noexcept { + auto min_length = std::min(length_, x.length_); + if (min_length > 0) { + int r = memcmp(ptr_, x.ptr_, min_length); + if (r < 0) return -1; + if (r > 0) return 1; + } + if (length_ < x.length_) return -1; + if (length_ > x.length_) return 1; + return 0; + } + + // Overload of `string_view::compare()` for comparing a substring of the + // 'string_view` and another `absl::string_view`. + int compare(size_type pos1, size_type count1, string_view v) const { + return substr(pos1, count1).compare(v); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a substring of another `absl::string_view`. + int compare(size_type pos1, size_type count1, string_view v, size_type pos2, + size_type count2) const { + return substr(pos1, count1).compare(v.substr(pos2, count2)); + } + + // Overload of `string_view::compare()` for comparing a `string_view` and a + // a different C-style std::string `s`. + int compare(const char* s) const { return compare(string_view(s)); } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a different std::string C-style std::string `s`. + int compare(size_type pos1, size_type count1, const char* s) const { + return substr(pos1, count1).compare(string_view(s)); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a substring of a different C-style std::string `s`. + int compare(size_type pos1, size_type count1, const char* s, + size_type count2) const { + return substr(pos1, count1).compare(string_view(s, count2)); + } + + // Find Utilities + + // string_view::find() + // + // Finds the first occurrence of the substring `s` within the `string_view`, + // returning the position of the first character's match, or `npos` if no + // match was found. + size_type find(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find()` for finding the given character `c` + // within the `string_view`. + size_type find(char c, size_type pos = 0) const noexcept; + + // string_view::rfind() + // + // Finds the last occurrence of a substring `s` within the `string_view`, + // returning the position of the first character's match, or `npos` if no + // match was found. + size_type rfind(string_view s, size_type pos = npos) const + noexcept; + + // Overload of `string_view::rfind()` for finding the given character `c` + // within the `string_view`. + size_type rfind(char c, size_type pos = npos) const noexcept; + + // string_view::find_first_of() + // + // Finds the first occurrence of any of the characters in `s` within the + // `string_view`, returning the start position of the match, or `npos` if no + // match was found. + size_type find_first_of(string_view s, size_type pos = 0) const + noexcept; + + // Overload of `string_view::find_first_of()` for finding a character `c` + // within the `string_view`. + size_type find_first_of(char c, size_type pos = 0) const + noexcept { + return find(c, pos); + } + + // string_view::find_last_of() + // + // Finds the last occurrence of any of the characters in `s` within the + // `string_view`, returning the start position of the match, or `npos` if no + // match was found. + size_type find_last_of(string_view s, size_type pos = npos) const + noexcept; + + // Overload of `string_view::find_last_of()` for finding a character `c` + // within the `string_view`. + size_type find_last_of(char c, size_type pos = npos) const + noexcept { + return rfind(c, pos); + } + + // string_view::find_first_not_of() + // + // Finds the first occurrence of any of the characters not in `s` within the + // `string_view`, returning the start position of the first non-match, or + // `npos` if no non-match was found. + size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find_first_not_of()` for finding a character + // that is not `c` within the `string_view`. + size_type find_first_not_of(char c, size_type pos = 0) const noexcept; + + // string_view::find_last_not_of() + // + // Finds the last occurrence of any of the characters not in `s` within the + // `string_view`, returning the start position of the last non-match, or + // `npos` if no non-match was found. + size_type find_last_not_of(string_view s, + size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_not_of()` for finding a character + // that is not `c` within the `string_view`. + size_type find_last_not_of(char c, size_type pos = npos) const + noexcept; + + private: + static constexpr size_type kMaxSize = + std::numeric_limits<size_type>::max() / 2 + 1; + + static constexpr size_type StrLenInternal(const char* str) { + return str ? +// check whether __builtin_strlen is provided by the compiler. +// GCC doesn't have __has_builtin() +// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66970), +// but has __builtin_strlen according to +// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html. +#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \ + (defined(__GNUC__) && !defined(__clang__)) + __builtin_strlen(str) +#else + strlen(str) +#endif + : 0; + } + + static constexpr size_type CheckLengthInternal(size_type len) { + return ABSL_ASSERT(len <= kMaxSize), len; + } + + const char* ptr_; + size_type length_; +}; + +// This large function is defined inline so that in a fairly common case where +// one of the arguments is a literal, the compiler can elide a lot of the +// following comparisons. +inline bool operator==(string_view x, string_view y) noexcept { + auto len = x.size(); + if (len != y.size()) { + return false; + } + return x.data() == y.data() || len <= 0 || + memcmp(x.data(), y.data(), len) == 0; +} + +inline bool operator!=(string_view x, string_view y) noexcept { + return !(x == y); +} + +inline bool operator<(string_view x, string_view y) noexcept { + auto min_size = std::min(x.size(), y.size()); + const int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); + return (r < 0) || (r == 0 && x.size() < y.size()); +} + +inline bool operator>(string_view x, string_view y) noexcept { return y < x; } + +inline bool operator<=(string_view x, string_view y) noexcept { + return !(y < x); +} + +inline bool operator>=(string_view x, string_view y) noexcept { + return !(x < y); +} + +// IO Insertion Operator +std::ostream& operator<<(std::ostream& o, string_view piece); + +} // namespace absl + +#endif // ABSL_HAVE_STD_STRING_VIEW + +namespace absl { + +// ClippedSubstr() +// +// Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`. +// Provided because std::string_view::substr throws if `pos > size()`, +// to support b/37991613. +inline string_view ClippedSubstr(string_view s, size_t pos, + size_t n = string_view::npos) { + pos = std::min(pos, static_cast<size_t>(s.size())); + return s.substr(pos, n); +} + +// NullSafeStringView() +// +// Creates an `absl::string_view` from a pointer `p` even if it's null-valued. +// This function should be used where an `absl::string_view` can be created from +// a possibly-null pointer. +inline string_view NullSafeStringView(const char* p) { + return p ? string_view(p) : string_view(); +} + +} // namespace absl + +#endif // ABSL_STRINGS_STRING_VIEW_H_ diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc new file mode 100644 index 000000000000..439d6499730e --- /dev/null +++ b/absl/strings/string_view_test.cc @@ -0,0 +1,1097 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/string_view.h" + +#include <algorithm> +#include <iomanip> +#include <iterator> +#include <map> +#include <random> +#include <sstream> +#include <string> +#include <type_traits> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/port.h" + +namespace { + +// A minimal allocator that uses malloc(). +template <typename T> +struct Mallocator { + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + size_type max_size() const { + return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type); + } + template <typename U> + struct rebind { + typedef Mallocator<U> other; + }; + Mallocator() = default; + + T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); } + void deallocate(T* p, size_t) { std::free(p); } +}; +template <typename T, typename U> +bool operator==(const Mallocator<T>&, const Mallocator<U>&) { + return true; +} +template <typename T, typename U> +bool operator!=(const Mallocator<T>&, const Mallocator<U>&) { + return false; +} + +TEST(StringViewTest, Ctor) { + { + // Null. + absl::string_view s10; + EXPECT_TRUE(s10.data() == nullptr); + EXPECT_EQ(0, s10.length()); + } + + { + // const char* without length. + const char* hello = "hello"; + absl::string_view s20(hello); + EXPECT_TRUE(s20.data() == hello); + EXPECT_EQ(5, s20.length()); + + // const char* with length. + absl::string_view s21(hello, 4); + EXPECT_TRUE(s21.data() == hello); + EXPECT_EQ(4, s21.length()); + + // Not recommended, but valid C++ + absl::string_view s22(hello, 6); + EXPECT_TRUE(s22.data() == hello); + EXPECT_EQ(6, s22.length()); + } + + { + // std::string. + std::string hola = "hola"; + absl::string_view s30(hola); + EXPECT_TRUE(s30.data() == hola.data()); + EXPECT_EQ(4, s30.length()); + + // std::string with embedded '\0'. + hola.push_back('\0'); + hola.append("h2"); + hola.push_back('\0'); + absl::string_view s31(hola); + EXPECT_TRUE(s31.data() == hola.data()); + EXPECT_EQ(8, s31.length()); + } + + { + using mstring = + std::basic_string<char, std::char_traits<char>, Mallocator<char>>; + mstring str1("BUNGIE-JUMPING!"); + const mstring str2("SLEEPING!"); + + absl::string_view s1(str1); + s1.remove_prefix(strlen("BUNGIE-JUM")); + + absl::string_view s2(str2); + s2.remove_prefix(strlen("SLEE")); + + EXPECT_EQ(s1, s2); + EXPECT_EQ(s1, "PING!"); + } + + // TODO(mec): absl::string_view(const absl::string_view&); +} + +TEST(StringViewTest, Swap) { + absl::string_view a("a"); + absl::string_view b("bbb"); + EXPECT_TRUE(noexcept(a.swap(b))); + a.swap(b); + EXPECT_EQ(a, "bbb"); + EXPECT_EQ(b, "a"); + a.swap(b); + EXPECT_EQ(a, "a"); + EXPECT_EQ(b, "bbb"); +} + +TEST(StringViewTest, STLComparator) { + std::string s1("foo"); + std::string s2("bar"); + std::string s3("baz"); + + absl::string_view p1(s1); + absl::string_view p2(s2); + absl::string_view p3(s3); + + typedef std::map<absl::string_view, int> TestMap; + TestMap map; + + map.insert(std::make_pair(p1, 0)); + map.insert(std::make_pair(p2, 1)); + map.insert(std::make_pair(p3, 2)); + EXPECT_EQ(map.size(), 3); + + TestMap::const_iterator iter = map.begin(); + EXPECT_EQ(iter->second, 1); + ++iter; + EXPECT_EQ(iter->second, 2); + ++iter; + EXPECT_EQ(iter->second, 0); + ++iter; + EXPECT_TRUE(iter == map.end()); + + TestMap::iterator new_iter = map.find("zot"); + EXPECT_TRUE(new_iter == map.end()); + + new_iter = map.find("bar"); + EXPECT_TRUE(new_iter != map.end()); + + map.erase(new_iter); + EXPECT_EQ(map.size(), 2); + + iter = map.begin(); + EXPECT_EQ(iter->second, 2); + ++iter; + EXPECT_EQ(iter->second, 0); + ++iter; + EXPECT_TRUE(iter == map.end()); +} + +#define COMPARE(result, op, x, y) \ + EXPECT_EQ(result, absl::string_view((x)) op absl::string_view((y))); \ + EXPECT_EQ(result, absl::string_view((x)).compare(absl::string_view((y))) op 0) + +TEST(StringViewTest, ComparisonOperators) { + COMPARE(true, ==, "", ""); + COMPARE(true, ==, "", absl::string_view()); + COMPARE(true, ==, absl::string_view(), ""); + COMPARE(true, ==, "a", "a"); + COMPARE(true, ==, "aa", "aa"); + COMPARE(false, ==, "a", ""); + COMPARE(false, ==, "", "a"); + COMPARE(false, ==, "a", "b"); + COMPARE(false, ==, "a", "aa"); + COMPARE(false, ==, "aa", "a"); + + COMPARE(false, !=, "", ""); + COMPARE(false, !=, "a", "a"); + COMPARE(false, !=, "aa", "aa"); + COMPARE(true, !=, "a", ""); + COMPARE(true, !=, "", "a"); + COMPARE(true, !=, "a", "b"); + COMPARE(true, !=, "a", "aa"); + COMPARE(true, !=, "aa", "a"); + + COMPARE(true, <, "a", "b"); + COMPARE(true, <, "a", "aa"); + COMPARE(true, <, "aa", "b"); + COMPARE(true, <, "aa", "bb"); + COMPARE(false, <, "a", "a"); + COMPARE(false, <, "b", "a"); + COMPARE(false, <, "aa", "a"); + COMPARE(false, <, "b", "aa"); + COMPARE(false, <, "bb", "aa"); + + COMPARE(true, <=, "a", "a"); + COMPARE(true, <=, "a", "b"); + COMPARE(true, <=, "a", "aa"); + COMPARE(true, <=, "aa", "b"); + COMPARE(true, <=, "aa", "bb"); + COMPARE(false, <=, "b", "a"); + COMPARE(false, <=, "aa", "a"); + COMPARE(false, <=, "b", "aa"); + COMPARE(false, <=, "bb", "aa"); + + COMPARE(false, >=, "a", "b"); + COMPARE(false, >=, "a", "aa"); + COMPARE(false, >=, "aa", "b"); + COMPARE(false, >=, "aa", "bb"); + COMPARE(true, >=, "a", "a"); + COMPARE(true, >=, "b", "a"); + COMPARE(true, >=, "aa", "a"); + COMPARE(true, >=, "b", "aa"); + COMPARE(true, >=, "bb", "aa"); + + COMPARE(false, >, "a", "a"); + COMPARE(false, >, "a", "b"); + COMPARE(false, >, "a", "aa"); + COMPARE(false, >, "aa", "b"); + COMPARE(false, >, "aa", "bb"); + COMPARE(true, >, "b", "a"); + COMPARE(true, >, "aa", "a"); + COMPARE(true, >, "b", "aa"); + COMPARE(true, >, "bb", "aa"); +} + +TEST(StringViewTest, ComparisonOperatorsByCharacterPosition) { + std::string x; + for (int i = 0; i < 256; i++) { + x += 'a'; + std::string y = x; + COMPARE(true, ==, x, y); + for (int j = 0; j < i; j++) { + std::string z = x; + z[j] = 'b'; // Differs in position 'j' + COMPARE(false, ==, x, z); + COMPARE(true, <, x, z); + COMPARE(true, >, z, x); + if (j + 1 < i) { + z[j + 1] = 'A'; // Differs in position 'j+1' as well + COMPARE(false, ==, x, z); + COMPARE(true, <, x, z); + COMPARE(true, >, z, x); + z[j + 1] = 'z'; // Differs in position 'j+1' as well + COMPARE(false, ==, x, z); + COMPARE(true, <, x, z); + COMPARE(true, >, z, x); + } + } + } +} +#undef COMPARE + +// Sadly, our users often confuse std::string::npos with absl::string_view::npos; +// So much so that we test here that they are the same. They need to +// both be unsigned, and both be the maximum-valued integer of their type. + +template <typename T> +struct is_type { + template <typename U> + static bool same(U) { + return false; + } + static bool same(T) { return true; } +}; + +TEST(StringViewTest, NposMatchesStdStringView) { + EXPECT_EQ(absl::string_view::npos, std::string::npos); + + EXPECT_TRUE(is_type<size_t>::same(absl::string_view::npos)); + EXPECT_FALSE(is_type<size_t>::same("")); + + // Make sure absl::string_view::npos continues to be a header constant. + char test[absl::string_view::npos & 1] = {0}; + EXPECT_EQ(0, test[0]); +} + +TEST(StringViewTest, STL1) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + const absl::string_view d("foobar"); + const absl::string_view e; + std::string temp("123"); + temp += '\0'; + temp += "456"; + const absl::string_view f(temp); + + EXPECT_EQ(a[6], 'g'); + EXPECT_EQ(b[0], 'a'); + EXPECT_EQ(c[2], 'z'); + EXPECT_EQ(f[3], '\0'); + EXPECT_EQ(f[5], '5'); + + EXPECT_EQ(*d.data(), 'f'); + EXPECT_EQ(d.data()[5], 'r'); + EXPECT_TRUE(e.data() == nullptr); + + EXPECT_EQ(*a.begin(), 'a'); + EXPECT_EQ(*(b.begin() + 2), 'c'); + EXPECT_EQ(*(c.end() - 1), 'z'); + + EXPECT_EQ(*a.rbegin(), 'z'); + EXPECT_EQ(*(b.rbegin() + 2), 'a'); + EXPECT_EQ(*(c.rend() - 1), 'x'); + EXPECT_TRUE(a.rbegin() + 26 == a.rend()); + + EXPECT_EQ(a.size(), 26); + EXPECT_EQ(b.size(), 3); + EXPECT_EQ(c.size(), 3); + EXPECT_EQ(d.size(), 6); + EXPECT_EQ(e.size(), 0); + EXPECT_EQ(f.size(), 7); + + EXPECT_TRUE(!d.empty()); + EXPECT_TRUE(d.begin() != d.end()); + EXPECT_TRUE(d.begin() + 6 == d.end()); + + EXPECT_TRUE(e.empty()); + EXPECT_TRUE(e.begin() == e.end()); + + char buf[4] = { '%', '%', '%', '%' }; + EXPECT_EQ(a.copy(buf, 4), 4); + EXPECT_EQ(buf[0], a[0]); + EXPECT_EQ(buf[1], a[1]); + EXPECT_EQ(buf[2], a[2]); + EXPECT_EQ(buf[3], a[3]); + EXPECT_EQ(a.copy(buf, 3, 7), 3); + EXPECT_EQ(buf[0], a[7]); + EXPECT_EQ(buf[1], a[8]); + EXPECT_EQ(buf[2], a[9]); + EXPECT_EQ(buf[3], a[3]); + EXPECT_EQ(c.copy(buf, 99), 3); + EXPECT_EQ(buf[0], c[0]); + EXPECT_EQ(buf[1], c[1]); + EXPECT_EQ(buf[2], c[2]); + EXPECT_EQ(buf[3], a[3]); +} + +// Separated from STL1() because some compilers produce an overly +// large stack frame for the combined function. +TEST(StringViewTest, STL2) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + const absl::string_view f( + "123" + "\0" + "456", + 7); + + d = absl::string_view(); + EXPECT_EQ(d.size(), 0); + EXPECT_TRUE(d.empty()); + EXPECT_TRUE(d.data() == nullptr); + EXPECT_TRUE(d.begin() == d.end()); + + EXPECT_EQ(a.find(b), 0); + EXPECT_EQ(a.find(b, 1), absl::string_view::npos); + EXPECT_EQ(a.find(c), 23); + EXPECT_EQ(a.find(c, 9), 23); + EXPECT_EQ(a.find(c, absl::string_view::npos), absl::string_view::npos); + EXPECT_EQ(b.find(c), absl::string_view::npos); + EXPECT_EQ(b.find(c, absl::string_view::npos), absl::string_view::npos); + EXPECT_EQ(a.find(d), 0); + EXPECT_EQ(a.find(e), 0); + EXPECT_EQ(a.find(d, 12), 12); + EXPECT_EQ(a.find(e, 17), 17); + absl::string_view g("xx not found bb"); + EXPECT_EQ(a.find(g), absl::string_view::npos); + // empty std::string nonsense + EXPECT_EQ(d.find(b), absl::string_view::npos); + EXPECT_EQ(e.find(b), absl::string_view::npos); + EXPECT_EQ(d.find(b, 4), absl::string_view::npos); + EXPECT_EQ(e.find(b, 7), absl::string_view::npos); + + size_t empty_search_pos = std::string().find(std::string()); + EXPECT_EQ(d.find(d), empty_search_pos); + EXPECT_EQ(d.find(e), empty_search_pos); + EXPECT_EQ(e.find(d), empty_search_pos); + EXPECT_EQ(e.find(e), empty_search_pos); + EXPECT_EQ(d.find(d, 4), std::string().find(std::string(), 4)); + EXPECT_EQ(d.find(e, 4), std::string().find(std::string(), 4)); + EXPECT_EQ(e.find(d, 4), std::string().find(std::string(), 4)); + EXPECT_EQ(e.find(e, 4), std::string().find(std::string(), 4)); + + EXPECT_EQ(a.find('a'), 0); + EXPECT_EQ(a.find('c'), 2); + EXPECT_EQ(a.find('z'), 25); + EXPECT_EQ(a.find('$'), absl::string_view::npos); + EXPECT_EQ(a.find('\0'), absl::string_view::npos); + EXPECT_EQ(f.find('\0'), 3); + EXPECT_EQ(f.find('3'), 2); + EXPECT_EQ(f.find('5'), 5); + EXPECT_EQ(g.find('o'), 4); + EXPECT_EQ(g.find('o', 4), 4); + EXPECT_EQ(g.find('o', 5), 8); + EXPECT_EQ(a.find('b', 5), absl::string_view::npos); + // empty std::string nonsense + EXPECT_EQ(d.find('\0'), absl::string_view::npos); + EXPECT_EQ(e.find('\0'), absl::string_view::npos); + EXPECT_EQ(d.find('\0', 4), absl::string_view::npos); + EXPECT_EQ(e.find('\0', 7), absl::string_view::npos); + EXPECT_EQ(d.find('x'), absl::string_view::npos); + EXPECT_EQ(e.find('x'), absl::string_view::npos); + EXPECT_EQ(d.find('x', 4), absl::string_view::npos); + EXPECT_EQ(e.find('x', 7), absl::string_view::npos); + + EXPECT_EQ(a.rfind(b), 0); + EXPECT_EQ(a.rfind(b, 1), 0); + EXPECT_EQ(a.rfind(c), 23); + EXPECT_EQ(a.rfind(c, 22), absl::string_view::npos); + EXPECT_EQ(a.rfind(c, 1), absl::string_view::npos); + EXPECT_EQ(a.rfind(c, 0), absl::string_view::npos); + EXPECT_EQ(b.rfind(c), absl::string_view::npos); + EXPECT_EQ(b.rfind(c, 0), absl::string_view::npos); + EXPECT_EQ(a.rfind(d), std::string(a).rfind(std::string())); + EXPECT_EQ(a.rfind(e), std::string(a).rfind(std::string())); + EXPECT_EQ(a.rfind(d, 12), 12); + EXPECT_EQ(a.rfind(e, 17), 17); + EXPECT_EQ(a.rfind(g), absl::string_view::npos); + EXPECT_EQ(d.rfind(b), absl::string_view::npos); + EXPECT_EQ(e.rfind(b), absl::string_view::npos); + EXPECT_EQ(d.rfind(b, 4), absl::string_view::npos); + EXPECT_EQ(e.rfind(b, 7), absl::string_view::npos); + // empty std::string nonsense + EXPECT_EQ(d.rfind(d, 4), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(d, 7), std::string().rfind(std::string())); + EXPECT_EQ(d.rfind(e, 4), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(e, 7), std::string().rfind(std::string())); + EXPECT_EQ(d.rfind(d), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(d), std::string().rfind(std::string())); + EXPECT_EQ(d.rfind(e), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(e), std::string().rfind(std::string())); + + EXPECT_EQ(g.rfind('o'), 8); + EXPECT_EQ(g.rfind('q'), absl::string_view::npos); + EXPECT_EQ(g.rfind('o', 8), 8); + EXPECT_EQ(g.rfind('o', 7), 4); + EXPECT_EQ(g.rfind('o', 3), absl::string_view::npos); + EXPECT_EQ(f.rfind('\0'), 3); + EXPECT_EQ(f.rfind('\0', 12), 3); + EXPECT_EQ(f.rfind('3'), 2); + EXPECT_EQ(f.rfind('5'), 5); + // empty std::string nonsense + EXPECT_EQ(d.rfind('o'), absl::string_view::npos); + EXPECT_EQ(e.rfind('o'), absl::string_view::npos); + EXPECT_EQ(d.rfind('o', 4), absl::string_view::npos); + EXPECT_EQ(e.rfind('o', 7), absl::string_view::npos); +} + +// Continued from STL2 +TEST(StringViewTest, STL2FindFirst) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + const absl::string_view f( + "123" + "\0" + "456", + 7); + absl::string_view g("xx not found bb"); + + d = absl::string_view(); + EXPECT_EQ(a.find_first_of(b), 0); + EXPECT_EQ(a.find_first_of(b, 0), 0); + EXPECT_EQ(a.find_first_of(b, 1), 1); + EXPECT_EQ(a.find_first_of(b, 2), 2); + EXPECT_EQ(a.find_first_of(b, 3), absl::string_view::npos); + EXPECT_EQ(a.find_first_of(c), 23); + EXPECT_EQ(a.find_first_of(c, 23), 23); + EXPECT_EQ(a.find_first_of(c, 24), 24); + EXPECT_EQ(a.find_first_of(c, 25), 25); + EXPECT_EQ(a.find_first_of(c, 26), absl::string_view::npos); + EXPECT_EQ(g.find_first_of(b), 13); + EXPECT_EQ(g.find_first_of(c), 0); + EXPECT_EQ(a.find_first_of(f), absl::string_view::npos); + EXPECT_EQ(f.find_first_of(a), absl::string_view::npos); + // empty std::string nonsense + EXPECT_EQ(a.find_first_of(d), absl::string_view::npos); + EXPECT_EQ(a.find_first_of(e), absl::string_view::npos); + EXPECT_EQ(d.find_first_of(b), absl::string_view::npos); + EXPECT_EQ(e.find_first_of(b), absl::string_view::npos); + EXPECT_EQ(d.find_first_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_first_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_first_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_first_of(e), absl::string_view::npos); + + EXPECT_EQ(a.find_first_not_of(b), 3); + EXPECT_EQ(a.find_first_not_of(c), 0); + EXPECT_EQ(b.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(c.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(f.find_first_not_of(a), 0); + EXPECT_EQ(a.find_first_not_of(f), 0); + EXPECT_EQ(a.find_first_not_of(d), 0); + EXPECT_EQ(a.find_first_not_of(e), 0); + // empty std::string nonsense + EXPECT_EQ(a.find_first_not_of(d), 0); + EXPECT_EQ(a.find_first_not_of(e), 0); + EXPECT_EQ(a.find_first_not_of(d, 1), 1); + EXPECT_EQ(a.find_first_not_of(e, 1), 1); + EXPECT_EQ(a.find_first_not_of(d, a.size() - 1), a.size() - 1); + EXPECT_EQ(a.find_first_not_of(e, a.size() - 1), a.size() - 1); + EXPECT_EQ(a.find_first_not_of(d, a.size()), absl::string_view::npos); + EXPECT_EQ(a.find_first_not_of(e, a.size()), absl::string_view::npos); + EXPECT_EQ(a.find_first_not_of(d, absl::string_view::npos), + absl::string_view::npos); + EXPECT_EQ(a.find_first_not_of(e, absl::string_view::npos), + absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of(e), absl::string_view::npos); + + absl::string_view h("===="); + EXPECT_EQ(h.find_first_not_of('='), absl::string_view::npos); + EXPECT_EQ(h.find_first_not_of('=', 3), absl::string_view::npos); + EXPECT_EQ(h.find_first_not_of('\0'), 0); + EXPECT_EQ(g.find_first_not_of('x'), 2); + EXPECT_EQ(f.find_first_not_of('\0'), 0); + EXPECT_EQ(f.find_first_not_of('\0', 3), 4); + EXPECT_EQ(f.find_first_not_of('\0', 2), 2); + // empty std::string nonsense + EXPECT_EQ(d.find_first_not_of('x'), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of('x'), absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of('\0'), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of('\0'), absl::string_view::npos); +} + +// Continued from STL2 +TEST(StringViewTest, STL2FindLast) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + const absl::string_view f( + "123" + "\0" + "456", + 7); + absl::string_view g("xx not found bb"); + absl::string_view h("===="); + absl::string_view i("56"); + + d = absl::string_view(); + EXPECT_EQ(h.find_last_of(a), absl::string_view::npos); + EXPECT_EQ(g.find_last_of(a), g.size()-1); + EXPECT_EQ(a.find_last_of(b), 2); + EXPECT_EQ(a.find_last_of(c), a.size()-1); + EXPECT_EQ(f.find_last_of(i), 6); + EXPECT_EQ(a.find_last_of('a'), 0); + EXPECT_EQ(a.find_last_of('b'), 1); + EXPECT_EQ(a.find_last_of('z'), 25); + EXPECT_EQ(a.find_last_of('a', 5), 0); + EXPECT_EQ(a.find_last_of('b', 5), 1); + EXPECT_EQ(a.find_last_of('b', 0), absl::string_view::npos); + EXPECT_EQ(a.find_last_of('z', 25), 25); + EXPECT_EQ(a.find_last_of('z', 24), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(i, 5), 5); + EXPECT_EQ(f.find_last_of(i, 6), 6); + EXPECT_EQ(f.find_last_of(a, 4), absl::string_view::npos); + // empty std::string nonsense + EXPECT_EQ(f.find_last_of(d), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(e), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(d, 4), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(e, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(e), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(f), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(f), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(d, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(e, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(d, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(e, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(f, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(f, 4), absl::string_view::npos); + + EXPECT_EQ(a.find_last_not_of(b), a.size()-1); + EXPECT_EQ(a.find_last_not_of(c), 22); + EXPECT_EQ(b.find_last_not_of(a), absl::string_view::npos); + EXPECT_EQ(b.find_last_not_of(b), absl::string_view::npos); + EXPECT_EQ(f.find_last_not_of(i), 4); + EXPECT_EQ(a.find_last_not_of(c, 24), 22); + EXPECT_EQ(a.find_last_not_of(b, 3), 3); + EXPECT_EQ(a.find_last_not_of(b, 2), absl::string_view::npos); + // empty std::string nonsense + EXPECT_EQ(f.find_last_not_of(d), f.size()-1); + EXPECT_EQ(f.find_last_not_of(e), f.size()-1); + EXPECT_EQ(f.find_last_not_of(d, 4), 4); + EXPECT_EQ(f.find_last_not_of(e, 4), 4); + EXPECT_EQ(d.find_last_not_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(e), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(f), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(f), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(d, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(e, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(d, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(e, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(f, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(f, 4), absl::string_view::npos); + + EXPECT_EQ(h.find_last_not_of('x'), h.size() - 1); + EXPECT_EQ(h.find_last_not_of('='), absl::string_view::npos); + EXPECT_EQ(b.find_last_not_of('c'), 1); + EXPECT_EQ(h.find_last_not_of('x', 2), 2); + EXPECT_EQ(h.find_last_not_of('=', 2), absl::string_view::npos); + EXPECT_EQ(b.find_last_not_of('b', 1), 0); + // empty std::string nonsense + EXPECT_EQ(d.find_last_not_of('x'), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of('x'), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of('\0'), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of('\0'), absl::string_view::npos); +} + +// Continued from STL2 +TEST(StringViewTest, STL2Substr) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + + d = absl::string_view(); + EXPECT_EQ(a.substr(0, 3), b); + EXPECT_EQ(a.substr(23), c); + EXPECT_EQ(a.substr(23, 3), c); + EXPECT_EQ(a.substr(23, 99), c); + EXPECT_EQ(a.substr(0), a); + EXPECT_EQ(a.substr(3, 2), "de"); + // empty std::string nonsense + EXPECT_EQ(d.substr(0, 99), e); + // use of npos + EXPECT_EQ(a.substr(0, absl::string_view::npos), a); + EXPECT_EQ(a.substr(23, absl::string_view::npos), c); + // throw exception +#ifdef ABSL_HAVE_EXCEPTIONS + EXPECT_THROW(a.substr(99, 2), std::out_of_range); +#else + EXPECT_DEATH(a.substr(99, 2), "absl::string_view::substr"); +#endif +} + +TEST(StringViewTest, TruncSubstr) { + const absl::string_view hi("hi"); + EXPECT_EQ("", absl::ClippedSubstr(hi, 0, 0)); + EXPECT_EQ("h", absl::ClippedSubstr(hi, 0, 1)); + EXPECT_EQ("hi", absl::ClippedSubstr(hi, 0)); + EXPECT_EQ("i", absl::ClippedSubstr(hi, 1)); + EXPECT_EQ("", absl::ClippedSubstr(hi, 2)); + EXPECT_EQ("", absl::ClippedSubstr(hi, 3)); // truncation + EXPECT_EQ("", absl::ClippedSubstr(hi, 3, 2)); // truncation +} + +TEST(StringViewTest, UTF8) { + EXPECT_EQ(strlen("รก"), absl::string_view("รก รก").find_first_of(" ")); + EXPECT_EQ(strlen("รก"), absl::string_view("รก รก").find_first_of(" \t")); +} + +TEST(StringViewTest, FindConformance) { + struct { + std::string haystack; + std::string needle; + } specs[] = { + {"", ""}, + {"", "a"}, + {"a", ""}, + {"a", "a"}, + {"a", "b"}, + {"aa", ""}, + {"aa", "a"}, + {"aa", "b"}, + {"ab", "a"}, + {"ab", "b"}, + {"abcd", ""}, + {"abcd", "a"}, + {"abcd", "d"}, + {"abcd", "ab"}, + {"abcd", "bc"}, + {"abcd", "cd"}, + {"abcd", "abcd"}, + }; + for (const auto& s : specs) { + SCOPED_TRACE(s.haystack); + SCOPED_TRACE(s.needle); + std::string st = s.haystack; + absl::string_view sp = s.haystack; + for (size_t i = 0; i <= sp.size(); ++i) { + size_t pos = (i == sp.size()) ? absl::string_view::npos : i; + SCOPED_TRACE(pos); + EXPECT_EQ(sp.find(s.needle, pos), + st.find(s.needle, pos)); + EXPECT_EQ(sp.rfind(s.needle, pos), + st.rfind(s.needle, pos)); + EXPECT_EQ(sp.find_first_of(s.needle, pos), + st.find_first_of(s.needle, pos)); + EXPECT_EQ(sp.find_first_not_of(s.needle, pos), + st.find_first_not_of(s.needle, pos)); + EXPECT_EQ(sp.find_last_of(s.needle, pos), + st.find_last_of(s.needle, pos)); + EXPECT_EQ(sp.find_last_not_of(s.needle, pos), + st.find_last_not_of(s.needle, pos)); + } + } +} + +TEST(StringViewTest, Remove) { + absl::string_view a("foobar"); + std::string s1("123"); + s1 += '\0'; + s1 += "456"; + absl::string_view b(s1); + absl::string_view e; + std::string s2; + + // remove_prefix + absl::string_view c(a); + c.remove_prefix(3); + EXPECT_EQ(c, "bar"); + c = a; + c.remove_prefix(0); + EXPECT_EQ(c, a); + c.remove_prefix(c.size()); + EXPECT_EQ(c, e); + + // remove_suffix + c = a; + c.remove_suffix(3); + EXPECT_EQ(c, "foo"); + c = a; + c.remove_suffix(0); + EXPECT_EQ(c, a); + c.remove_suffix(c.size()); + EXPECT_EQ(c, e); +} + +TEST(StringViewTest, Set) { + absl::string_view a("foobar"); + absl::string_view empty; + absl::string_view b; + + // set + b = absl::string_view("foobar", 6); + EXPECT_EQ(b, a); + b = absl::string_view("foobar", 0); + EXPECT_EQ(b, empty); + b = absl::string_view("foobar", 7); + EXPECT_NE(b, a); + + b = absl::string_view("foobar"); + EXPECT_EQ(b, a); +} + +TEST(StringViewTest, FrontBack) { + static const char arr[] = "abcd"; + const absl::string_view csp(arr, 4); + EXPECT_EQ(&arr[0], &csp.front()); + EXPECT_EQ(&arr[3], &csp.back()); +} + +TEST(StringViewTest, FrontBackSingleChar) { + static const char c = 'a'; + const absl::string_view csp(&c, 1); + EXPECT_EQ(&c, &csp.front()); + EXPECT_EQ(&c, &csp.back()); +} + +TEST(StringViewTest, NULLInput) { + absl::string_view s; + EXPECT_EQ(s.data(), nullptr); + EXPECT_EQ(s.size(), 0); + + s = absl::string_view(nullptr); + EXPECT_EQ(s.data(), nullptr); + EXPECT_EQ(s.size(), 0); + + // .ToString() on a absl::string_view with nullptr should produce the empty + // std::string. + EXPECT_EQ("", std::string(s)); +} + +TEST(StringViewTest, Comparisons2) { + // The `compare` member has 6 overloads (v: string_view, s: const char*): + // (1) compare(v) + // (2) compare(pos1, count1, v) + // (3) compare(pos1, count1, v, pos2, count2) + // (4) compare(s) + // (5) compare(pos1, count1, s) + // (6) compare(pos1, count1, s, count2) + + absl::string_view abc("abcdefghijklmnopqrstuvwxyz"); + + // check comparison operations on strings longer than 4 bytes. + EXPECT_EQ(abc, absl::string_view("abcdefghijklmnopqrstuvwxyz")); + EXPECT_EQ(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxyz")), 0); + + EXPECT_LT(abc, absl::string_view("abcdefghijklmnopqrstuvwxzz")); + EXPECT_LT(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxzz")), 0); + + EXPECT_GT(abc, absl::string_view("abcdefghijklmnopqrstuvwxyy")); + EXPECT_GT(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxyy")), 0); + + // The "substr" variants of `compare`. + absl::string_view digits("0123456789"); + auto npos = absl::string_view::npos; + + // Taking string_view + EXPECT_EQ(digits.compare(3, npos, absl::string_view("3456789")), 0); // 2 + EXPECT_EQ(digits.compare(3, 4, absl::string_view("3456")), 0); // 2 + EXPECT_EQ(digits.compare(10, 0, absl::string_view()), 0); // 2 + EXPECT_EQ(digits.compare(3, 4, absl::string_view("0123456789"), 3, 4), + 0); // 3 + EXPECT_LT(digits.compare(3, 4, absl::string_view("0123456789"), 3, 5), + 0); // 3 + EXPECT_LT(digits.compare(0, npos, absl::string_view("0123456789"), 3, 5), + 0); // 3 + // Taking const char* + EXPECT_EQ(digits.compare(3, 4, "3456"), 0); // 5 + EXPECT_EQ(digits.compare(3, npos, "3456789"), 0); // 5 + EXPECT_EQ(digits.compare(10, 0, ""), 0); // 5 + EXPECT_EQ(digits.compare(3, 4, "0123456789", 3, 4), 0); // 6 + EXPECT_LT(digits.compare(3, 4, "0123456789", 3, 5), 0); // 6 + EXPECT_LT(digits.compare(0, npos, "0123456789", 3, 5), 0); // 6 +} + +struct MyCharAlloc : std::allocator<char> {}; + +TEST(StringViewTest, ExplicitConversionOperator) { + absl::string_view sp = "hi"; + EXPECT_EQ(sp, std::string(sp)); +} + +TEST(StringViewTest, NullSafeStringView) { + { + absl::string_view s = absl::NullSafeStringView(nullptr); + EXPECT_EQ(nullptr, s.data()); + EXPECT_EQ(0, s.size()); + EXPECT_EQ(absl::string_view(), s); + } + { + static const char kHi[] = "hi"; + absl::string_view s = absl::NullSafeStringView(kHi); + EXPECT_EQ(kHi, s.data()); + EXPECT_EQ(strlen(kHi), s.size()); + EXPECT_EQ(absl::string_view("hi"), s); + } +} + +TEST(StringViewTest, ConstexprCompiles) { + constexpr absl::string_view sp; + constexpr absl::string_view cstr(nullptr); + constexpr absl::string_view cstr_len("cstr", 4); + +#if defined(ABSL_HAVE_STD_STRING_VIEW) + // In libstdc++ (as of 7.2), `std::string_view::string_view(const char*)` + // calls `std::char_traits<char>::length(const char*)` to get the std::string + // length, but it is not marked constexpr yet. See GCC bug: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78156 + // Also, there is a LWG issue that adds constexpr to length() which was just + // resolved 2017-06-02. See + // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2232 + // TODO(zhangxy): Update the condition when libstdc++ adopts the constexpr + // length(). +#if !defined(__GLIBCXX__) +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 +#endif // !__GLIBCXX__ + +#else // ABSL_HAVE_STD_STRING_VIEW + +// This duplicates the check for __builtin_strlen in the header. +#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 +#elif defined(__GNUC__) // GCC or clang +#error GCC/clang should have constexpr string_view. +#endif + +#endif // ABSL_HAVE_STD_STRING_VIEW + +#ifdef ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR + constexpr absl::string_view cstr_strlen("foo"); + EXPECT_EQ(cstr_strlen.length(), 3); + constexpr absl::string_view cstr_strlen2 = "bar"; + EXPECT_EQ(cstr_strlen2, "bar"); +#endif + +#if !defined(__clang__) || 3 < __clang_major__ || \ + (3 == __clang_major__ && 4 < __clang_minor__) + // older clang versions (< 3.5) complain that: + // "cannot perform pointer arithmetic on null pointer" + constexpr absl::string_view::iterator const_begin_empty = sp.begin(); + constexpr absl::string_view::iterator const_end_empty = sp.end(); + EXPECT_EQ(const_begin_empty, const_end_empty); +#endif + + constexpr absl::string_view::iterator const_begin = cstr_len.begin(); + constexpr absl::string_view::iterator const_end = cstr_len.end(); + constexpr absl::string_view::size_type const_size = cstr_len.size(); + constexpr absl::string_view::size_type const_length = cstr_len.length(); + EXPECT_EQ(const_begin + const_size, const_end); + EXPECT_EQ(const_begin + const_length, const_end); + + constexpr bool isempty = sp.empty(); + EXPECT_TRUE(isempty); + + constexpr const char c = cstr_len[2]; + EXPECT_EQ(c, 't'); + + constexpr const char cfront = cstr_len.front(); + constexpr const char cback = cstr_len.back(); + EXPECT_EQ(cfront, 'c'); + EXPECT_EQ(cback, 'r'); + + constexpr const char* np = sp.data(); + constexpr const char* cstr_ptr = cstr_len.data(); + EXPECT_EQ(np, nullptr); + EXPECT_NE(cstr_ptr, nullptr); + + constexpr size_t sp_npos = sp.npos; + EXPECT_EQ(sp_npos, -1); +} + +TEST(StringViewTest, Noexcept) { + EXPECT_TRUE((std::is_nothrow_constructible<absl::string_view, + const std::string&>::value)); + EXPECT_TRUE( + (std::is_nothrow_constructible<absl::string_view, const std::string&>::value)); + EXPECT_TRUE(std::is_nothrow_constructible<absl::string_view>::value); + constexpr absl::string_view sp; + EXPECT_TRUE(noexcept(sp.begin())); + EXPECT_TRUE(noexcept(sp.end())); + EXPECT_TRUE(noexcept(sp.cbegin())); + EXPECT_TRUE(noexcept(sp.cend())); + EXPECT_TRUE(noexcept(sp.rbegin())); + EXPECT_TRUE(noexcept(sp.rend())); + EXPECT_TRUE(noexcept(sp.crbegin())); + EXPECT_TRUE(noexcept(sp.crend())); + EXPECT_TRUE(noexcept(sp.size())); + EXPECT_TRUE(noexcept(sp.length())); + EXPECT_TRUE(noexcept(sp.empty())); + EXPECT_TRUE(noexcept(sp.data())); + EXPECT_TRUE(noexcept(sp.compare(sp))); + EXPECT_TRUE(noexcept(sp.find(sp))); + EXPECT_TRUE(noexcept(sp.find('f'))); + EXPECT_TRUE(noexcept(sp.rfind(sp))); + EXPECT_TRUE(noexcept(sp.rfind('f'))); + EXPECT_TRUE(noexcept(sp.find_first_of(sp))); + EXPECT_TRUE(noexcept(sp.find_first_of('f'))); + EXPECT_TRUE(noexcept(sp.find_last_of(sp))); + EXPECT_TRUE(noexcept(sp.find_last_of('f'))); + EXPECT_TRUE(noexcept(sp.find_first_not_of(sp))); + EXPECT_TRUE(noexcept(sp.find_first_not_of('f'))); + EXPECT_TRUE(noexcept(sp.find_last_not_of(sp))); + EXPECT_TRUE(noexcept(sp.find_last_not_of('f'))); +} + +TEST(ComparisonOpsTest, StringCompareNotAmbiguous) { + EXPECT_EQ("hello", std::string("hello")); + EXPECT_LT("hello", std::string("world")); +} + +TEST(ComparisonOpsTest, HeterogenousStringViewEquals) { + EXPECT_EQ(absl::string_view("hello"), std::string("hello")); + EXPECT_EQ("hello", absl::string_view("hello")); +} + +TEST(FindOneCharTest, EdgeCases) { + absl::string_view a("xxyyyxx"); + + // Set a = "xyyyx". + a.remove_prefix(1); + a.remove_suffix(1); + + EXPECT_EQ(0, a.find('x')); + EXPECT_EQ(0, a.find('x', 0)); + EXPECT_EQ(4, a.find('x', 1)); + EXPECT_EQ(4, a.find('x', 4)); + EXPECT_EQ(absl::string_view::npos, a.find('x', 5)); + + EXPECT_EQ(4, a.rfind('x')); + EXPECT_EQ(4, a.rfind('x', 5)); + EXPECT_EQ(4, a.rfind('x', 4)); + EXPECT_EQ(0, a.rfind('x', 3)); + EXPECT_EQ(0, a.rfind('x', 0)); + + // Set a = "yyy". + a.remove_prefix(1); + a.remove_suffix(1); + + EXPECT_EQ(absl::string_view::npos, a.find('x')); + EXPECT_EQ(absl::string_view::npos, a.rfind('x')); +} + +#ifndef THREAD_SANITIZER // Allocates too much memory for tsan. +TEST(HugeStringView, TwoPointTwoGB) { + if (sizeof(size_t) <= 4 || RunningOnValgrind()) + return; + // Try a huge std::string piece. + const size_t size = size_t{2200} * 1000 * 1000; + std::string s(size, 'a'); + absl::string_view sp(s); + EXPECT_EQ(size, sp.length()); + sp.remove_prefix(1); + EXPECT_EQ(size - 1, sp.length()); + sp.remove_suffix(2); + EXPECT_EQ(size - 1 - 2, sp.length()); +} +#endif // THREAD_SANITIZER + +#ifndef NDEBUG +TEST(NonNegativeLenTest, NonNegativeLen) { + EXPECT_DEATH_IF_SUPPORTED(absl::string_view("xyz", -1), "len <= kMaxSize"); +} +#endif // NDEBUG + +class StringViewStreamTest : public ::testing::Test { + public: + // Set negative 'width' for right justification. + template <typename T> + std::string Pad(const T& s, int width, char fill = 0) { + std::ostringstream oss; + if (fill != 0) { + oss << std::setfill(fill); + } + if (width < 0) { + width = -width; + oss << std::right; + } + oss << std::setw(width) << s; + return oss.str(); + } +}; + +TEST_F(StringViewStreamTest, Padding) { + std::string s("hello"); + absl::string_view sp(s); + for (int w = -64; w < 64; ++w) { + SCOPED_TRACE(w); + EXPECT_EQ(Pad(s, w), Pad(sp, w)); + } + for (int w = -64; w < 64; ++w) { + SCOPED_TRACE(w); + EXPECT_EQ(Pad(s, w, '#'), Pad(sp, w, '#')); + } +} + +TEST_F(StringViewStreamTest, ResetsWidth) { + // Width should reset after one formatted write. + // If we weren't resetting width after formatting the string_view, + // we'd have width=5 carrying over to the printing of the "]", + // creating "[###hi####]". + std::string s = "hi"; + absl::string_view sp = s; + { + std::ostringstream oss; + oss << "[" << std::setfill('#') << std::setw(5) << s << "]"; + ASSERT_EQ("[###hi]", oss.str()); + } + { + std::ostringstream oss; + oss << "[" << std::setfill('#') << std::setw(5) << sp << "]"; + EXPECT_EQ("[###hi]", oss.str()); + } +} + +} // namespace diff --git a/absl/strings/strip.cc b/absl/strings/strip.cc new file mode 100644 index 000000000000..968c09c6fd31 --- /dev/null +++ b/absl/strings/strip.cc @@ -0,0 +1,269 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains functions that remove a defined part from the std::string, +// i.e., strip the std::string. + +#include "absl/strings/strip.h" + +#include <algorithm> +#include <cassert> +#include <cstring> +#include <string> + +#include "absl/strings/ascii.h" +#include "absl/strings/string_view.h" + +// ---------------------------------------------------------------------- +// ReplaceCharacters +// Replaces any occurrence of the character 'remove' (or the characters +// in 'remove') with the character 'replace_with'. +// ---------------------------------------------------------------------- +void ReplaceCharacters(char* str, size_t len, absl::string_view remove, + char replace_with) { + for (char* end = str + len; str != end; ++str) { + if (remove.find(*str) != absl::string_view::npos) { + *str = replace_with; + } + } +} + +void ReplaceCharacters(std::string* s, absl::string_view remove, char replace_with) { + for (char& ch : *s) { + if (remove.find(ch) != absl::string_view::npos) { + ch = replace_with; + } + } +} + +bool StripTrailingNewline(std::string* s) { + if (!s->empty() && (*s)[s->size() - 1] == '\n') { + if (s->size() > 1 && (*s)[s->size() - 2] == '\r') + s->resize(s->size() - 2); + else + s->resize(s->size() - 1); + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// Misc. stripping routines +// ---------------------------------------------------------------------- +void StripCurlyBraces(std::string* s) { + return StripBrackets('{', '}', s); +} + +void StripBrackets(char left, char right, std::string* s) { + std::string::iterator opencurly = std::find(s->begin(), s->end(), left); + while (opencurly != s->end()) { + std::string::iterator closecurly = std::find(opencurly, s->end(), right); + if (closecurly == s->end()) return; + opencurly = s->erase(opencurly, closecurly + 1); + opencurly = std::find(opencurly, s->end(), left); + } +} + +void StripMarkupTags(std::string* s) { + std::string::iterator output = std::find(s->begin(), s->end(), '<'); + std::string::iterator input = output; + while (input != s->end()) { + if (*input == '<') { + input = std::find(input, s->end(), '>'); + if (input == s->end()) break; + ++input; + } else { + *output++ = *input++; + } + } + s->resize(output - s->begin()); +} + +std::string OutputWithMarkupTagsStripped(const std::string& s) { + std::string result(s); + StripMarkupTags(&result); + return result; +} + +ptrdiff_t TrimStringLeft(std::string* s, absl::string_view remove) { + size_t i = 0; + while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { + ++i; + } + if (i > 0) s->erase(0, i); + return i; +} + +ptrdiff_t TrimStringRight(std::string* s, absl::string_view remove) { + size_t i = s->size(), trimmed = 0; + while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { + --i; + } + if (i < s->size()) { + trimmed = s->size() - i; + s->erase(i); + } + return trimmed; +} + +// Unfortunately, absl::string_view does not have erase, so we've to replicate +// the implementation with remove_prefix()/remove_suffix() +ptrdiff_t TrimStringLeft(absl::string_view* s, absl::string_view remove) { + size_t i = 0; + while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { + ++i; + } + if (i > 0) s->remove_prefix(i); + return i; +} + +ptrdiff_t TrimStringRight(absl::string_view* s, absl::string_view remove) { + size_t i = s->size(), trimmed = 0; + while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { + --i; + } + if (i < s->size()) { + trimmed = s->size() - i; + s->remove_suffix(trimmed); + } + return trimmed; +} + +// ---------------------------------------------------------------------- +// Various removal routines +// ---------------------------------------------------------------------- +ptrdiff_t strrm(char* str, char c) { + char* src; + char* dest; + for (src = dest = str; *src != '\0'; ++src) + if (*src != c) *(dest++) = *src; + *dest = '\0'; + return dest - str; +} + +ptrdiff_t memrm(char* str, ptrdiff_t strlen, char c) { + char* src; + char* dest; + for (src = dest = str; strlen-- > 0; ++src) + if (*src != c) *(dest++) = *src; + return dest - str; +} + +ptrdiff_t strrmm(char* str, const char* chars) { + char* src; + char* dest; + for (src = dest = str; *src != '\0'; ++src) { + bool skip = false; + for (const char* c = chars; *c != '\0'; c++) { + if (*src == *c) { + skip = true; + break; + } + } + if (!skip) *(dest++) = *src; + } + *dest = '\0'; + return dest - str; +} + +ptrdiff_t strrmm(std::string* str, const std::string& chars) { + size_t str_len = str->length(); + size_t in_index = str->find_first_of(chars); + if (in_index == std::string::npos) return str_len; + + size_t out_index = in_index++; + + while (in_index < str_len) { + char c = (*str)[in_index++]; + if (chars.find(c) == std::string::npos) (*str)[out_index++] = c; + } + + str->resize(out_index); + return out_index; +} + +// ---------------------------------------------------------------------- +// StripDupCharacters +// Replaces any repeated occurrence of the character 'dup_char' +// with single occurrence. e.g., +// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d" +// Return the number of characters removed +// ---------------------------------------------------------------------- +ptrdiff_t StripDupCharacters(std::string* s, char dup_char, ptrdiff_t start_pos) { + if (start_pos < 0) start_pos = 0; + + // remove dups by compaction in-place + ptrdiff_t input_pos = start_pos; // current reader position + ptrdiff_t output_pos = start_pos; // current writer position + const ptrdiff_t input_end = s->size(); + while (input_pos < input_end) { + // keep current character + const char curr_char = (*s)[input_pos]; + if (output_pos != input_pos) // must copy + (*s)[output_pos] = curr_char; + ++input_pos; + ++output_pos; + + if (curr_char == dup_char) { // skip subsequent dups + while ((input_pos < input_end) && ((*s)[input_pos] == dup_char)) + ++input_pos; + } + } + const ptrdiff_t num_deleted = input_pos - output_pos; + s->resize(s->size() - num_deleted); + return num_deleted; +} + +// ---------------------------------------------------------------------- +// TrimRunsInString +// Removes leading and trailing runs, and collapses middle +// runs of a set of characters into a single character (the +// first one specified in 'remove'). Useful for collapsing +// runs of repeated delimiters, whitespace, etc. E.g., +// TrimRunsInString(&s, " :,()") removes leading and trailing +// delimiter chars and collapses and converts internal runs +// of delimiters to single ' ' characters, so, for example, +// " a:(b):c " -> "a b c" +// "first,last::(area)phone, ::zip" -> "first last area phone zip" +// ---------------------------------------------------------------------- +void TrimRunsInString(std::string* s, absl::string_view remove) { + std::string::iterator dest = s->begin(); + std::string::iterator src_end = s->end(); + for (std::string::iterator src = s->begin(); src != src_end;) { + if (remove.find(*src) == absl::string_view::npos) { + *(dest++) = *(src++); + } else { + // Skip to the end of this run of chars that are in 'remove'. + for (++src; src != src_end; ++src) { + if (remove.find(*src) == absl::string_view::npos) { + if (dest != s->begin()) { + // This is an internal run; collapse it. + *(dest++) = remove[0]; + } + *(dest++) = *(src++); + break; + } + } + } + } + s->erase(dest, src_end); +} + +// ---------------------------------------------------------------------- +// RemoveNullsInString +// Removes any internal \0 characters from the std::string. +// ---------------------------------------------------------------------- +void RemoveNullsInString(std::string* s) { + s->erase(std::remove(s->begin(), s->end(), '\0'), s->end()); +} diff --git a/absl/strings/strip.h b/absl/strings/strip.h new file mode 100644 index 000000000000..370f9e88f0d6 --- /dev/null +++ b/absl/strings/strip.h @@ -0,0 +1,89 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: strip.h +// ----------------------------------------------------------------------------- +// +// This file contains various functions for stripping substrings from a std::string. +#ifndef ABSL_STRINGS_STRIP_H_ +#define ABSL_STRINGS_STRIP_H_ + +#include <cstddef> +#include <string> + +#include "absl/base/macros.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" + +namespace absl { + +// ConsumePrefix() +// +// Strips the `expected` prefix from the start of the given std::string, returning +// `true` if the strip operation succeeded or false otherwise. +// +// Example: +// +// absl::string_view input("abc"); +// EXPECT_TRUE(absl::ConsumePrefix(&input, "a")); +// EXPECT_EQ(input, "bc"); +inline bool ConsumePrefix(absl::string_view* str, absl::string_view expected) { + if (!absl::StartsWith(*str, expected)) return false; + str->remove_prefix(expected.size()); + return true; +} +// ConsumeSuffix() +// +// Strips the `expected` suffix from the end of the given std::string, returning +// `true` if the strip operation succeeded or false otherwise. +// +// Example: +// +// absl::string_view input("abcdef"); +// EXPECT_TRUE(absl::ConsumeSuffix(&input, "def")); +// EXPECT_EQ(input, "abc"); +inline bool ConsumeSuffix(absl::string_view* str, absl::string_view expected) { + if (!absl::EndsWith(*str, expected)) return false; + str->remove_suffix(expected.size()); + return true; +} + +// StripPrefix() +// +// Returns a view into the input std::string 'str' with the given 'prefix' removed, +// but leaving the original std::string intact. If the prefix does not match at the +// start of the std::string, returns the original std::string instead. +inline absl::string_view StripPrefix(absl::string_view str, + absl::string_view prefix) { + if (absl::StartsWith(str, prefix)) str.remove_prefix(prefix.size()); + return str; +} + +// StripSuffix() +// +// Returns a view into the input std::string 'str' with the given 'suffix' removed, +// but leaving the original std::string intact. If the suffix does not match at the +// end of the std::string, returns the original std::string instead. +inline absl::string_view StripSuffix(absl::string_view str, + absl::string_view suffix) { + if (absl::EndsWith(str, suffix)) str.remove_suffix(suffix.size()); + return str; +} + +} // namespace absl + +#endif // ABSL_STRINGS_STRIP_H_ diff --git a/absl/strings/strip_test.cc b/absl/strings/strip_test.cc new file mode 100644 index 000000000000..3c9e726eedc1 --- /dev/null +++ b/absl/strings/strip_test.cc @@ -0,0 +1,119 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains functions that remove a defined part from the std::string, +// i.e., strip the std::string. + +#include "absl/strings/strip.h" + +#include <cassert> +#include <cstdio> +#include <cstring> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" + +namespace { + +using testing::ElementsAre; +using testing::IsEmpty; + +TEST(Strip, ConsumePrefixOneChar) { + absl::string_view input("abc"); + EXPECT_TRUE(absl::ConsumePrefix(&input, "a")); + EXPECT_EQ(input, "bc"); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "x")); + EXPECT_EQ(input, "bc"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "b")); + EXPECT_EQ(input, "c"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "c")); + EXPECT_EQ(input, ""); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "a")); + EXPECT_EQ(input, ""); +} + +TEST(Strip, ConsumePrefix) { + absl::string_view input("abcdef"); + EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdefg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "abce")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdeg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "abcdef")); + EXPECT_EQ(input, ""); + + input = "abcdef"; + EXPECT_TRUE(absl::ConsumePrefix(&input, "abcde")); + EXPECT_EQ(input, "f"); +} + +TEST(Strip, ConsumeSuffix) { + absl::string_view input("abcdef"); + EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdefg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "def")); + EXPECT_EQ(input, "abc"); + + input = "abcdef"; + EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdeg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "f")); + EXPECT_EQ(input, "abcde"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "abcde")); + EXPECT_EQ(input, ""); +} + +TEST(Strip, StripPrefix) { + const absl::string_view null_str; + + EXPECT_EQ(absl::StripPrefix("foobar", "foo"), "bar"); + EXPECT_EQ(absl::StripPrefix("foobar", ""), "foobar"); + EXPECT_EQ(absl::StripPrefix("foobar", null_str), "foobar"); + EXPECT_EQ(absl::StripPrefix("foobar", "foobar"), ""); + EXPECT_EQ(absl::StripPrefix("foobar", "bar"), "foobar"); + EXPECT_EQ(absl::StripPrefix("foobar", "foobarr"), "foobar"); + EXPECT_EQ(absl::StripPrefix("", ""), ""); +} + +TEST(Strip, StripSuffix) { + const absl::string_view null_str; + + EXPECT_EQ(absl::StripSuffix("foobar", "bar"), "foo"); + EXPECT_EQ(absl::StripSuffix("foobar", ""), "foobar"); + EXPECT_EQ(absl::StripSuffix("foobar", null_str), "foobar"); + EXPECT_EQ(absl::StripSuffix("foobar", "foobar"), ""); + EXPECT_EQ(absl::StripSuffix("foobar", "foo"), "foobar"); + EXPECT_EQ(absl::StripSuffix("foobar", "ffoobar"), "foobar"); + EXPECT_EQ(absl::StripSuffix("", ""), ""); +} + +} // namespace diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc new file mode 100644 index 000000000000..f739f8c20b2c --- /dev/null +++ b/absl/strings/substitute.cc @@ -0,0 +1,117 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/substitute.h" + +#include <algorithm> + +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/ascii.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace substitute_internal { + +void SubstituteAndAppendArray(std::string* output, absl::string_view format, + const absl::string_view* args_array, + size_t num_args) { + // Determine total size needed. + size_t size = 0; + for (size_t i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (i + 1 >= format.size()) { +#ifndef NDEBUG + ABSL_RAW_LOG(FATAL, + "Invalid strings::Substitute() format std::string: \"%s\".", + absl::CEscape(format).c_str()); +#endif + return; + } else if (absl::ascii_isdigit(format[i + 1])) { + int index = format[i + 1] - '0'; + if (static_cast<size_t>(index) >= num_args) { +#ifndef NDEBUG + ABSL_RAW_LOG( + FATAL, + "Invalid strings::Substitute() format std::string: asked for \"$" + "%d\", but only %d args were given. Full format std::string was: " + "\"%s\".", + index, static_cast<int>(num_args), absl::CEscape(format).c_str()); +#endif + return; + } + size += args_array[index].size(); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + ++size; + ++i; // Skip next char. + } else { +#ifndef NDEBUG + ABSL_RAW_LOG(FATAL, + "Invalid strings::Substitute() format std::string: \"%s\".", + absl::CEscape(format).c_str()); +#endif + return; + } + } else { + ++size; + } + } + + if (size == 0) return; + + // Build the std::string. + size_t original_size = output->size(); + strings_internal::STLStringResizeUninitialized(output, original_size + size); + char* target = &(*output)[original_size]; + for (size_t i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (absl::ascii_isdigit(format[i + 1])) { + const absl::string_view src = args_array[format[i + 1] - '0']; + target = std::copy(src.begin(), src.end(), target); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + *target++ = '$'; + ++i; // Skip next char. + } + } else { + *target++ = format[i]; + } + } + + assert(target == output->data() + output->size()); +} + +Arg::Arg(const void* value) { + static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2, + "fix sizeof(scratch_)"); + if (value == nullptr) { + piece_ = "NULL"; + } else { + char* ptr = scratch_ + sizeof(scratch_); + uintptr_t num = reinterpret_cast<uintptr_t>(value); + static const char kHexDigits[] = "0123456789abcdef"; + do { + *--ptr = kHexDigits[num & 0xf]; + num >>= 4; + } while (num != 0); + *--ptr = 'x'; + *--ptr = '0'; + piece_ = absl::string_view(ptr, scratch_ + sizeof(scratch_) - ptr); + } +} + +} // namespace substitute_internal +} // namespace absl diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h new file mode 100644 index 000000000000..5d6bfd90c2fb --- /dev/null +++ b/absl/strings/substitute.h @@ -0,0 +1,674 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: substitute.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for efficiently performing std::string +// substitutions using a format std::string with positional notation: +// `Substitute()` and `SubstituteAndAppend()`. +// +// Unlike printf-style format specifiers, `Substitute()` functions do not need +// to specify the type of the substitution arguments. Supported arguments +// following the format std::string, such as strings, string_views, ints, +// floats, and bools, are automatically converted to strings during the +// substitution process. (See below for a full list of supported types.) +// +// `Substitute()` does not allow you to specify *how* to format a value, beyond +// the default conversion to std::string. For example, you cannot format an integer +// in hex. +// +// The format std::string uses positional identifiers indicated by a dollar sign ($) +// and single digit positional ids to indicate which substitution arguments to +// use at that location within the format std::string. +// +// Example 1: +// std::string s = Substitute("$1 purchased $0 $2. Thanks $1!", +// 5, "Bob", "Apples"); +// EXPECT_EQ("Bob purchased 5 Apples. Thanks Bob!", s); +// +// Example 2: +// std::string s = "Hi. "; +// SubstituteAndAppend(&s, "My name is $0 and I am $1 years old.", "Bob", 5); +// EXPECT_EQ("Hi. My name is Bob and I am 5 years old.", s); +// +// Differences from `StringPrintf()`: +// * The format std::string does not identify the types of arguments. Instead, the +// arguments are implicitly converted to strings. See below for a list of +// accepted types. +// * Substitutions in the format std::string are identified by a '$' followed by a +// single digit. You can use arguments out-of-order and use the same +// argument multiple times. +// * A '$$' sequence in the format std::string means output a literal '$' +// character. +// * `Substitute()` is significantly faster than `StringPrintf()`. For very +// large strings, it may be orders of magnitude faster. +// +// Supported types: +// * absl::string_view, std::string, const char* (null is equivalent to "") +// * int32_t, int64_t, uint32_t, uint64 +// * float, double +// * bool (Printed as "true" or "false") +// * pointer types other than char* (Printed as "0x<lower case hex std::string>", +// except that null is printed as "NULL") +// +// If an invalid format std::string is provided, Substitute returns an empty std::string +// and SubstituteAndAppend does not change the provided output std::string. +// A format std::string is invalid if it: +// * ends in an unescaped $ character, +// e.g. "Hello $", or +// * calls for a position argument which is not provided, +// e.g. Substitute("Hello $2", "world"), or +// * specifies a non-digit, non-$ character after an unescaped $ character, +// e.g. "Hello %f". +// In debug mode, i.e. #ifndef NDEBUG, such errors terminate the program. + +#ifndef ABSL_STRINGS_SUBSTITUTE_H_ +#define ABSL_STRINGS_SUBSTITUTE_H_ + +#include <cstring> +#include <string> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/ascii.h" +#include "absl/strings/escaping.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" + +namespace absl { +namespace substitute_internal { + +// Arg +// +// This class provides an argument type for `absl::Substitute()` and +// `absl::SubstituteAndAppend()`. `Arg` handles implicit conversion of various +// types to a std::string. (`Arg` is very similar to the `AlphaNum` class in +// `StrCat()`.) +// +// This class has implicit constructors. +class Arg { + public: + // Overloads for std::string-y things + // + // Explicitly overload `const char*` so the compiler doesn't cast to `bool`. + Arg(const char* value) // NOLINT(runtime/explicit) + : piece_(value) {} + Arg(const std::string& value) // NOLINT(runtime/explicit) + : piece_(value) {} + Arg(absl::string_view value) // NOLINT(runtime/explicit) + : piece_(value) {} + + // Overloads for primitives + // + // No overloads are available for signed and unsigned char because if people + // are explicitly declaring their chars as signed or unsigned then they are + // probably using them as 8-bit integers and would probably prefer an integer + // representation. However, we can't really know, so we make the caller decide + // what to do. + Arg(char value) // NOLINT(runtime/explicit) + : piece_(scratch_, 1) { scratch_[0] = value; } + Arg(short value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned short value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(int value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned int value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(long value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned long value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(long long value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned long long value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(float value) // NOLINT(runtime/explicit) + : piece_(numbers_internal::RoundTripFloatToBuffer(value, scratch_)) {} + Arg(double value) // NOLINT(runtime/explicit) + : piece_(numbers_internal::RoundTripDoubleToBuffer(value, scratch_)) {} + Arg(bool value) // NOLINT(runtime/explicit) + : piece_(value ? "true" : "false") {} + // `void*` values, with the exception of `char*`, are printed as + // `StringPrintf()` with format "%p": e.g. ("0x<hex value>"). + // However, in the case of `nullptr`, "NULL" is printed. + Arg(const void* value); // NOLINT(runtime/explicit) + + Arg(const Arg&) = delete; + Arg& operator=(const Arg&) = delete; + + absl::string_view piece() const { return piece_; } + + private: + absl::string_view piece_; + char scratch_[numbers_internal::kFastToBufferSize]; +}; + +// Internal helper function. Don't call this from outside this implementation. +// This interface may change without notice. +void SubstituteAndAppendArray(std::string* output, absl::string_view format, + const absl::string_view* args_array, + size_t num_args); + +#if defined(ABSL_BAD_CALL_IF) +constexpr int CalculateOneBit(const char* format) { + return (*format < '0' || *format > '9') ? 0 : (1 << (*format - '0')); +} + +constexpr const char* SkipNumber(const char* format) { + return !*format ? format : (format + 1); +} + +constexpr int PlaceholderBitmask(const char* format) { + return !*format ? 0 : *format != '$' + ? PlaceholderBitmask(format + 1) + : (CalculateOneBit(format + 1) | + PlaceholderBitmask(SkipNumber(format + 1))); +} +#endif // ABSL_BAD_CALL_IF + +} // namespace substitute_internal + +// +// PUBLIC API +// + +// SubstituteAndAppend() +// +// Substitutes variables into a given format std::string and appends to a given +// output std::string. See file comments above for usage. +// +// The declarations of `SubstituteAndAppend()` below consist of overloads +// for passing 0 to 10 arguments, respectively. +// +// NOTE: A zero-argument `SubstituteAndAppend()` may be used within variadic +// templates to allow a variable number of arguments. +// +// Example: +// template <typename... Args> +// void VarMsg(std::string* boilerplate, const std::string& format, +// const Args&... args) { +// std::string s = absl::SubstituteAndAppend(boilerplate, format, args...)"; +// } +// +inline void SubstituteAndAppend(std::string* output, absl::string_view format) { + substitute_internal::SubstituteAndAppendArray(output, format, nullptr, 0); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0) { + const absl::string_view args[] = {a0.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) { + const absl::string_view args[] = {a0.piece(), a1.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece(), a7.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, + const substitute_internal::Arg& a8) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece(), a7.piece(), a8.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, + const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) { + const absl::string_view args[] = { + a0.piece(), a1.piece(), a2.piece(), a3.piece(), a4.piece(), + a5.piece(), a6.piece(), a7.piece(), a8.piece(), a9.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +#if defined(ABSL_BAD_CALL_IF) +// This body of functions catches cases where the number of placeholders +// doesn't match the number of data arguments. +void SubstituteAndAppend(std::string* output, const char* format) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format std::string has a $[0-9] in it"); + +void SubstituteAndAppend(std::string* output, const char* format, + const substitute_internal::Arg& a0) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format std::string is either missing its $0, or " + "contains one of $1-$9"); + +void SubstituteAndAppend(std::string* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but " + "this format std::string is either missing its $0/$1, or " + "contains one of $2-$9"); + +void SubstituteAndAppend(std::string* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format std::string is either missing its $0/$1/$2, or " + "contains one of $3-$9"); + +void SubstituteAndAppend(std::string* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format std::string is either missing its $0-$3, or " + "contains one of $4-$9"); + +void SubstituteAndAppend(std::string* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format std::string is either missing its $0-$4, or " + "contains one of $5-$9"); + +void SubstituteAndAppend(std::string* output, const char* format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format std::string is either missing its $0-$5, or " + "contains one of $6-$9"); + +void SubstituteAndAppend( + std::string* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format std::string is either missing its $0-$6, or " + "contains one of $7-$9"); + +void SubstituteAndAppend( + std::string* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format std::string is either missing its $0-$7, or " + "contains one of $8-$9"); + +void SubstituteAndAppend( + std::string* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 511, + "There were 9 substitution arguments given, but " + "this format std::string is either missing its $0-$8, or contains a $9"); + +void SubstituteAndAppend( + std::string* output, const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format std::string doesn't contain all of $0 through $9"); +#endif // ABSL_BAD_CALL_IF + +// Substitute() +// +// Substitutes variables into a given format std::string. See file comments above +// for usage. +// +// The declarations of `Substitute()` below consist of overloads for passing 0 +// to 10 arguments, respectively. +// +// NOTE: A zero-argument `Substitute()` may be used within variadic templates to +// allow a variable number of arguments. +// +// Example: +// template <typename... Args> +// void VarMsg(const std::string& format, const Args&... args) { +// std::string s = absl::Substitute(format, args...)"; + +ABSL_MUST_USE_RESULT inline std::string Substitute(absl::string_view format) { + std::string result; + SubstituteAndAppend(&result, format); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0) { + std::string result; + SubstituteAndAppend(&result, format, a0); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9); + return result; +} + +#if defined(ABSL_BAD_CALL_IF) +// This body of functions catches cases where the number of placeholders +// doesn't match the number of data arguments. +std::string Substitute(const char* format) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format std::string has a $[0-9] in it"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format std::string is either missing its $0, or " + "contains one of $1-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but " + "this format std::string is either missing its $0/$1, or " + "contains one of $2-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format std::string is either missing its $0/$1/$2, or " + "contains one of $3-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format std::string is either missing its $0-$3, or " + "contains one of $4-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format std::string is either missing its $0-$4, or " + "contains one of $5-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format std::string is either missing its $0-$5, or " + "contains one of $6-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format std::string is either missing its $0-$6, or " + "contains one of $7-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format std::string is either missing its $0-$7, or " + "contains one of $8-$9"); + +std::string Substitute( + const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 511, + "There were 9 substitution arguments given, but " + "this format std::string is either missing its $0-$8, or contains a $9"); + +std::string Substitute( + const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format std::string doesn't contain all of $0 through $9"); +#endif // ABSL_BAD_CALL_IF + +} // namespace absl + +#endif // ABSL_STRINGS_SUBSTITUTE_H_ diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc new file mode 100644 index 000000000000..a6d7d7b0782d --- /dev/null +++ b/absl/strings/substitute_test.cc @@ -0,0 +1,168 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/substitute.h" + +#include <cstdint> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" + +namespace { + +TEST(SubstituteTest, Substitute) { + // Basic. + EXPECT_EQ("Hello, world!", absl::Substitute("$0, $1!", "Hello", "world")); + + // Non-char* types. + EXPECT_EQ("123 0.2 0.1 foo true false x", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6", 123, 0.2, 0.1f, + std::string("foo"), true, false, 'x')); + + // All int types. + EXPECT_EQ( + "-32767 65535 " + "-1234567890 3234567890 " + "-1234567890 3234567890 " + "-1234567890123456789 9234567890123456789", + absl::Substitute( + "$0 $1 $2 $3 $4 $5 $6 $7", + static_cast<short>(-32767), // NOLINT(runtime/int) + static_cast<unsigned short>(65535), // NOLINT(runtime/int) + -1234567890, 3234567890U, -1234567890L, 3234567890UL, + -int64_t{1234567890123456789}, uint64_t{9234567890123456789u})); + + // Pointer. + const int* int_p = reinterpret_cast<const int*>(0x12345); + std::string str = absl::Substitute("$0", int_p); + EXPECT_EQ(absl::StrCat("0x", absl::Hex(reinterpret_cast<intptr_t>(int_p))), + str); + + // null is special. StrCat prints 0x0. Substitute prints NULL. + const uint64_t* null_p = nullptr; + str = absl::Substitute("$0", null_p); + EXPECT_EQ("NULL", str); + + // char* is also special. + const char* char_p = "print me"; + str = absl::Substitute("$0", char_p); + EXPECT_EQ("print me", str); + + char char_buf[16]; + strncpy(char_buf, "print me too", sizeof(char_buf)); + str = absl::Substitute("$0", char_buf); + EXPECT_EQ("print me too", str); + + // null char* is "doubly" special. Represented as the empty std::string. + char_p = nullptr; + str = absl::Substitute("$0", char_p); + EXPECT_EQ("", str); + + // Out-of-order. + EXPECT_EQ("b, a, c, b", absl::Substitute("$1, $0, $2, $1", "a", "b", "c")); + + // Literal $ + EXPECT_EQ("$", absl::Substitute("$$")); + + EXPECT_EQ("$1", absl::Substitute("$$1")); + + // Test all overloads. + EXPECT_EQ("a", absl::Substitute("$0", "a")); + EXPECT_EQ("a b", absl::Substitute("$0 $1", "a", "b")); + EXPECT_EQ("a b c", absl::Substitute("$0 $1 $2", "a", "b", "c")); + EXPECT_EQ("a b c d", absl::Substitute("$0 $1 $2 $3", "a", "b", "c", "d")); + EXPECT_EQ("a b c d e", + absl::Substitute("$0 $1 $2 $3 $4", "a", "b", "c", "d", "e")); + EXPECT_EQ("a b c d e f", absl::Substitute("$0 $1 $2 $3 $4 $5", "a", "b", "c", + "d", "e", "f")); + EXPECT_EQ("a b c d e f g", absl::Substitute("$0 $1 $2 $3 $4 $5 $6", "a", "b", + "c", "d", "e", "f", "g")); + EXPECT_EQ("a b c d e f g h", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d", "e", + "f", "g", "h")); + EXPECT_EQ("a b c d e f g h i", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c", "d", + "e", "f", "g", "h", "i")); + EXPECT_EQ("a b c d e f g h i j", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", "c", + "d", "e", "f", "g", "h", "i", "j")); + EXPECT_EQ("a b c d e f g h i j b0", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9 $10", "a", "b", "c", + "d", "e", "f", "g", "h", "i", "j")); + + const char* null_cstring = nullptr; + EXPECT_EQ("Text: ''", absl::Substitute("Text: '$0'", null_cstring)); +} + +TEST(SubstituteTest, SubstituteAndAppend) { + std::string str = "Hello"; + absl::SubstituteAndAppend(&str, ", $0!", "world"); + EXPECT_EQ("Hello, world!", str); + + // Test all overloads. + str.clear(); + absl::SubstituteAndAppend(&str, "$0", "a"); + EXPECT_EQ("a", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1", "a", "b"); + EXPECT_EQ("a b", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2", "a", "b", "c"); + EXPECT_EQ("a b c", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3", "a", "b", "c", "d"); + EXPECT_EQ("a b c d", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4", "a", "b", "c", "d", "e"); + EXPECT_EQ("a b c d e", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5", "a", "b", "c", "d", "e", + "f"); + EXPECT_EQ("a b c d e f", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6", "a", "b", "c", "d", + "e", "f", "g"); + EXPECT_EQ("a b c d e f g", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d", + "e", "f", "g", "h"); + EXPECT_EQ("a b c d e f g h", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c", + "d", "e", "f", "g", "h", "i"); + EXPECT_EQ("a b c d e f g h i", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", + "c", "d", "e", "f", "g", "h", "i", "j"); + EXPECT_EQ("a b c d e f g h i j", str); +} + +#ifdef GTEST_HAS_DEATH_TEST + +TEST(SubstituteDeathTest, SubstituteDeath) { + EXPECT_DEBUG_DEATH( + static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")), + "Invalid strings::Substitute\\(\\) format std::string: asked for \"\\$2\", " + "but only 2 args were given."); + EXPECT_DEBUG_DEATH( + static_cast<void>(absl::Substitute("-$z-")), + "Invalid strings::Substitute\\(\\) format std::string: \"-\\$z-\""); + EXPECT_DEBUG_DEATH( + static_cast<void>(absl::Substitute("-$")), + "Invalid strings::Substitute\\(\\) format std::string: \"-\\$\""); +} + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace diff --git a/absl/strings/testdata/getline-1.txt b/absl/strings/testdata/getline-1.txt new file mode 100644 index 000000000000..19b909733ae3 --- /dev/null +++ b/absl/strings/testdata/getline-1.txt @@ -0,0 +1,3 @@ +alpha + +beta gamma diff --git a/absl/strings/testdata/getline-2.txt b/absl/strings/testdata/getline-2.txt new file mode 100644 index 000000000000..d6842d8ee362 --- /dev/null +++ b/absl/strings/testdata/getline-2.txt @@ -0,0 +1 @@ +one.two.three |