diff options
Diffstat (limited to 'absl/strings/internal')
-rw-r--r-- | absl/strings/internal/char_map.h | 154 | ||||
-rw-r--r-- | absl/strings/internal/char_map_test.cc | 172 | ||||
-rw-r--r-- | absl/strings/internal/escaping_test_common.inc | 113 | ||||
-rw-r--r-- | absl/strings/internal/fastmem.h | 215 | ||||
-rw-r--r-- | absl/strings/internal/fastmem_test.cc | 453 | ||||
-rw-r--r-- | absl/strings/internal/memutil.cc | 110 | ||||
-rw-r--r-- | absl/strings/internal/memutil.h | 146 | ||||
-rw-r--r-- | absl/strings/internal/memutil_test.cc | 180 | ||||
-rw-r--r-- | absl/strings/internal/numbers_test_common.inc | 166 | ||||
-rw-r--r-- | absl/strings/internal/ostringstream.h | 97 | ||||
-rw-r--r-- | absl/strings/internal/ostringstream_test.cc | 103 | ||||
-rw-r--r-- | absl/strings/internal/resize_uninitialized.h | 69 | ||||
-rw-r--r-- | absl/strings/internal/resize_uninitialized_test.cc | 68 | ||||
-rw-r--r-- | absl/strings/internal/str_join_internal.h | 314 | ||||
-rw-r--r-- | absl/strings/internal/str_split_internal.h | 439 | ||||
-rw-r--r-- | absl/strings/internal/utf8.cc | 51 | ||||
-rw-r--r-- | absl/strings/internal/utf8.h | 52 | ||||
-rw-r--r-- | absl/strings/internal/utf8_test.cc | 58 |
18 files changed, 2960 insertions, 0 deletions
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h new file mode 100644 index 000000000000..8d92963a4dea --- /dev/null +++ b/absl/strings/internal/char_map.h @@ -0,0 +1,154 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Character Map Class +// +// A fast, bit-vector map for 8-bit unsigned characters. +// This class is useful for non-character purposes as well. + +#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ +#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { +namespace strings_internal { + +class Charmap { + public: + constexpr Charmap() : m_() {} + + // Initializes with a given char*. Note that NUL is not treated as + // a terminator, but rather a char to be flicked. + Charmap(const char* str, int len) : m_() { + while (len--) SetChar(*str++); + } + + // Initializes with a given char*. NUL is treated as a terminator + // and will not be in the charmap. + explicit Charmap(const char* str) : m_() { + while (*str) SetChar(*str++); + } + + constexpr bool contains(unsigned char c) const { + return (m_[c / 64] >> (c % 64)) & 0x1; + } + + // Returns true if and only if a character exists in both maps. + bool IntersectsWith(const Charmap& c) const { + for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { + if ((m_[i] & c.m_[i]) != 0) return true; + } + return false; + } + + bool IsZero() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr Charmap Char(char x) { + return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the C-std::string 's'. + // Note that this is expensively recursive because of the C++11 constexpr + // formulation. Use only in constexpr initializers. + static constexpr Charmap FromString(const char* s) { + return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + } + + // Containing all the chars in the closed interval [lo,hi]. + static constexpr Charmap Range(char lo, char hi) { + return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr Charmap operator~(const Charmap& a) { + return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + private: + constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi, + uint64_t word) { + return OpenRangeFromZeroForWord(hi + 1, word) & + ~OpenRangeFromZeroForWord(lo, word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) + ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast<uint64_t>(0) + : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) { + return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0; + } + + private: + void SetChar(unsigned char c) { + m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +// Mirror the char-classifying predicates in <cctype> +constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } +constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } +constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } +constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } +constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } +constexpr Charmap XDigitCharmap() { + return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); +} +constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } +constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } +constexpr Charmap CntrlCharmap() { + return Charmap::Range(0, 0x7f) & ~PrintCharmap(); +} +constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } +constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } +constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc new file mode 100644 index 000000000000..2167be975e7d --- /dev/null +++ b/absl/strings/internal/char_map_test.cc @@ -0,0 +1,172 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/char_map.h" + +#include <cstdio> +#include <cstdlib> +#include <cctype> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +constexpr absl::strings_internal::Charmap everything_map = + ~absl::strings_internal::Charmap(); +constexpr absl::strings_internal::Charmap nothing_map{}; + +TEST(Charmap, AllTests) { + const absl::strings_internal::Charmap also_nothing_map("", 0); + ASSERT_TRUE(everything_map.contains('\0')); + ASSERT_TRUE(!nothing_map.contains('\0')); + ASSERT_TRUE(!also_nothing_map.contains('\0')); + for (unsigned char ch = 1; ch != 0; ++ch) { + ASSERT_TRUE(everything_map.contains(ch)); + ASSERT_TRUE(!nothing_map.contains(ch)); + ASSERT_TRUE(!also_nothing_map.contains(ch)); + } + + const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); + ASSERT_TRUE(symbols.contains('&')); + ASSERT_TRUE(symbols.contains('@')); + ASSERT_TRUE(symbols.contains('#')); + ASSERT_TRUE(symbols.contains('^')); + ASSERT_TRUE(!symbols.contains('!')); + ASSERT_TRUE(!symbols.contains('?')); + int cnt = 0; + for (unsigned char ch = 1; ch != 0; ++ch) + cnt += symbols.contains(ch); + ASSERT_EQ(cnt, 4); + + const absl::strings_internal::Charmap lets("^abcde", 3); + const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); + const absl::strings_internal::Charmap lets3("fghij\0klmnop"); + ASSERT_TRUE(lets2.contains('k')); + ASSERT_TRUE(!lets3.contains('k')); + + ASSERT_TRUE(symbols.IntersectsWith(lets)); + ASSERT_TRUE(!lets2.IntersectsWith(lets)); + ASSERT_TRUE(lets.IntersectsWith(symbols)); + ASSERT_TRUE(!lets.IntersectsWith(lets2)); + + ASSERT_TRUE(nothing_map.IsZero()); + ASSERT_TRUE(!lets.IsZero()); +} + +namespace { +std::string Members(const absl::strings_internal::Charmap& m) { + std::string r; + for (size_t i = 0; i < 256; ++i) + if (m.contains(i)) r.push_back(i); + return r; +} + +std::string ClosedRangeString(unsigned char lo, unsigned char hi) { + // Don't depend on lo<hi. Just increment until lo==hi. + std::string s; + while (true) { + s.push_back(lo); + if (lo == hi) break; + ++lo; + } + return s; +} + +} // namespace + +TEST(Charmap, Constexpr) { + constexpr absl::strings_internal::Charmap kEmpty = nothing_map; + EXPECT_THAT(Members(kEmpty), ""); + constexpr absl::strings_internal::Charmap kA = + absl::strings_internal::Charmap::Char('A'); + EXPECT_THAT(Members(kA), "A"); + constexpr absl::strings_internal::Charmap kAZ = + absl::strings_internal::Charmap::Range('A', 'Z'); + EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + constexpr absl::strings_internal::Charmap kIdentifier = + absl::strings_internal::Charmap::Range('0', '9') | + absl::strings_internal::Charmap::Range('A', 'Z') | + absl::strings_internal::Charmap::Range('a', 'z') | + absl::strings_internal::Charmap::Char('_'); + EXPECT_THAT(Members(kIdentifier), + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "_" + "abcdefghijklmnopqrstuvwxyz"); + constexpr absl::strings_internal::Charmap kAll = everything_map; + for (size_t i = 0; i < 256; ++i) { + EXPECT_TRUE(kAll.contains(i)) << i; + } + constexpr absl::strings_internal::Charmap kHello = + absl::strings_internal::Charmap::FromString("Hello, world!"); + EXPECT_THAT(Members(kHello), " !,Hdelorw"); + + // test negation and intersection + constexpr absl::strings_internal::Charmap kABC = + absl::strings_internal::Charmap::Range('A', 'Z') & + ~absl::strings_internal::Charmap::Range('D', 'Z'); + EXPECT_THAT(Members(kABC), "ABC"); +} + +TEST(Charmap, Range) { + // Exhaustive testing takes too long, so test some of the boundaries that + // are perhaps going to cause trouble. + std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, + 16, 17, 30, 31, 32, 33, 63, 64, 65, + 127, 128, 129, 223, 224, 225, 254, 255}; + for (auto lo = poi.begin(); lo != poi.end(); ++lo) { + SCOPED_TRACE(*lo); + for (auto hi = lo; hi != poi.end(); ++hi) { + SCOPED_TRACE(*hi); + EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), + ClosedRangeString(*lo, *hi)); + } + } +} + +bool AsBool(int x) { return static_cast<bool>(x); } + +TEST(CharmapCtype, Match) { + for (int c = 0; c < 256; ++c) { + SCOPED_TRACE(c); + SCOPED_TRACE(static_cast<char>(c)); + EXPECT_EQ(AsBool(std::isupper(c)), + absl::strings_internal::UpperCharmap().contains(c)); + EXPECT_EQ(AsBool(std::islower(c)), + absl::strings_internal::LowerCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isdigit(c)), + absl::strings_internal::DigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalpha(c)), + absl::strings_internal::AlphaCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalnum(c)), + absl::strings_internal::AlnumCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isxdigit(c)), + absl::strings_internal::XDigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isprint(c)), + absl::strings_internal::PrintCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isspace(c)), + absl::strings_internal::SpaceCharmap().contains(c)); + EXPECT_EQ(AsBool(std::iscntrl(c)), + absl::strings_internal::CntrlCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isblank(c)), + absl::strings_internal::BlankCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isgraph(c)), + absl::strings_internal::GraphCharmap().contains(c)); + EXPECT_EQ(AsBool(std::ispunct(c)), + absl::strings_internal::PunctCharmap().contains(c)); + } +} + +} // namespace diff --git a/absl/strings/internal/escaping_test_common.inc b/absl/strings/internal/escaping_test_common.inc new file mode 100644 index 000000000000..6f29140e3584 --- /dev/null +++ b/absl/strings/internal/escaping_test_common.inc @@ -0,0 +1,113 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test contains common things needed by both escaping_test.cc and +// escaping_benchmark.cc. + +namespace { + +struct { + absl::string_view plaintext; + absl::string_view cyphertext; +} const base64_strings[] = { + // Some google quotes + // Cyphertext created with "uuencode (GNU sharutils) 4.6.3" + // (Note that we're testing the websafe encoding, though, so if + // you add messages, be sure to run "tr -- '+/' '-_'" on the output) + { "I was always good at math and science, and I never realized " + "that was unusual or somehow undesirable. So one of the things " + "I care a lot about is helping to remove that stigma, " + "to show girls that you can be feminine, you can like the things " + "that girls like, but you can also be really good at technology. " + "You can be really good at building things." + " - Marissa Meyer, Newsweek, 2010-12-22" "\n", + + "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg" + "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu" + "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg" + "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo" + "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp" + "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs" + "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy" + "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll" + "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" }, + + { "Typical first year for a new cluster: " + "~0.5 overheating " + "~1 PDU failure " + "~1 rack-move " + "~1 network rewiring " + "~20 rack failures " + "~5 racks go wonky " + "~8 network maintenances " + "~12 router reloads " + "~3 router failures " + "~dozens of minor 30-second blips for dns " + "~1000 individual machine failures " + "~thousands of hard drive failures " + "slow disks, bad memory, misconfigured machines, flaky machines, etc." + " - Jeff Dean, The Joys of Real Hardware" "\n", + + "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92" + "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3" + "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv" + "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk" + "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv" + "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp" + "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg" + "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs" + "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS" + "ZWFsIEhhcmR3YXJlCg" }, + + { "I'm the head of the webspam team at Google. " + "That means that if you type your name into Google and get porn back, " + "it's my fault. Unless you're a porn star, in which case porn is a " + "completely reasonable response." + " - Matt Cutts, Google Plus" "\n", + + "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg" + "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv" + "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz" + "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg" + "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs" + "IEdvb2dsZSBQbHVzCg" }, + + { "It will still be a long time before machines approach human intelligence. " + "But luckily, machines don't actually have to be intelligent; " + "they just have to fake it. Access to a wealth of information, " + "combined with a rudimentary decision-making capacity, " + "can often be almost as useful. Of course, the results are better yet " + "when coupled with intelligence. A reference librarian with access to " + "a good search engine is a formidable tool." + " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n", + + "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg" + "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj" + "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg" + "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo" + "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg" + "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0" + "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy" + "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl" + "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu" + "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp" + "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw" + "NAo" }, + + // Degenerate edge case + { "", + "" }, +}; + +} // namespace diff --git a/absl/strings/internal/fastmem.h b/absl/strings/internal/fastmem.h new file mode 100644 index 000000000000..9989b12e34d3 --- /dev/null +++ b/absl/strings/internal/fastmem.h @@ -0,0 +1,215 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Fast memory copying and comparison routines. +// strings::fastmemcmp_inlined() replaces memcmp() +// strings::memcpy_inlined() replaces memcpy() +// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0 +// +// strings::*_inlined() routines are inline versions of the +// routines exported by this module. Sometimes using the inlined +// versions is faster. Measure before using the inlined versions. +// + +#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_ +#define ABSL_STRINGS_INTERNAL_FASTMEM_H_ + +#ifdef __SSE4_1__ +#include <immintrin.h> +#endif +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstring> + +#include "absl/base/internal/unaligned_access.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { +namespace strings_internal { + +// Return true if the n bytes at a equal the n bytes at b. +// The regions are allowed to overlap. +// +// The performance is similar to the performance of memcmp(), but faster for +// moderately-sized inputs, or inputs that share a common prefix and differ +// somewhere in their last 8 bytes. Further optimizations can be added later +// if it makes sense to do so. Alternatively, if the compiler & runtime improve +// to eliminate the need for this, we can remove it. +inline bool memeq(const char* a, const char* b, size_t n) { + size_t n_rounded_down = n & ~static_cast<size_t>(7); + if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 + return memcmp(a, b, n) == 0; + } + // n >= 8 + { + uint64_t u = + ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b); + uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^ + ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8); + if ((u | v) != 0) { // The first or last 8 bytes differ. + return false; + } + } + // The next line forces n to be a multiple of 8. + n = n_rounded_down; + if (n >= 80) { + // In 2013 or later, this should be fast on long strings. + return memcmp(a, b, n) == 0; + } + // Now force n to be a multiple of 16. Arguably, a "switch" would be smart + // here, but there's a difficult-to-evaluate code size vs. speed issue. The + // current approach often re-compares some bytes (worst case is if n initially + // was 16, 32, 48, or 64), but is fairly short. + size_t e = n & 8; + a += e; + b += e; + n -= e; + // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks. + while (n > 0) { +#ifdef __SSE4_1__ + __m128i u = + _mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)), + _mm_loadu_si128(reinterpret_cast<const __m128i*>(b))); + if (!_mm_test_all_zeros(u, u)) { + return false; + } +#else + uint64_t x = + ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b); + uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^ + ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8); + if ((x | y) != 0) { + return false; + } +#endif + a += 16; + b += 16; + n -= 16; + } + return true; +} + +inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) { + const unsigned char* pa = static_cast<const unsigned char*>(va); + const unsigned char* pb = static_cast<const unsigned char*>(vb); + switch (n) { + default: + return memcmp(va, vb, n); + case 7: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 6: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 5: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 4: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 3: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 2: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ++pa; + ++pb; + ABSL_FALLTHROUGH_INTENDED; + case 1: + if (*pa != *pb) return *pa < *pb ? -1 : +1; + ABSL_FALLTHROUGH_INTENDED; + case 0: + break; + } + return 0; +} + +// The standard memcpy operation is slow for variable small sizes. +// This implementation inlines the optimal realization for sizes 1 to 16. +// To avoid code bloat don't use it in case of not performance-critical spots, +// nor when you don't expect very frequent values of size <= 16. +inline void memcpy_inlined(char* dst, const char* src, size_t size) { + // Compiler inlines code with minimal amount of data movement when third + // parameter of memcpy is a constant. + switch (size) { + case 1: + memcpy(dst, src, 1); + break; + case 2: + memcpy(dst, src, 2); + break; + case 3: + memcpy(dst, src, 3); + break; + case 4: + memcpy(dst, src, 4); + break; + case 5: + memcpy(dst, src, 5); + break; + case 6: + memcpy(dst, src, 6); + break; + case 7: + memcpy(dst, src, 7); + break; + case 8: + memcpy(dst, src, 8); + break; + case 9: + memcpy(dst, src, 9); + break; + case 10: + memcpy(dst, src, 10); + break; + case 11: + memcpy(dst, src, 11); + break; + case 12: + memcpy(dst, src, 12); + break; + case 13: + memcpy(dst, src, 13); + break; + case 14: + memcpy(dst, src, 14); + break; + case 15: + memcpy(dst, src, 15); + break; + case 16: + memcpy(dst, src, 16); + break; + default: + memcpy(dst, src, size); + break; + } +} + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_ diff --git a/absl/strings/internal/fastmem_test.cc b/absl/strings/internal/fastmem_test.cc new file mode 100644 index 000000000000..7c670f967bb3 --- /dev/null +++ b/absl/strings/internal/fastmem_test.cc @@ -0,0 +1,453 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/fastmem.h" + +#include <memory> +#include <random> +#include <string> + +#include "base/init_google.h" +#include "base/logging.h" +#include "testing/base/public/benchmark.h" +#include "gtest/gtest.h" + +namespace { + +using RandomEngine = std::minstd_rand0; + +void VerifyResults(const int r1, const int r2, const std::string& a, + const std::string& b) { + CHECK_EQ(a.size(), b.size()); + if (r1 == 0) { + EXPECT_EQ(r2, 0) << a << " " << b; + } else if (r1 > 0) { + EXPECT_GT(r2, 0) << a << " " << b; + } else { + EXPECT_LT(r2, 0) << a << " " << b; + } + if ((r1 == 0) == (r2 == 0)) { + EXPECT_EQ(r1 == 0, + absl::strings_internal::memeq(a.data(), b.data(), a.size())) + << r1 << " " << a << " " << b; + } +} + +// Check correctness against glibc's memcmp implementation +void CheckSingle(const std::string& a, const std::string& b) { + CHECK_EQ(a.size(), b.size()); + const int r1 = memcmp(a.data(), b.data(), a.size()); + const int r2 = + absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size()); + VerifyResults(r1, r2, a, b); +} + +void GenerateString(size_t len, std::string* s) { + s->clear(); + for (int i = 0; i < len; i++) { + *s += ('a' + (i % 26)); + } +} + +void CheckCompare(const std::string& a, const std::string& b) { + CheckSingle(a, b); + for (int common = 0; common <= 32; common++) { + std::string extra; + GenerateString(common, &extra); + CheckSingle(extra + a, extra + b); + CheckSingle(a + extra, b + extra); + for (char c1 = 'a'; c1 <= 'c'; c1++) { + for (char c2 = 'a'; c2 <= 'c'; c2++) { + CheckSingle(extra + c1 + a, extra + c2 + b); + } + } + } +} + +TEST(FastCompare, Misc) { + CheckCompare("", ""); + + CheckCompare("a", "a"); + CheckCompare("ab", "ab"); + CheckCompare("abc", "abc"); + CheckCompare("abcd", "abcd"); + CheckCompare("abcde", "abcde"); + + CheckCompare("a", "x"); + CheckCompare("ab", "xb"); + CheckCompare("abc", "xbc"); + CheckCompare("abcd", "xbcd"); + CheckCompare("abcde", "xbcde"); + + CheckCompare("x", "a"); + CheckCompare("xb", "ab"); + CheckCompare("xbc", "abc"); + CheckCompare("xbcd", "abcd"); + CheckCompare("xbcde", "abcde"); + + CheckCompare("a", "x"); + CheckCompare("ab", "ax"); + CheckCompare("abc", "abx"); + CheckCompare("abcd", "abcx"); + CheckCompare("abcde", "abcdx"); + + CheckCompare("x", "a"); + CheckCompare("ax", "ab"); + CheckCompare("abx", "abc"); + CheckCompare("abcx", "abcd"); + CheckCompare("abcdx", "abcde"); + + for (int len = 0; len < 1000; len++) { + std::string p(len, 'z'); + CheckCompare(p + "x", p + "a"); + CheckCompare(p + "ax", p + "ab"); + CheckCompare(p + "abx", p + "abc"); + CheckCompare(p + "abcx", p + "abcd"); + CheckCompare(p + "abcdx", p + "abcde"); + } +} + +TEST(FastCompare, TrailingByte) { + for (int i = 0; i < 256; i++) { + for (int j = 0; j < 256; j++) { + std::string a(1, i); + std::string b(1, j); + CheckSingle(a, b); + } + } +} + +// Check correctness of memcpy_inlined. +void CheckSingleMemcpyInlined(const std::string& a) { + std::unique_ptr<char[]> destination(new char[a.size() + 2]); + destination[0] = 'x'; + destination[a.size() + 1] = 'x'; + absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(), + a.size()); + CHECK_EQ('x', destination[0]); + CHECK_EQ('x', destination[a.size() + 1]); + CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size())); +} + +TEST(MemCpyInlined, Misc) { + CheckSingleMemcpyInlined(""); + CheckSingleMemcpyInlined("0"); + CheckSingleMemcpyInlined("012"); + CheckSingleMemcpyInlined("0123"); + CheckSingleMemcpyInlined("01234"); + CheckSingleMemcpyInlined("012345"); + CheckSingleMemcpyInlined("0123456"); + CheckSingleMemcpyInlined("01234567"); + CheckSingleMemcpyInlined("012345678"); + CheckSingleMemcpyInlined("0123456789"); + CheckSingleMemcpyInlined("0123456789a"); + CheckSingleMemcpyInlined("0123456789ab"); + CheckSingleMemcpyInlined("0123456789abc"); + CheckSingleMemcpyInlined("0123456789abcd"); + CheckSingleMemcpyInlined("0123456789abcde"); + CheckSingleMemcpyInlined("0123456789abcdef"); + CheckSingleMemcpyInlined("0123456789abcdefg"); +} + +template <typename Function> +inline void CopyLoop(benchmark::State& state, int size, Function func) { + char* src = new char[size]; + char* dst = new char[size]; + memset(src, 'x', size); + memset(dst, 'y', size); + for (auto _ : state) { + func(dst, src, size); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); + CHECK_EQ(dst[0], 'x'); + delete[] src; + delete[] dst; +} + +void BM_memcpy(benchmark::State& state) { + CopyLoop(state, state.range(0), memcpy); +} +BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20); + +void BM_memcpy_inlined(benchmark::State& state) { + CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined); +} +BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20); + +// unaligned memcpy +void BM_unaligned_memcpy(benchmark::State& state) { + const int n = state.range(0); + const int kMaxOffset = 32; + char* src = new char[n + kMaxOffset]; + char* dst = new char[n + kMaxOffset]; + memset(src, 'x', n + kMaxOffset); + int r = 0, i = 0; + for (auto _ : state) { + memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n); + r += dst[0]; + ++i; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] src; + delete[] dst; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20); + +// memmove worst case: heavy overlap, but not always by the same amount. +// Also, the source and destination will often be unaligned. +void BM_memmove_worst_case(benchmark::State& state) { + const int n = state.range(0); + const int32_t kDeterministicSeed = 301; + const int kMaxOffset = 32; + char* src = new char[n + kMaxOffset]; + memset(src, 'x', n + kMaxOffset); + size_t offsets[64]; + RandomEngine rng(kDeterministicSeed); + std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset); + for (size_t& offset : offsets) { + offset = random_to_max_offset(rng); + } + int r = 0, i = 0; + for (auto _ : state) { + memmove(src + offsets[i], src + offsets[i + 1], n); + r += src[0]; + i = (i + 2) % arraysize(offsets); + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] src; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20); + +// memmove cache-friendly: aligned and overlapping with 4k +// between the source and destination addresses. +void BM_memmove_cache_friendly(benchmark::State& state) { + const int n = state.range(0); + char* src = new char[n + 4096]; + memset(src, 'x', n); + int r = 0; + while (state.KeepRunningBatch(2)) { // count each memmove as an iteration + memmove(src + 4096, src, n); + memmove(src, src + 4096, n); + r += src[0]; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] src; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_memmove_cache_friendly) + ->Arg(5 * 1024) + ->Arg(10 * 1024) + ->Range(16 << 10, 8 << 20); + +// memmove best(?) case: aligned and non-overlapping. +void BM_memmove_aligned_non_overlapping(benchmark::State& state) { + CopyLoop(state, state.range(0), memmove); +} +BENCHMARK(BM_memmove_aligned_non_overlapping) + ->DenseRange(1, 18) + ->Range(32, 8 << 20); + +// memset speed +void BM_memset(benchmark::State& state) { + const int n = state.range(0); + char* dst = new char[n]; + int r = 0; + for (auto _ : state) { + memset(dst, 'x', n); + r += dst[0]; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] dst; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_memset)->Range(8, 4096 << 10); + +// Bandwidth (vectorization?) test: the ideal generated code will be limited +// by memory bandwidth. Even so-so generated code will max out memory bandwidth +// on some machines. +void BM_membandwidth(benchmark::State& state) { + const int n = state.range(0); + CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter. + char* dst = new char[n]; + int r = 0; + for (auto _ : state) { + const uint32_t* p = reinterpret_cast<uint32_t*>(dst); + const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n); + uint32_t x = 0; + while (p < limit) { + x += p[0]; + x += p[1]; + x += p[2]; + x += p[3]; + x += p[4]; + x += p[5]; + x += p[6]; + x += p[7]; + p += 8; + } + r += x; + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); + delete[] dst; + benchmark::DoNotOptimize(r); +} +BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10); + +// Helper for benchmarks. Repeatedly compares two strings that are +// either equal or different only in one character. If test_equal_strings +// is false then position_to_modify determines where the difference will be. +template <typename Function> +ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop( + benchmark::State& state, bool test_equal_strings, + std::string::size_type position_to_modify, int size, Function func) { + const int kIterMult = 4; // Iteration multiplier for better timing resolution + CHECK_GT(size, 0); + const bool position_to_modify_is_valid = + position_to_modify != std::string::npos && position_to_modify < size; + CHECK_NE(position_to_modify_is_valid, test_equal_strings); + if (!position_to_modify_is_valid) { + position_to_modify = 0; + } + std::string sa(size, 'a'); + std::string sb = sa; + char last = sa[size - 1]; + int num = 0; + for (auto _ : state) { + for (int i = 0; i < kIterMult; ++i) { + sb[position_to_modify] = test_equal_strings ? last : last ^ 1; + num += func(sa, sb); + } + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); + benchmark::DoNotOptimize(num); +} + +// Helper for benchmarks. Repeatedly compares two memory regions that are +// either equal or different only in their final character. +template <typename Function> +ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state, + bool test_equal_strings, + int size, Function func) { + const int kIterMult = 4; // Iteration multiplier for better timing resolution + CHECK_GT(size, 0); + char* data = static_cast<char*>(malloc(size * 2)); + memset(data, 'a', size * 2); + char* a = data; + char* b = data + size; + char last = a[size - 1]; + int num = 0; + for (auto _ : state) { + for (int i = 0; i < kIterMult; ++i) { + b[size - 1] = test_equal_strings ? last : last ^ 1; + num += func(a, b, size); + } + } + state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); + benchmark::DoNotOptimize(num); + free(data); +} + +void BM_memcmp(benchmark::State& state) { + CompareLoop(state, false, state.range(0), memcmp); +} +BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20); + +void BM_fastmemcmp_inlined(benchmark::State& state) { + CompareLoop(state, false, state.range(0), + absl::strings_internal::fastmemcmp_inlined); +} +BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20); + +void BM_memeq(benchmark::State& state) { + CompareLoop(state, false, state.range(0), absl::strings_internal::memeq); +} +BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20); + +void BM_memeq_equal(benchmark::State& state) { + CompareLoop(state, true, state.range(0), absl::strings_internal::memeq); +} +BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20); + +bool StringLess(const std::string& x, const std::string& y) { return x < y; } +bool StringEqual(const std::string& x, const std::string& y) { return x == y; } +bool StdEqual(const std::string& x, const std::string& y) { + return x.size() == y.size() && + std::equal(x.data(), x.data() + x.size(), y.data()); +} + +// Benchmark for x < y, where x and y are strings that differ in only their +// final char. That should be more-or-less the worst case for <. +void BM_string_less(benchmark::State& state) { + StringCompareLoop(state, false, state.range(0) - 1, state.range(0), + StringLess); +} +BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20); + +// Benchmark for x < y, where x and y are strings that differ in only their +// first char. That should be more-or-less the best case for <. +void BM_string_less_easy(benchmark::State& state) { + StringCompareLoop(state, false, 0, state.range(0), StringLess); +} +BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_string_equal(benchmark::State& state) { + StringCompareLoop(state, false, state.range(0) - 1, state.range(0), + StringEqual); +} +BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_string_equal_equal(benchmark::State& state) { + StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual); +} +BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_std_equal(benchmark::State& state) { + StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual); +} +BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_std_equal_equal(benchmark::State& state) { + StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual); +} +BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20); + +void BM_string_equal_unequal_lengths(benchmark::State& state) { + const int size = state.range(0); + std::string a(size, 'a'); + std::string b(size + 1, 'a'); + int count = 0; + for (auto _ : state) { + b[size - 1] = 'a'; + count += (a == b); + } + benchmark::DoNotOptimize(count); +} +BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20); + +void BM_stdstring_equal_unequal_lengths(benchmark::State& state) { + const int size = state.range(0); + std::string a(size, 'a'); + std::string b(size + 1, 'a'); + int count = 0; + for (auto _ : state) { + b[size - 1] = 'a'; + count += (a == b); + } + benchmark::DoNotOptimize(count); +} +BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20); + +} // namespace diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc new file mode 100644 index 000000000000..a0de70dffdbd --- /dev/null +++ b/absl/strings/internal/memutil.cc @@ -0,0 +1,110 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <cstdlib> + +namespace absl { +namespace strings_internal { + +int memcasecmp(const char* s1, const char* s2, size_t len) { + const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1); + const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2); + + for (size_t i = 0; i < len; i++) { + const int diff = + int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} - + int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))}; + if (diff != 0) return diff; + } + return 0; +} + +char* memdup(const char* s, size_t slen) { + void* copy; + if ((copy = malloc(slen)) == nullptr) return nullptr; + memcpy(copy, s, slen); + return reinterpret_cast<char*>(copy); +} + +char* memrchr(const char* s, int c, size_t slen) { + for (const char* e = s + slen - 1; e >= s; e--) { + if (*e == c) return const_cast<char*>(e); + } + return nullptr; +} + +size_t memspn(const char* s, size_t slen, const char* accept) { + const char* p = s; + const char* spanp; + char c, sc; + +cont: + c = *p++; + if (slen-- == 0) return p - 1 - s; + for (spanp = accept; (sc = *spanp++) != '\0';) + if (sc == c) goto cont; + return p - 1 - s; +} + +size_t memcspn(const char* s, size_t slen, const char* reject) { + const char* p = s; + const char* spanp; + char c, sc; + + while (slen-- != 0) { + c = *p++; + for (spanp = reject; (sc = *spanp++) != '\0';) + if (sc == c) return p - 1 - s; + } + return p - s; +} + +char* mempbrk(const char* s, size_t slen, const char* accept) { + const char* scanp; + int sc; + + for (; slen; ++s, --slen) { + for (scanp = accept; (sc = *scanp++) != '\0';) + if (sc == *s) return const_cast<char*>(s); + } + return nullptr; +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ((match = static_cast<const char*>( + memchr(phaystack, pneedle[0], hayend - phaystack)))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h new file mode 100644 index 000000000000..a6f1c69138e3 --- /dev/null +++ b/absl/strings/internal/memutil.h @@ -0,0 +1,146 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// These routines provide mem versions of standard C std::string routines, +// such as strpbrk. They function exactly the same as the str versions, +// so if you wonder what they are, replace the word "mem" by +// "str" and check out the man page. I could return void*, as the +// strutil.h mem*() routines tend to do, but I return char* instead +// since this is by far the most common way these functions are called. +// +// The difference between the mem and str versions is the mem version +// takes a pointer and a length, rather than a '\0'-terminated std::string. +// The memcase* routines defined here assume the locale is "C" +// (they use absl::ascii_tolower instead of tolower). +// +// These routines are based on the BSD library. +// +// Here's a list of routines from std::string.h, and their mem analogues. +// Functions in lowercase are defined in std::string.h; those in UPPERCASE +// are defined here: +// +// strlen -- +// strcat strncat MEMCAT +// strcpy strncpy memcpy +// -- memccpy (very cool function, btw) +// -- memmove +// -- memset +// strcmp strncmp memcmp +// strcasecmp strncasecmp MEMCASECMP +// strchr memchr +// strcoll -- +// strxfrm -- +// strdup strndup MEMDUP +// strrchr MEMRCHR +// strspn MEMSPN +// strcspn MEMCSPN +// strpbrk MEMPBRK +// strstr MEMSTR MEMMEM +// (g)strcasestr MEMCASESTR MEMCASEMEM +// strtok -- +// strprefix MEMPREFIX (strprefix is from strutil.h) +// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) +// strsuffix MEMSUFFIX (strsuffix is from strutil.h) +// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) +// -- MEMIS +// -- MEMCASEIS +// strcount MEMCOUNT (strcount is from strutil.h) + +#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ +#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ + +#include <cstddef> +#include <cstring> + +#include "absl/base/port.h" // disable some warnings on Windows +#include "absl/strings/ascii.h" // for absl::ascii_tolower + +namespace absl { +namespace strings_internal { + +inline char* memcat(char* dest, size_t destlen, const char* src, + size_t srclen) { + return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen)); +} + +int memcasecmp(const char* s1, const char* s2, size_t len); +char* memdup(const char* s, size_t slen); +char* memrchr(const char* s, int c, size_t slen); +size_t memspn(const char* s, size_t slen, const char* accept); +size_t memcspn(const char* s, size_t slen, const char* reject); +char* mempbrk(const char* s, size_t slen, const char* accept); + +// This is for internal use only. Don't call this directly +template <bool case_sensitive> +const char* int_memmatch(const char* haystack, size_t haylen, + const char* needle, size_t neelen) { + if (0 == neelen) { + return haystack; // even if haylen is 0 + } + const char* hayend = haystack + haylen; + const char* needlestart = needle; + const char* needleend = needlestart + neelen; + + for (; haystack < hayend; ++haystack) { + char hay = case_sensitive + ? *haystack + : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); + char nee = case_sensitive + ? *needle + : absl::ascii_tolower(static_cast<unsigned char>(*needle)); + if (hay == nee) { + if (++needle == needleend) { + return haystack + 1 - neelen; + } + } else if (needle != needlestart) { + // must back up haystack in case a prefix matched (find "aab" in "aaab") + haystack -= needle - needlestart; // for loop will advance one more + needle = needlestart; + } + } + return nullptr; +} + +// These are the guys you can call directly +inline const char* memstr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memcasestr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memmem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); +} + +inline const char* memcasemem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc new file mode 100644 index 000000000000..1ff60f20e0bd --- /dev/null +++ b/absl/strings/internal/memutil_test.cc @@ -0,0 +1,180 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Unit test for memutil.cc + +#include "absl/strings/internal/memutil.h" + +#include <algorithm> +#include <cstdlib> + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" + +namespace { + +static char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +static const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +TEST(MemUtilTest, AllTests) { + // check memutil functions + char a[1000]; + absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1); + absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1); + + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there", + sizeof("hello there") - 1), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 1), + -1); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 2), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0); + + char* p = absl::strings_internal::memdup("hello", 5); + free(p); + + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 1); + EXPECT_TRUE(p && p[-1] == 'r'); + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 2); + EXPECT_TRUE(p && p[-1] == 'h'); + p = absl::strings_internal::memrchr("hello there", 'u', + sizeof("hello there") - 1); + EXPECT_TRUE(p == nullptr); + + int len = absl::strings_internal::memspn("hello there", + sizeof("hello there") - 1, "hole"); + EXPECT_EQ(len, sizeof("hello") - 1); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "u"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + ""); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 1, "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 2, "trole h!"); + EXPECT_EQ(len, sizeof("hello there!") - 2); + + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "leho"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "u"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, ""); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, " "); + EXPECT_EQ(len, 5); + + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "leho"); + EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l'); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "nu"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there!", + sizeof("hello there!") - 2, "!"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + " t "); + EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't'); + + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "aBcDeFgHiJ"; + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) == + nullptr); + } +} + +} // namespace diff --git a/absl/strings/internal/numbers_test_common.inc b/absl/strings/internal/numbers_test_common.inc new file mode 100644 index 000000000000..e165b3bea5c6 --- /dev/null +++ b/absl/strings/internal/numbers_test_common.inc @@ -0,0 +1,166 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file contains common things needed by numbers_test.cc, +// numbers_legacy_test.cc and numbers_benchmark.cc. + +namespace { + +// Previously documented minimum buffer sizes for Fast*ToBuffer functions. +// NOTE(edk): These should be deleted and uses replaced with kFastToBufferSize +// once existing code has been fixed to use kFastToBufferSize. +enum { + kFastInt32ToBufferSize = 12, + kFastInt64ToBufferSize = 22, + kFastUInt32ToBufferSize = 12, + kFastUInt64ToBufferSize = 22 +}; + +template <typename IntType> +bool Itoa(IntType value, int base, std::string* destination) { + destination->clear(); + if (base <= 1 || base > 36) { + return false; + } + + if (value == 0) { + destination->push_back('0'); + return true; + } + + bool negative = value < 0; + while (value != 0) { + const IntType next_value = value / base; + // Can't use std::abs here because of problems when IntType is unsigned. + int remainder = value > next_value * base ? value - next_value * base + : next_value * base - value; + char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10; + destination->insert(0, 1, c); + value = next_value; + } + + if (negative) { + destination->insert(0, 1, '-'); + } + return true; +} + +struct uint32_test_case { + const char* str; + bool expect_ok; + int base; // base to pass to the conversion function + uint32_t expected; +} const strtouint32_test_cases[] = { + {"0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()}, + {"0x34234324", true, 16, 0x34234324}, + {"34234324", true, 16, 0x34234324}, + {"0", true, 16, 0}, + {" \t\n 0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()}, + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + {" \t\n 72717222", true, 8, 072717222}, + {" \t\n 072717222", true, 8, 072717222}, + {" \t\n 072717228", false, 8, 07271722}, + {"0", true, 0, 0}, + + // Base-10 version. + {"34234324", true, 0, 34234324}, + {"4294967295", true, 0, std::numeric_limits<uint32_t>::max()}, + {"34234324 \n\t", true, 10, 34234324}, + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, // would be valid hex, but prefix is missing + {"34234324a", false, 0, 34234324}, + {"34234.3", false, 0, 34234}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"4294967296", false, 0, std::numeric_limits<uint32_t>::max()}, + {"0x100000000", false, 0, std::numeric_limits<uint32_t>::max()}, + {nullptr, false, 0, 0}, +}; + +struct uint64_test_case { + const char* str; + bool expect_ok; + int base; + uint64_t expected; +} const strtouint64_test_cases[] = { + {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}}, + {"3423432448783446", true, 16, int64_t{0x3423432448783446}}, + + {"0", true, 16, 0}, + {"000", true, 0, 0}, + {"0", true, 0, 0}, + {" \t\n 0xffffffffffffffff", true, 16, + std::numeric_limits<uint64_t>::max()}, + + {"012345670123456701234", true, 8, int64_t{012345670123456701234}}, + {"12345670123456701234", true, 8, int64_t{012345670123456701234}}, + + {"12845670123456701234", false, 8, 0}, + + // Base-10 version. + {"34234324487834466", true, 0, int64_t{34234324487834466}}, + + {" \t\n 18446744073709551615", true, 0, + std::numeric_limits<uint64_t>::max()}, + + {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}}, + + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + {"0", true, 0, 0}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, + {"34234324487834466a", false, 0, 0}, + {"34234487834466.3", false, 0, 0}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"18446744073709551616", false, 10, 0}, + {"18446744073709551616", false, 0, 0}, + {"0x10000000000000000", false, 16, std::numeric_limits<uint64_t>::max()}, + {"0X10000000000000000", false, 16, + std::numeric_limits<uint64_t>::max()}, // 0X versus 0x. + {"0x10000000000000000", false, 0, std::numeric_limits<uint64_t>::max()}, + {"0X10000000000000000", false, 0, + std::numeric_limits<uint64_t>::max()}, // 0X versus 0x. + + {"0x1234", true, 16, 0x1234}, + + // Base-10 std::string version. + {"1234", true, 0, 1234}, + {nullptr, false, 0, 0}, +}; + +} // namespace diff --git a/absl/strings/internal/ostringstream.h b/absl/strings/internal/ostringstream.h new file mode 100644 index 000000000000..017632a9292e --- /dev/null +++ b/absl/strings/internal/ostringstream.h @@ -0,0 +1,97 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ +#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ + +#include <cassert> +#include <ostream> +#include <streambuf> +#include <string> + +#include "absl/base/port.h" + +namespace absl { +namespace strings_internal { + +// The same as std::ostringstream but appends to a user-specified std::string, +// and is faster. It is ~70% faster to create, ~50% faster to write to, and +// completely free to extract the result std::string. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42 << ' ' << 3.14; // appends to `s` +// +// The stream object doesn't have to be named. Starting from C++11 operator<< +// works with rvalues of std::ostream. +// +// std::string s; +// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s` +// +// OStringStream is faster to create than std::ostringstream but it's still +// relatively slow. Avoid creating multiple streams where a single stream will +// do. +// +// Creates unnecessary instances of OStringStream: slow. +// +// std::string s; +// OStringStream(&s) << 42; +// OStringStream(&s) << ' '; +// OStringStream(&s) << 3.14; +// +// Creates a single instance of OStringStream and reuses it: fast. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42; +// strm << ' '; +// strm << 3.14; +// +// Note: flush() has no effect. No reason to call it. +class OStringStream : private std::basic_streambuf<char>, public std::ostream { + public: + // The argument can be null, in which case you'll need to call str(p) with a + // non-null argument before you can write to the stream. + // + // The destructor of OStringStream doesn't use the std::string. It's OK to destroy + // the std::string before the stream. + explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {} + + std::string* str() { return s_; } + const std::string* str() const { return s_; } + void str(std::string* s) { s_ = s; } + + private: + using Buf = std::basic_streambuf<char>; + + Buf::int_type overflow(int c = Buf::traits_type::eof()) override { + assert(s_); + if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof())) + s_->push_back(static_cast<char>(c)); + return 1; + } + + std::streamsize xsputn(const char* s, std::streamsize n) override { + assert(s_); + s_->append(s, n); + return n; + } + + std::string* s_; +}; + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ diff --git a/absl/strings/internal/ostringstream_test.cc b/absl/strings/internal/ostringstream_test.cc new file mode 100644 index 000000000000..0047ec822e8b --- /dev/null +++ b/absl/strings/internal/ostringstream_test.cc @@ -0,0 +1,103 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/ostringstream.h" + +#include <memory> +#include <ostream> +#include <sstream> +#include <string> +#include <type_traits> + +#include "gtest/gtest.h" + +namespace { + +TEST(OStringStream, IsOStream) { + static_assert( + std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(), + ""); +} + +TEST(OStringStream, ConstructDestroy) { + { + absl::strings_internal::OStringStream strm(nullptr); + EXPECT_EQ(nullptr, strm.str()); + } + { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ(&s, strm.str()); + } + EXPECT_EQ("abc", s); + } + { + std::unique_ptr<std::string> s(new std::string); + absl::strings_internal::OStringStream strm(s.get()); + s.reset(); + } +} + +TEST(OStringStream, Str) { + std::string s1; + absl::strings_internal::OStringStream strm(&s1); + const absl::strings_internal::OStringStream& c_strm(strm); + + static_assert(std::is_same<decltype(strm.str()), std::string*>(), ""); + static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), ""); + + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + strm.str(&s1); + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + std::string s2; + strm.str(&s2); + EXPECT_EQ(&s2, strm.str()); + EXPECT_EQ(&s2, c_strm.str()); + + strm.str(nullptr); + EXPECT_EQ(nullptr, strm.str()); + EXPECT_EQ(nullptr, c_strm.str()); +} + +TEST(OStreamStream, WriteToLValue) { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ("abc", s); + strm << ""; + EXPECT_EQ("abc", s); + strm << 42; + EXPECT_EQ("abc42", s); + strm << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); + } + EXPECT_EQ("abc42xy", s); +} + +TEST(OStreamStream, WriteToRValue) { + std::string s = "abc"; + absl::strings_internal::OStringStream(&s) << ""; + EXPECT_EQ("abc", s); + absl::strings_internal::OStringStream(&s) << 42; + EXPECT_EQ("abc42", s); + absl::strings_internal::OStringStream(&s) << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); +} + +} // namespace diff --git a/absl/strings/internal/resize_uninitialized.h b/absl/strings/internal/resize_uninitialized.h new file mode 100644 index 000000000000..0157ca0245f3 --- /dev/null +++ b/absl/strings/internal/resize_uninitialized.h @@ -0,0 +1,69 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ + +#include <string> +#include <utility> + +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" // for void_t + +namespace absl { +namespace strings_internal { + +// Is a subclass of true_type or false_type, depending on whether or not +// T has a resize_uninitialized member. +template <typename T, typename = void> +struct HasResizeUninitialized : std::false_type {}; +template <typename T> +struct HasResizeUninitialized< + T, absl::void_t<decltype(std::declval<T>().resize_uninitialized(237))>> + : std::true_type {}; + +template <typename string_type> +void ResizeUninit(string_type* s, size_t new_size, std::true_type) { + s->resize_uninitialized(new_size); +} +template <typename string_type> +void ResizeUninit(string_type* s, size_t new_size, std::false_type) { + s->resize(new_size); +} + +// Returns true if the std::string implementation supports a resize where +// the new characters added to the std::string are left untouched. +// +// (A better name might be "STLStringSupportsUninitializedResize", alluding to +// the previous function.) +template <typename string_type> +inline constexpr bool STLStringSupportsNontrashingResize(string_type*) { + return HasResizeUninitialized<string_type>(); +} + +// Like str->resize(new_size), except any new characters added to "*str" as a +// result of resizing may be left uninitialized, rather than being filled with +// '0' bytes. Typically used when code is then going to overwrite the backing +// store of the std::string with known data. Uses a Google extension to std::string. +template <typename string_type, typename = void> +inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { + ResizeUninit(s, new_size, HasResizeUninitialized<string_type>()); +} + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ diff --git a/absl/strings/internal/resize_uninitialized_test.cc b/absl/strings/internal/resize_uninitialized_test.cc new file mode 100644 index 000000000000..ad282efcd9bb --- /dev/null +++ b/absl/strings/internal/resize_uninitialized_test.cc @@ -0,0 +1,68 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/resize_uninitialized.h" + +#include "gtest/gtest.h" + +namespace { + +int resize_call_count = 0; + +struct resizable_string { + void resize(size_t) { resize_call_count += 1; } +}; + +int resize_uninitialized_call_count = 0; + +struct resize_uninitializable_string { + void resize(size_t) { resize_call_count += 1; } + void resize_uninitialized(size_t) { resize_uninitialized_call_count += 1; } +}; + +TEST(ResizeUninit, WithAndWithout) { + resize_call_count = 0; + resize_uninitialized_call_count = 0; + { + resizable_string rs; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + EXPECT_FALSE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rs, 237); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(resize_uninitialized_call_count, 0); + } + + resize_call_count = 0; + resize_uninitialized_call_count = 0; + { + resize_uninitializable_string rus; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + EXPECT_TRUE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rus, 237); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_uninitialized_call_count, 1); + } +} + +} // namespace diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h new file mode 100644 index 000000000000..e73f1dde403d --- /dev/null +++ b/absl/strings/internal/str_join_internal.h @@ -0,0 +1,314 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Join API that are inlined/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in this file are: +// +// - A handful of default Formatters +// - JoinAlgorithm() overloads +// - JoinRange() overloads +// - JoinTuple() +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_join.h +// +// IWYU pragma: private, include "absl/strings/str_join.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ + +#include <cassert> +#include <iterator> +#include <memory> +#include <string> +#include <utility> + +#include "absl/strings/internal/ostringstream.h" +#include "absl/strings/str_cat.h" + +namespace absl { +namespace strings_internal { + +// +// Formatter objects +// +// The following are implementation classes for standard Formatter objects. The +// factory functions that users will call to create and use these formatters are +// defined and documented in strings/join.h. +// + +// The default formatter. Converts alpha-numeric types to strings. +struct AlphaNumFormatterImpl { + // This template is needed in order to support passing in a dereferenced + // vector<bool>::iterator + template <typename T> + void operator()(std::string* out, const T& t) const { + StrAppend(out, AlphaNum(t)); + } + + void operator()(std::string* out, const AlphaNum& t) const { + StrAppend(out, t); + } +}; + +// A type that's used to overload the JoinAlgorithm() function (defined below) +// for ranges that do not require additional formatting (e.g., a range of +// strings). + +struct NoFormatter : public AlphaNumFormatterImpl {}; + +// Formats types to strings using the << operator. +class StreamFormatterImpl { + public: + // The method isn't const because it mutates state. Making it const will + // render StreamFormatterImpl thread-hostile. + template <typename T> + void operator()(std::string* out, const T& t) { + // The stream is created lazily to avoid paying the relatively high cost + // of its construction when joining an empty range. + if (strm_) { + strm_->clear(); // clear the bad, fail and eof bits in case they were set + strm_->str(out); + } else { + strm_.reset(new strings_internal::OStringStream(out)); + } + *strm_ << t; + } + + private: + std::unique_ptr<strings_internal::OStringStream> strm_; +}; + +// Formats a std::pair<>. The 'first' member is formatted using f1_ and the +// 'second' member is formatted using f2_. sep_ is the separator. +template <typename F1, typename F2> +class PairFormatterImpl { + public: + PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2) + : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {} + + template <typename T> + void operator()(std::string* out, const T& p) { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + template <typename T> + void operator()(std::string* out, const T& p) const { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + private: + F1 f1_; + std::string sep_; + F2 f2_; +}; + +// Wraps another formatter and dereferences the argument to operator() then +// passes the dereferenced argument to the wrapped formatter. This can be +// useful, for example, to join a std::vector<int*>. +template <typename Formatter> +class DereferenceFormatterImpl { + public: + DereferenceFormatterImpl() : f_() {} + explicit DereferenceFormatterImpl(Formatter&& f) + : f_(std::forward<Formatter>(f)) {} + + template <typename T> + void operator()(std::string* out, const T& t) { + f_(out, *t); + } + + template <typename T> + void operator()(std::string* out, const T& t) const { + f_(out, *t); + } + + private: + Formatter f_; +}; + +// DefaultFormatter<T> is a traits class that selects a default Formatter to use +// for the given type T. The ::Type member names the Formatter to use. This is +// used by the strings::Join() functions that do NOT take a Formatter argument, +// in which case a default Formatter must be chosen. +// +// AlphaNumFormatterImpl is the default in the base template, followed by +// specializations for other types. +template <typename ValueType> +struct DefaultFormatter { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<const char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<std::string> { + typedef NoFormatter Type; +}; +template <> +struct DefaultFormatter<absl::string_view> { + typedef NoFormatter Type; +}; +template <typename ValueType> +struct DefaultFormatter<ValueType*> { + typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type> + Type; +}; + +template <typename ValueType> +struct DefaultFormatter<std::unique_ptr<ValueType>> + : public DefaultFormatter<ValueType*> {}; + +// +// JoinAlgorithm() functions +// + +// The main joining algorithm. This simply joins the elements in the given +// iterator range, each separated by the given separator, into an output std::string, +// and formats each element using the provided Formatter object. +template <typename Iterator, typename Formatter> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + Formatter&& f) { + std::string result; + absl::string_view sep(""); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + f(&result, *it); + sep = s; + } + return result; +} + +// No-op placeholder for input iterators which can not be iterated over. +template <typename Iterator> +size_t GetResultSize(Iterator, Iterator, size_t, std::input_iterator_tag) { + return 0; +} + +// Calculates space to reserve, if the iterator supports multiple passes. +template <typename Iterator> +size_t GetResultSize(Iterator it, Iterator end, size_t separator_size, + std::forward_iterator_tag) { + assert(it != end); + size_t length = it->size(); + while (++it != end) { + length += separator_size; + length += it->size(); + } + return length; +} + +// A joining algorithm that's optimized for an iterator range of std::string-like +// objects that do not need any additional formatting. This is to optimize the +// common case of joining, say, a std::vector<std::string> or a +// std::vector<absl::string_view>. +// +// This is an overload of the previous JoinAlgorithm() function. Here the +// Formatter argument is of type NoFormatter. Since NoFormatter is an internal +// type, this overload is only invoked when strings::Join() is called with a +// range of std::string-like objects (e.g., std::string, absl::string_view), and an +// explicit Formatter argument was NOT specified. +// +// The optimization is that the needed space will be reserved in the output +// std::string to avoid the need to resize while appending. To do this, the iterator +// range will be traversed twice: once to calculate the total needed size, and +// then again to copy the elements and delimiters to the output std::string. +template <typename Iterator> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + NoFormatter) { + std::string result; + if (start != end) { + typename std::iterator_traits<Iterator>::iterator_category iterator_tag; + result.reserve(GetResultSize(start, end, s.size(), iterator_tag)); + + // Joins strings + absl::string_view sep("", 0); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + result.append(it->data(), it->size()); + sep = s; + } + } + + return result; +} + +// JoinTupleLoop implements a loop over the elements of a std::tuple, which +// are heterogeneous. The primary template matches the tuple interior case. It +// continues the iteration after appending a separator (for nonzero indices) +// and formatting an element of the tuple. The specialization for the I=N case +// matches the end-of-tuple, and terminates the iteration. +template <size_t I, size_t N> +struct JoinTupleLoop { + template <typename Tup, typename Formatter> + void operator()(std::string* out, const Tup& tup, absl::string_view sep, + Formatter&& fmt) { + if (I > 0) out->append(sep.data(), sep.size()); + fmt(out, std::get<I>(tup)); + JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt); + } +}; +template <size_t N> +struct JoinTupleLoop<N, N> { + template <typename Tup, typename Formatter> + void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {} +}; + +template <typename... T, typename Formatter> +std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep, + Formatter&& fmt) { + std::string result; + JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt); + return result; +} + +template <typename Iterator> +std::string JoinRange(Iterator first, Iterator last, absl::string_view separator) { + // No formatter was explicitly given, so a default must be chosen. + typedef typename std::iterator_traits<Iterator>::value_type ValueType; + typedef typename DefaultFormatter<ValueType>::Type Formatter; + return JoinAlgorithm(first, last, separator, Formatter()); +} + +template <typename Range, typename Formatter> +std::string JoinRange(const Range& range, absl::string_view separator, + Formatter&& fmt) { + using std::begin; + using std::end; + return JoinAlgorithm(begin(range), end(range), separator, fmt); +} + +template <typename Range> +std::string JoinRange(const Range& range, absl::string_view separator) { + using std::begin; + using std::end; + return JoinRange(begin(range), end(range), separator); +} + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h new file mode 100644 index 000000000000..dc31a8ef9090 --- /dev/null +++ b/absl/strings/internal/str_split_internal.h @@ -0,0 +1,439 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Split API that are inline/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in here are +// +// - ConvertibleToStringView +// - SplitIterator<> +// - Splitter<> +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_split.h. +// +// IWYU pragma: private, include "absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ + +#ifdef _GLIBCXX_DEBUG +#include <glibcxx_debug_traits.h> +#endif // _GLIBCXX_DEBUG + +#include <array> +#include <initializer_list> +#include <iterator> +#include <map> +#include <type_traits> +#include <utility> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace strings_internal { + +#ifdef _GLIBCXX_DEBUG +using ::glibcxx_debug_traits::IsStrictlyDebugWrapperBase; +#else // _GLIBCXX_DEBUG +template <typename T> struct IsStrictlyDebugWrapperBase : std::false_type {}; +#endif // _GLIBCXX_DEBUG + +// This class is implicitly constructible from everything that absl::string_view +// is implicitly constructible from. If it's constructed from a temporary +// std::string, the data is moved into a data member so its lifetime matches that of +// the ConvertibleToStringView instance. +class ConvertibleToStringView { + public: + ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) + ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) + : value_(s) {} + + // Matches rvalue strings and moves their data to a member. +ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) + : copy_(std::move(s)), value_(copy_) {} + + ConvertibleToStringView(const ConvertibleToStringView& other) + : copy_(other.copy_), + value_(other.IsSelfReferential() ? copy_ : other.value_) {} + + ConvertibleToStringView(ConvertibleToStringView&& other) { + StealMembers(std::move(other)); + } + + ConvertibleToStringView& operator=(ConvertibleToStringView other) { + StealMembers(std::move(other)); + return *this; + } + + absl::string_view value() const { return value_; } + + private: + // Returns true if ctsp's value refers to its internal copy_ member. + bool IsSelfReferential() const { return value_.data() == copy_.data(); } + + void StealMembers(ConvertibleToStringView&& other) { + if (other.IsSelfReferential()) { + copy_ = std::move(other.copy_); + value_ = copy_; + other.value_ = other.copy_; + } else { + value_ = other.value_; + } + } + + // Holds the data moved from temporary std::string arguments. Declared first so + // that 'value' can refer to 'copy_'. + std::string copy_; + absl::string_view value_; +}; + +// An iterator that enumerates the parts of a std::string from a Splitter. The text +// to be split, the Delimiter, and the Predicate are all taken from the given +// Splitter object. Iterators may only be compared if they refer to the same +// Splitter instance. +// +// This class is NOT part of the public splitting API. +template <typename Splitter> +class SplitIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + + enum State { kInitState, kLastState, kEndState }; + SplitIterator(State state, const Splitter* splitter) + : pos_(0), + state_(state), + splitter_(splitter), + delimiter_(splitter->delimiter()), + predicate_(splitter->predicate()) { + // Hack to maintain backward compatibility. This one block makes it so an + // empty absl::string_view whose .data() happens to be nullptr behaves + // *differently* from an otherwise empty absl::string_view whose .data() is + // not nullptr. This is an undesirable difference in general, but this + // behavior is maintained to avoid breaking existing code that happens to + // depend on this old behavior/bug. Perhaps it will be fixed one day. The + // difference in behavior is as follows: + // Split(absl::string_view(""), '-'); // {""} + // Split(absl::string_view(), '-'); // {} + if (splitter_->text().data() == nullptr) { + state_ = kEndState; + pos_ = splitter_->text().size(); + return; + } + + if (state_ == kEndState) { + pos_ = splitter_->text().size(); + } else { + ++(*this); + } + } + + bool at_end() const { return state_ == kEndState; } + + reference operator*() const { return curr_; } + pointer operator->() const { return &curr_; } + + SplitIterator& operator++() { + do { + if (state_ == kLastState) { + state_ = kEndState; + return *this; + } + const absl::string_view text = splitter_->text(); + const absl::string_view d = delimiter_.Find(text, pos_); + if (d.data() == text.end()) state_ = kLastState; + curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); + pos_ += curr_.size() + d.size(); + } while (!predicate_(curr_)); + return *this; + } + + SplitIterator operator++(int) { + SplitIterator old(*this); + ++(*this); + return old; + } + + friend bool operator==(const SplitIterator& a, const SplitIterator& b) { + return a.state_ == b.state_ && a.pos_ == b.pos_; + } + + friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { + return !(a == b); + } + + private: + size_t pos_; + State state_; + absl::string_view curr_; + const Splitter* splitter_; + typename Splitter::DelimiterType delimiter_; + typename Splitter::PredicateType predicate_; +}; + +// HasMappedType<T>::value is true iff there exists a type T::mapped_type. +template <typename T, typename = void> +struct HasMappedType : std::false_type {}; +template <typename T> +struct HasMappedType<T, absl::void_t<typename T::mapped_type>> + : std::true_type {}; + +// HasValueType<T>::value is true iff there exists a type T::value_type. +template <typename T, typename = void> +struct HasValueType : std::false_type {}; +template <typename T> +struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { +}; + +// HasConstIterator<T>::value is true iff there exists a type T::const_iterator. +template <typename T, typename = void> +struct HasConstIterator : std::false_type {}; +template <typename T> +struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> + : std::true_type {}; + +// IsInitializerList<T>::value is true iff T is an std::initializer_list. More +// details below in Splitter<> where this is used. +std::false_type IsInitializerListDispatch(...); // default: No +template <typename T> +std::true_type IsInitializerListDispatch(std::initializer_list<T>*); +template <typename T> +struct IsInitializerList + : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; + +// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition +// is true for type 'C'. +// +// Restricts conversion to container-like types (by testing for the presence of +// a const_iterator member type) and also to disable conversion to an +// std::initializer_list (which also has a const_iterator). Otherwise, code +// compiled in C++11 will get an error due to ambiguous conversion paths (in +// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> +// or an std::initializer_list<T>). +template <typename C> +struct SplitterIsConvertibleTo + : std::enable_if< + !IsStrictlyDebugWrapperBase<C>::value && + !IsInitializerList<C>::value && + HasValueType<C>::value && + HasConstIterator<C>::value> {}; + +// This class implements the range that is returned by absl::StrSplit(). This +// class has templated conversion operators that allow it to be implicitly +// converted to a variety of types that the caller may have specified on the +// left-hand side of an assignment. +// +// The main interface for interacting with this class is through its implicit +// conversion operators. However, this class may also be used like a container +// in that it has .begin() and .end() member functions. It may also be used +// within a range-for loop. +// +// Output containers can be collections of any type that is constructible from +// an absl::string_view. +// +// An Predicate functor may be supplied. This predicate will be used to filter +// the split strings: only strings for which the predicate returns true will be +// kept. A Predicate object is any unary functor that takes an absl::string_view +// and returns bool. +template <typename Delimiter, typename Predicate> +class Splitter { + public: + using DelimiterType = Delimiter; + using PredicateType = Predicate; + using const_iterator = strings_internal::SplitIterator<Splitter>; + using value_type = typename std::iterator_traits<const_iterator>::value_type; + + Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) + : text_(std::move(input_text)), + delimiter_(std::move(d)), + predicate_(std::move(p)) {} + + absl::string_view text() const { return text_.value(); } + const Delimiter& delimiter() const { return delimiter_; } + const Predicate& predicate() const { return predicate_; } + + // Range functions that iterate the split substrings as absl::string_view + // objects. These methods enable a Splitter to be used in a range-based for + // loop. + const_iterator begin() const { return {const_iterator::kInitState, this}; } + const_iterator end() const { return {const_iterator::kEndState, this}; } + + // An implicit conversion operator that is restricted to only those containers + // that the splitter is convertible to. + template <typename Container, + typename OnlyIf = typename SplitterIsConvertibleTo<Container>::type> + operator Container() const { // NOLINT(runtime/explicit) + return ConvertToContainer<Container, typename Container::value_type, + HasMappedType<Container>::value>()(*this); + } + + // Returns a pair with its .first and .second members set to the first two + // strings returned by the begin() iterator. Either/both of .first and .second + // will be constructed with empty strings if the iterator doesn't have a + // corresponding value. + template <typename First, typename Second> + operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) + absl::string_view first, second; + auto it = begin(); + if (it != end()) { + first = *it; + if (++it != end()) { + second = *it; + } + } + return {First(first), Second(second)}; + } + + private: + // ConvertToContainer is a functor converting a Splitter to the requested + // Container of ValueType. It is specialized below to optimize splitting to + // certain combinations of Container and ValueType. + // + // This base template handles the generic case of storing the split results in + // the requested non-map-like container and converting the split substrings to + // the requested type. + template <typename Container, typename ValueType, bool is_map = false> + struct ConvertToContainer { + Container operator()(const Splitter& splitter) const { + Container c; + auto it = std::inserter(c, c.end()); + for (const auto sp : splitter) { + *it++ = ValueType(sp); + } + return c; + } + }; + + // Partial specialization for a std::vector<absl::string_view>. + // + // Optimized for the common case of splitting to a + // std::vector<absl::string_view>. In this case we first split the results to + // a small array of absl::string_view on the stack, to reduce reallocations. + template <typename A> + struct ConvertToContainer<std::vector<absl::string_view, A>, + absl::string_view, false> { + std::vector<absl::string_view, A> operator()( + const Splitter& splitter) const { + struct raw_view { + const char* data; + size_t size; + operator absl::string_view() const { // NOLINT(runtime/explicit) + return {data, size}; + } + }; + std::vector<absl::string_view, A> v; + std::array<raw_view, 16> ar; + for (auto it = splitter.begin(); !it.at_end();) { + size_t index = 0; + do { + ar[index].data = it->data(); + ar[index].size = it->size(); + ++it; + } while (++index != ar.size() && !it.at_end()); + v.insert(v.end(), ar.begin(), ar.begin() + index); + } + return v; + } + }; + + // Partial specialization for a std::vector<std::string>. + // + // Optimized for the common case of splitting to a std::vector<std::string>. In + // this case we first split the results to a std::vector<absl::string_view> so + // the returned std::vector<std::string> can have space reserved to avoid std::string + // moves. + template <typename A> + struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { + std::vector<std::string, A> operator()(const Splitter& splitter) const { + const std::vector<absl::string_view> v = splitter; + return std::vector<std::string, A>(v.begin(), v.end()); + } + }; + + // Partial specialization for containers of pairs (e.g., maps). + // + // The algorithm is to insert a new pair into the map for each even-numbered + // item, with the even-numbered item as the key with a default-constructed + // value. Each odd-numbered item will then be assigned to the last pair's + // value. + template <typename Container, typename First, typename Second> + struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + Container operator()(const Splitter& splitter) const { + Container m; + typename Container::iterator it; + bool insert = true; + for (const auto sp : splitter) { + if (insert) { + it = Inserter<Container>::Insert(&m, First(sp), Second()); + } else { + it->second = Second(sp); + } + insert = !insert; + } + return m; + } + + // Inserts the key and value into the given map, returning an iterator to + // the inserted item. Specialized for std::map and std::multimap to use + // emplace() and adapt emplace()'s return value. + template <typename Map> + struct Inserter { + using M = Map; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->insert(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::map<Ts...>> { + using M = std::map<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::multimap<Ts...>> { + using M = std::multimap<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)); + } + }; + }; + + ConvertibleToStringView text_; + Delimiter delimiter_; + Predicate predicate_; +}; + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc new file mode 100644 index 000000000000..2415c2ccc45c --- /dev/null +++ b/absl/strings/internal/utf8.cc @@ -0,0 +1,51 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// UTF8 utilities, implemented to reduce dependencies. + +#include "absl/strings/internal/utf8.h" + +namespace absl { +namespace strings_internal { + +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { + if (utf8_char <= 0x7F) { + *buffer = static_cast<char>(utf8_char); + return 1; + } else if (utf8_char <= 0x7FF) { + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xC0 | utf8_char; + return 2; + } else if (utf8_char <= 0xFFFF) { + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xE0 | utf8_char; + return 3; + } else { + buffer[3] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xF0 | utf8_char; + return 4; + } +} + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h new file mode 100644 index 000000000000..705eea7f16ab --- /dev/null +++ b/absl/strings/internal/utf8.h @@ -0,0 +1,52 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// UTF8 utilities, implemented to reduce dependencies. +// +// If you need Unicode specific processing (for example being aware of +// Unicode character boundaries, or knowledge of Unicode casing rules, +// or various forms of equivalence and normalization), take a look at +// files in i18n/utf8. + +#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_ +#define ABSL_STRINGS_INTERNAL_UTF8_H_ + +#include <cstddef> +#include <cstdint> + + +namespace absl { +namespace strings_internal { + +// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes +// out the UTF-8 encoding into buffer, and returns the number of chars +// it wrote. +// +// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings +// are: +// 00 - 7F : 0xxxxxxx +// 80 - 7FF : 110xxxxx 10xxxxxx +// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx +// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +// +// Values greater than 0x10FFFF are not supported and may or may not write +// characters into buffer, however never will more than kMaxEncodedUTF8Size +// bytes be written, regardless of the value of utf8_char. +enum { kMaxEncodedUTF8Size = 4 }; +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_UTF8_H_ diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc new file mode 100644 index 000000000000..4d437427a88a --- /dev/null +++ b/absl/strings/internal/utf8_test.cc @@ -0,0 +1,58 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/utf8.h" + +#include <cctype> +#include <cstdlib> +#include <cstring> +#include <cstdint> + +#include "gtest/gtest.h" + +namespace { + +TEST(EncodeUTF8Char, BasicFunction) { + std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"}, + {0x00A3, u8"\u00A3"}, + {0x00010000, u8"\U00010000"}, + {0x0000FFFF, u8"\U0000FFFF"}, + {0x0010FFFD, u8"\U0010FFFD"}}; + for (auto &test : tests) { + char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}; + char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'}; + char *buf0_written = + &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)]; + char *buf1_written = + &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)]; + int apparent_length = 7; + while (buf0[apparent_length - 1] == '\x00' && + buf1[apparent_length - 1] == '\xFF') { + if (--apparent_length == 0) break; + } + EXPECT_EQ(apparent_length, buf0_written - buf0); + EXPECT_EQ(apparent_length, buf1_written - buf1); + EXPECT_EQ(apparent_length, test.second.length()); + EXPECT_EQ(std::string(buf0, apparent_length), test.second); + EXPECT_EQ(std::string(buf1, apparent_length), test.second); + } + char buf[32] = "Don't Tread On Me"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000), + absl::strings_internal::kMaxEncodedUTF8Size); + char buf2[32] = "Negative is invalid but sane"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1), + absl::strings_internal::kMaxEncodedUTF8Size); +} + +} // namespace |