diff options
author | Vincent Ambo <tazjin@google.com> | 2020-05-20T01·32+0100 |
---|---|---|
committer | Vincent Ambo <tazjin@google.com> | 2020-05-20T01·32+0100 |
commit | fc8dc48020ac5b52731d0828a96ea4d2526c77ba (patch) | |
tree | 353204eea3268095a9ad3f5345720f32c2615c69 /third_party/abseil_cpp/absl/strings/internal | |
parent | ffb2ae54beb5796cd408fbe15d2d2da09ff37adf (diff) | |
parent | 768eb2ca2857342673fcd462792ce04b8bac3fa3 (diff) |
Add 'third_party/abseil_cpp/' from commit '768eb2ca2857342673fcd462792ce04b8bac3fa3' r/781
git-subtree-dir: third_party/abseil_cpp git-subtree-mainline: ffb2ae54beb5796cd408fbe15d2d2da09ff37adf git-subtree-split: 768eb2ca2857342673fcd462792ce04b8bac3fa3
Diffstat (limited to 'third_party/abseil_cpp/absl/strings/internal')
53 files changed, 11815 insertions, 0 deletions
diff --git a/third_party/abseil_cpp/absl/strings/internal/char_map.h b/third_party/abseil_cpp/absl/strings/internal/char_map.h new file mode 100644 index 000000000000..61484de0b795 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/char_map.h @@ -0,0 +1,156 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Character Map Class +// +// A fast, bit-vector map for 8-bit unsigned characters. +// This class is useful for non-character purposes as well. + +#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ +#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +class Charmap { + public: + constexpr Charmap() : m_() {} + + // Initializes with a given char*. Note that NUL is not treated as + // a terminator, but rather a char to be flicked. + Charmap(const char* str, int len) : m_() { + while (len--) SetChar(*str++); + } + + // Initializes with a given char*. NUL is treated as a terminator + // and will not be in the charmap. + explicit Charmap(const char* str) : m_() { + while (*str) SetChar(*str++); + } + + constexpr bool contains(unsigned char c) const { + return (m_[c / 64] >> (c % 64)) & 0x1; + } + + // Returns true if and only if a character exists in both maps. + bool IntersectsWith(const Charmap& c) const { + for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { + if ((m_[i] & c.m_[i]) != 0) return true; + } + return false; + } + + bool IsZero() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr Charmap Char(char x) { + return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the C-string 's'. + // Note that this is expensively recursive because of the C++11 constexpr + // formulation. Use only in constexpr initializers. + static constexpr Charmap FromString(const char* s) { + return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + } + + // Containing all the chars in the closed interval [lo,hi]. + static constexpr Charmap Range(char lo, char hi) { + return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr Charmap operator~(const Charmap& a) { + return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + private: + constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi, + uint64_t word) { + return OpenRangeFromZeroForWord(hi + 1, word) & + ~OpenRangeFromZeroForWord(lo, word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) + ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast<uint64_t>(0) + : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) { + return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0; + } + + private: + void SetChar(unsigned char c) { + m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +// Mirror the char-classifying predicates in <cctype> +constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } +constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } +constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } +constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } +constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } +constexpr Charmap XDigitCharmap() { + return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); +} +constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } +constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } +constexpr Charmap CntrlCharmap() { + return Charmap::Range(0, 0x7f) & ~PrintCharmap(); +} +constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } +constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } +constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/char_map_benchmark.cc b/third_party/abseil_cpp/absl/strings/internal/char_map_benchmark.cc new file mode 100644 index 000000000000..5cef967b3087 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/char_map_benchmark.cc @@ -0,0 +1,61 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/char_map.h" + +#include <cstdint> + +#include "benchmark/benchmark.h" + +namespace { + +absl::strings_internal::Charmap MakeBenchmarkMap() { + absl::strings_internal::Charmap m; + uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc}; + for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL); + for (uint32_t i = 0; i < 256; ++i) { + if ((x[i / 32] >> (i % 32)) & 1) + m = m | absl::strings_internal::Charmap::Char(i); + } + return m; +} + +// Micro-benchmark for Charmap::contains. +void BM_Contains(benchmark::State& state) { + // Loop-body replicated 10 times to increase time per iteration. + // Argument continuously changed to avoid generating common subexpressions. + const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap(); + unsigned char c = 0; + int ops = 0; + for (auto _ : state) { + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + } + benchmark::DoNotOptimize(ops); +} +BENCHMARK(BM_Contains); + +// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith; +// their running time is data-dependent and it is not worth characterizing +// "typical" data. + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/char_map_test.cc b/third_party/abseil_cpp/absl/strings/internal/char_map_test.cc new file mode 100644 index 000000000000..d3306241a404 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/char_map_test.cc @@ -0,0 +1,172 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/char_map.h" + +#include <cctype> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +constexpr absl::strings_internal::Charmap everything_map = + ~absl::strings_internal::Charmap(); +constexpr absl::strings_internal::Charmap nothing_map{}; + +TEST(Charmap, AllTests) { + const absl::strings_internal::Charmap also_nothing_map("", 0); + ASSERT_TRUE(everything_map.contains('\0')); + ASSERT_TRUE(!nothing_map.contains('\0')); + ASSERT_TRUE(!also_nothing_map.contains('\0')); + for (unsigned char ch = 1; ch != 0; ++ch) { + ASSERT_TRUE(everything_map.contains(ch)); + ASSERT_TRUE(!nothing_map.contains(ch)); + ASSERT_TRUE(!also_nothing_map.contains(ch)); + } + + const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); + ASSERT_TRUE(symbols.contains('&')); + ASSERT_TRUE(symbols.contains('@')); + ASSERT_TRUE(symbols.contains('#')); + ASSERT_TRUE(symbols.contains('^')); + ASSERT_TRUE(!symbols.contains('!')); + ASSERT_TRUE(!symbols.contains('?')); + int cnt = 0; + for (unsigned char ch = 1; ch != 0; ++ch) + cnt += symbols.contains(ch); + ASSERT_EQ(cnt, 4); + + const absl::strings_internal::Charmap lets("^abcde", 3); + const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); + const absl::strings_internal::Charmap lets3("fghij\0klmnop"); + ASSERT_TRUE(lets2.contains('k')); + ASSERT_TRUE(!lets3.contains('k')); + + ASSERT_TRUE(symbols.IntersectsWith(lets)); + ASSERT_TRUE(!lets2.IntersectsWith(lets)); + ASSERT_TRUE(lets.IntersectsWith(symbols)); + ASSERT_TRUE(!lets.IntersectsWith(lets2)); + + ASSERT_TRUE(nothing_map.IsZero()); + ASSERT_TRUE(!lets.IsZero()); +} + +namespace { +std::string Members(const absl::strings_internal::Charmap& m) { + std::string r; + for (size_t i = 0; i < 256; ++i) + if (m.contains(i)) r.push_back(i); + return r; +} + +std::string ClosedRangeString(unsigned char lo, unsigned char hi) { + // Don't depend on lo<hi. Just increment until lo==hi. + std::string s; + while (true) { + s.push_back(lo); + if (lo == hi) break; + ++lo; + } + return s; +} + +} // namespace + +TEST(Charmap, Constexpr) { + constexpr absl::strings_internal::Charmap kEmpty = nothing_map; + EXPECT_THAT(Members(kEmpty), ""); + constexpr absl::strings_internal::Charmap kA = + absl::strings_internal::Charmap::Char('A'); + EXPECT_THAT(Members(kA), "A"); + constexpr absl::strings_internal::Charmap kAZ = + absl::strings_internal::Charmap::Range('A', 'Z'); + EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + constexpr absl::strings_internal::Charmap kIdentifier = + absl::strings_internal::Charmap::Range('0', '9') | + absl::strings_internal::Charmap::Range('A', 'Z') | + absl::strings_internal::Charmap::Range('a', 'z') | + absl::strings_internal::Charmap::Char('_'); + EXPECT_THAT(Members(kIdentifier), + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "_" + "abcdefghijklmnopqrstuvwxyz"); + constexpr absl::strings_internal::Charmap kAll = everything_map; + for (size_t i = 0; i < 256; ++i) { + EXPECT_TRUE(kAll.contains(i)) << i; + } + constexpr absl::strings_internal::Charmap kHello = + absl::strings_internal::Charmap::FromString("Hello, world!"); + EXPECT_THAT(Members(kHello), " !,Hdelorw"); + + // test negation and intersection + constexpr absl::strings_internal::Charmap kABC = + absl::strings_internal::Charmap::Range('A', 'Z') & + ~absl::strings_internal::Charmap::Range('D', 'Z'); + EXPECT_THAT(Members(kABC), "ABC"); +} + +TEST(Charmap, Range) { + // Exhaustive testing takes too long, so test some of the boundaries that + // are perhaps going to cause trouble. + std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, + 16, 17, 30, 31, 32, 33, 63, 64, 65, + 127, 128, 129, 223, 224, 225, 254, 255}; + for (auto lo = poi.begin(); lo != poi.end(); ++lo) { + SCOPED_TRACE(*lo); + for (auto hi = lo; hi != poi.end(); ++hi) { + SCOPED_TRACE(*hi); + EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), + ClosedRangeString(*lo, *hi)); + } + } +} + +bool AsBool(int x) { return static_cast<bool>(x); } + +TEST(CharmapCtype, Match) { + for (int c = 0; c < 256; ++c) { + SCOPED_TRACE(c); + SCOPED_TRACE(static_cast<char>(c)); + EXPECT_EQ(AsBool(std::isupper(c)), + absl::strings_internal::UpperCharmap().contains(c)); + EXPECT_EQ(AsBool(std::islower(c)), + absl::strings_internal::LowerCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isdigit(c)), + absl::strings_internal::DigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalpha(c)), + absl::strings_internal::AlphaCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalnum(c)), + absl::strings_internal::AlnumCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isxdigit(c)), + absl::strings_internal::XDigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isprint(c)), + absl::strings_internal::PrintCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isspace(c)), + absl::strings_internal::SpaceCharmap().contains(c)); + EXPECT_EQ(AsBool(std::iscntrl(c)), + absl::strings_internal::CntrlCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isblank(c)), + absl::strings_internal::BlankCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isgraph(c)), + absl::strings_internal::GraphCharmap().contains(c)); + EXPECT_EQ(AsBool(std::ispunct(c)), + absl::strings_internal::PunctCharmap().contains(c)); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.cc new file mode 100644 index 000000000000..ebf8c0791af9 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.cc @@ -0,0 +1,359 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_bigint.h" + +#include <algorithm> +#include <cassert> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +namespace { + +// Table containing some large powers of 5, for fast computation. + +// Constant step size for entries in the kLargePowersOfFive table. Each entry +// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep +// (or 5**27). +// +// In other words, the Nth entry in the table is 5**(27*N). +// +// 5**27 is the largest power of 5 that fits in 64 bits. +constexpr int kLargePowerOfFiveStep = 27; + +// The largest legal index into the kLargePowersOfFive table. +// +// In other words, the largest precomputed power of 5 is 5**(27*20). +constexpr int kLargestPowerOfFiveIndex = 20; + +// Table of powers of (5**27), up to (5**27)**20 == 5**540. +// +// Used to generate large powers of 5 while limiting the number of repeated +// multiplications required. +// +// clang-format off +const uint32_t kLargePowersOfFive[] = { +// 5**27 (i=1), start=0, end=2 + 0xfa10079dU, 0x6765c793U, +// 5**54 (i=2), start=2, end=6 + 0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U, +// 5**81 (i=3), start=6, end=12 + 0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U, +// 5**108 (i=4), start=12, end=20 + 0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU, + 0x20d3846fU, 0x06d00f73U, +// 5**135 (i=5), start=20, end=30 + 0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U, + 0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU, +// 5**162 (i=6), start=30, end=42 + 0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU, + 0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU, +// 5**189 (i=7), start=42, end=56 + 0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U, + 0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U, + 0x94151217U, 0x0072e9f7U, +// 5**216 (i=8), start=56, end=72 + 0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU, + 0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU, + 0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U, +// 5**243 (i=9), start=72, end=90 + 0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U, + 0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U, + 0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U, +// 5**270 (i=10), start=90, end=110 + 0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U, + 0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U, + 0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U, + 0x0d39e796U, 0x00079250U, +// 5**297 (i=11), start=110, end=132 + 0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U, + 0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U, + 0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U, + 0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U, +// 5**324 (i=12), start=132, end=156 + 0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U, + 0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU, + 0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U, + 0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U, +// 5**351 (i=13), start=156, end=182 + 0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU, + 0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U, + 0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U, + 0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU, + 0x859a4940U, 0x00007fb6U, +// 5**378 (i=14), start=182, end=210 + 0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U, + 0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU, + 0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU, + 0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U, + 0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U, +// 5**405 (i=15), start=210, end=240 + 0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U, + 0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U, + 0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U, + 0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU, + 0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U, +// 5**432 (i=16), start=240, end=272 + 0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU, + 0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U, + 0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U, + 0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU, + 0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U, + 0x3364ea62U, 0x0000086aU, +// 5**459 (i=17), start=272, end=306 + 0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU, + 0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U, + 0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U, + 0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U, + 0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU, + 0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U, +// 5**486 (i=18), start=306, end=342 + 0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU, + 0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U, + 0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU, + 0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U, + 0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU, + 0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU, +// 5**513 (i=19), start=342, end=380 + 0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U, + 0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU, + 0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU, + 0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U, + 0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U, + 0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU, + 0xf0046d27U, 0x0000008dU, +// 5**540 (i=20), start=380, end=420 + 0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU, + 0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U, + 0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U, + 0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U, + 0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U, + 0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU, + 0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U, +}; +// clang-format on + +// Returns a pointer to the big integer data for (5**27)**i. i must be +// between 1 and 20, inclusive. +const uint32_t* LargePowerOfFiveData(int i) { + return kLargePowersOfFive + i * (i - 1); +} + +// Returns the size of the big integer data for (5**27)**i, in words. i must be +// between 1 and 20, inclusive. +int LargePowerOfFiveSize(int i) { return 2 * i; } +} // namespace + +ABSL_DLL const uint32_t kFiveToNth[14] = { + 1, 5, 25, 125, 625, 3125, 15625, + 78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125, +}; + +ABSL_DLL const uint32_t kTenToNth[10] = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, +}; + +template <int max_words> +int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp, + int significant_digits) { + SetToZero(); + assert(fp.type == FloatType::kNumber); + + if (fp.subrange_begin == nullptr) { + // We already exactly parsed the mantissa, so no more work is necessary. + words_[0] = fp.mantissa & 0xffffffffu; + words_[1] = fp.mantissa >> 32; + if (words_[1]) { + size_ = 2; + } else if (words_[0]) { + size_ = 1; + } + return fp.exponent; + } + int exponent_adjust = + ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits); + return fp.literal_exponent + exponent_adjust; +} + +template <int max_words> +int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, + int significant_digits) { + assert(significant_digits <= Digits10() + 1); + SetToZero(); + + bool after_decimal_point = false; + // Discard any leading zeroes before the decimal point + while (begin < end && *begin == '0') { + ++begin; + } + int dropped_digits = 0; + // Discard any trailing zeroes. These may or may not be after the decimal + // point. + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + if (begin < end && *std::prev(end) == '.') { + // If the string ends in '.', either before or after dropping zeroes, then + // drop the decimal point and look for more digits to drop. + dropped_digits = 0; + --end; + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + } else if (dropped_digits) { + // We dropped digits, and aren't sure if they're before or after the decimal + // point. Figure that out now. + const char* dp = std::find(begin, end, '.'); + if (dp != end) { + // The dropped trailing digits were after the decimal point, so don't + // count them. + dropped_digits = 0; + } + } + // Any non-fraction digits we dropped need to be accounted for in our exponent + // adjustment. + int exponent_adjust = dropped_digits; + + uint32_t queued = 0; + int digits_queued = 0; + for (; begin != end && significant_digits > 0; ++begin) { + if (*begin == '.') { + after_decimal_point = true; + continue; + } + if (after_decimal_point) { + // For each fractional digit we emit in our parsed integer, adjust our + // decimal exponent to compensate. + --exponent_adjust; + } + int digit = (*begin - '0'); + --significant_digits; + if (significant_digits == 0 && std::next(begin) != end && + (digit == 0 || digit == 5)) { + // If this is the very last significant digit, but insignificant digits + // remain, we know that the last of those remaining significant digits is + // nonzero. (If it wasn't, we would have stripped it before we got here.) + // So if this final digit is a 0 or 5, adjust it upward by 1. + // + // This adjustment is what allows incredibly large mantissas ending in + // 500000...000000000001 to correctly round up, rather than to nearest. + ++digit; + } + queued = 10 * queued + digit; + ++digits_queued; + if (digits_queued == kMaxSmallPowerOfTen) { + MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]); + AddWithCarry(0, queued); + queued = digits_queued = 0; + } + } + // Encode any remaining digits. + if (digits_queued) { + MultiplyBy(kTenToNth[digits_queued]); + AddWithCarry(0, queued); + } + + // If any insignificant digits remain, we will drop them. But if we have not + // yet read the decimal point, then we have to adjust the exponent to account + // for the dropped digits. + if (begin < end && !after_decimal_point) { + // This call to std::find will result in a pointer either to the decimal + // point, or to the end of our buffer if there was none. + // + // Either way, [begin, decimal_point) will contain the set of dropped digits + // that require an exponent adjustment. + const char* decimal_point = std::find(begin, end, '.'); + exponent_adjust += (decimal_point - begin); + } + return exponent_adjust; +} + +template <int max_words> +/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth( + int n) { + BigUnsigned answer(1u); + + // Seed from the table of large powers, if possible. + bool first_pass = true; + while (n >= kLargePowerOfFiveStep) { + int big_power = + std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); + if (first_pass) { + // just copy, rather than multiplying by 1 + std::copy( + LargePowerOfFiveData(big_power), + LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), + answer.words_); + answer.size_ = LargePowerOfFiveSize(big_power); + first_pass = false; + } else { + answer.MultiplyBy(LargePowerOfFiveSize(big_power), + LargePowerOfFiveData(big_power)); + } + n -= kLargePowerOfFiveStep * big_power; + } + answer.MultiplyByFiveToTheNth(n); + return answer; +} + +template <int max_words> +void BigUnsigned<max_words>::MultiplyStep(int original_size, + const uint32_t* other_words, + int other_size, int step) { + int this_i = std::min(original_size - 1, step); + int other_i = step - this_i; + + uint64_t this_word = 0; + uint64_t carry = 0; + for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) { + uint64_t product = words_[this_i]; + product *= other_words[other_i]; + this_word += product; + carry += (this_word >> 32); + this_word &= 0xffffffff; + } + AddWithCarry(step + 1, carry); + words_[step] = this_word & 0xffffffff; + if (this_word > 0 && size_ <= step) { + size_ = step + 1; + } +} + +template <int max_words> +std::string BigUnsigned<max_words>::ToString() const { + BigUnsigned<max_words> copy = *this; + std::string result; + // Build result in reverse order + while (copy.size() > 0) { + int next_digit = copy.DivMod<10>(); + result.push_back('0' + next_digit); + } + if (result.empty()) { + result.push_back('0'); + } + std::reverse(result.begin(), result.end()); + return result; +} + +template class BigUnsigned<4>; +template class BigUnsigned<84>; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.h b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.h new file mode 100644 index 000000000000..8f702976a80d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.h @@ -0,0 +1,423 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ + +#include <algorithm> +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/config.h" +#include "absl/strings/ascii.h" +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// The largest power that 5 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfFive = 13; +// The largest power that 10 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfTen = 9; + +ABSL_DLL extern const uint32_t + kFiveToNth[kMaxSmallPowerOfFive + 1]; +ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; + +// Large, fixed-width unsigned integer. +// +// Exact rounding for decimal-to-binary floating point conversion requires very +// large integer math, but a design goal of absl::from_chars is to avoid +// allocating memory. The integer precision needed for decimal-to-binary +// conversions is large but bounded, so a huge fixed-width integer class +// suffices. +// +// This is an intentionally limited big integer class. Only needed operations +// are implemented. All storage lives in an array data member, and all +// arithmetic is done in-place, to avoid requiring separate storage for operand +// and result. +// +// This is an internal class. Some methods live in the .cc file, and are +// instantiated only for the values of max_words we need. +template <int max_words> +class BigUnsigned { + public: + static_assert(max_words == 4 || max_words == 84, + "unsupported max_words value"); + + BigUnsigned() : size_(0), words_{} {} + explicit constexpr BigUnsigned(uint64_t v) + : size_((v >> 32) ? 2 : v ? 1 : 0), + words_{static_cast<uint32_t>(v & 0xffffffffu), + static_cast<uint32_t>(v >> 32)} {} + + // Constructs a BigUnsigned from the given string_view containing a decimal + // value. If the input string is not a decimal integer, constructs a 0 + // instead. + explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { + // Check for valid input, returning a 0 otherwise. This is reasonable + // behavior only because this constructor is for unit tests. + if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || + sv.empty()) { + return; + } + int exponent_adjust = + ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); + if (exponent_adjust > 0) { + MultiplyByTenToTheNth(exponent_adjust); + } + } + + // Loads the mantissa value of a previously-parsed float. + // + // Returns the associated decimal exponent. The value of the parsed float is + // exactly *this * 10**exponent. + int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); + + // Returns the number of decimal digits of precision this type provides. All + // numbers with this many decimal digits or fewer are representable by this + // type. + // + // Analagous to std::numeric_limits<BigUnsigned>::digits10. + static constexpr int Digits10() { + // 9975007/1035508 is very slightly less than log10(2**32). + return static_cast<uint64_t>(max_words) * 9975007 / 1035508; + } + + // Shifts left by the given number of bits. + void ShiftLeft(int count) { + if (count > 0) { + const int word_shift = count / 32; + if (word_shift >= max_words) { + SetToZero(); + return; + } + size_ = (std::min)(size_ + word_shift, max_words); + count %= 32; + if (count == 0) { + std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); + } else { + for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { + words_[i] = (words_[i - word_shift] << count) | + (words_[i - word_shift - 1] >> (32 - count)); + } + words_[word_shift] = words_[0] << count; + // Grow size_ if necessary. + if (size_ < max_words && words_[size_]) { + ++size_; + } + } + std::fill(words_, words_ + word_shift, 0u); + } + } + + + // Multiplies by v in-place. + void MultiplyBy(uint32_t v) { + if (size_ == 0 || v == 1) { + return; + } + if (v == 0) { + SetToZero(); + return; + } + const uint64_t factor = v; + uint64_t window = 0; + for (int i = 0; i < size_; ++i) { + window += factor * words_[i]; + words_[i] = window & 0xffffffff; + window >>= 32; + } + // If carry bits remain and there's space for them, grow size_. + if (window && size_ < max_words) { + words_[size_] = window & 0xffffffff; + ++size_; + } + } + + void MultiplyBy(uint64_t v) { + uint32_t words[2]; + words[0] = static_cast<uint32_t>(v); + words[1] = static_cast<uint32_t>(v >> 32); + if (words[1] == 0) { + MultiplyBy(words[0]); + } else { + MultiplyBy(2, words); + } + } + + // Multiplies in place by 5 to the power of n. n must be non-negative. + void MultiplyByFiveToTheNth(int n) { + while (n >= kMaxSmallPowerOfFive) { + MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); + n -= kMaxSmallPowerOfFive; + } + if (n > 0) { + MultiplyBy(kFiveToNth[n]); + } + } + + // Multiplies in place by 10 to the power of n. n must be non-negative. + void MultiplyByTenToTheNth(int n) { + if (n > kMaxSmallPowerOfTen) { + // For large n, raise to a power of 5, then shift left by the same amount. + // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. + MultiplyByFiveToTheNth(n); + ShiftLeft(n); + } else if (n > 0) { + // We can do this more quickly for very small N by using a single + // multiplication. + MultiplyBy(kTenToNth[n]); + } + } + + // Returns the value of 5**n, for non-negative n. This implementation uses + // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling + // MultiplyByFiveToTheNth(). + static BigUnsigned FiveToTheNth(int n); + + // Multiplies by another BigUnsigned, in-place. + template <int M> + void MultiplyBy(const BigUnsigned<M>& other) { + MultiplyBy(other.size(), other.words()); + } + + void SetToZero() { + std::fill(words_, words_ + size_, 0u); + size_ = 0; + } + + // Returns the value of the nth word of this BigUnsigned. This is + // range-checked, and returns 0 on out-of-bounds accesses. + uint32_t GetWord(int index) const { + if (index < 0 || index >= size_) { + return 0; + } + return words_[index]; + } + + // Returns this integer as a decimal string. This is not used in the decimal- + // to-binary conversion; it is intended to aid in testing. + std::string ToString() const; + + int size() const { return size_; } + const uint32_t* words() const { return words_; } + + private: + // Reads the number between [begin, end), possibly containing a decimal point, + // into this BigUnsigned. + // + // Callers are required to ensure [begin, end) contains a valid number, with + // one or more decimal digits and at most one decimal point. This routine + // will behave unpredictably if these preconditions are not met. + // + // Only the first `significant_digits` digits are read. Digits beyond this + // limit are "sticky": If the final significant digit is 0 or 5, and if any + // dropped digit is nonzero, then that final significant digit is adjusted up + // to 1 or 6. This adjustment allows for precise rounding. + // + // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to + // account for the decimal point and for dropped significant digits. After + // this function returns, + // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. + int ReadDigits(const char* begin, const char* end, int significant_digits); + + // Performs a step of big integer multiplication. This computes the full + // (64-bit-wide) values that should be added at the given index (step), and + // adds to that location in-place. + // + // Because our math all occurs in place, we must multiply starting from the + // highest word working downward. (This is a bit more expensive due to the + // extra carries involved.) + // + // This must be called in steps, for each word to be calculated, starting from + // the high end and working down to 0. The first value of `step` should be + // `std::min(original_size + other.size_ - 2, max_words - 1)`. + // The reason for this expression is that multiplying the i'th word from one + // multiplicand and the j'th word of another multiplicand creates a + // two-word-wide value to be stored at the (i+j)'th element. The highest + // word indices we will access are `original_size - 1` from this object, and + // `other.size_ - 1` from our operand. Therefore, + // `original_size + other.size_ - 2` is the first step we should calculate, + // but limited on an upper bound by max_words. + + // Working from high-to-low ensures that we do not overwrite the portions of + // the initial value of *this which are still needed for later steps. + // + // Once called with step == 0, *this contains the result of the + // multiplication. + // + // `original_size` is the size_ of *this before the first call to + // MultiplyStep(). `other_words` and `other_size` are the contents of our + // operand. `step` is the step to perform, as described above. + void MultiplyStep(int original_size, const uint32_t* other_words, + int other_size, int step); + + void MultiplyBy(int other_size, const uint32_t* other_words) { + const int original_size = size_; + const int first_step = + (std::min)(original_size + other_size - 2, max_words - 1); + for (int step = first_step; step >= 0; --step) { + MultiplyStep(original_size, other_words, other_size, step); + } + } + + // Adds a 32-bit value to the index'th word, with carry. + void AddWithCarry(int index, uint32_t value) { + if (value) { + while (index < max_words && value > 0) { + words_[index] += value; + // carry if we overflowed in this word: + if (value > words_[index]) { + value = 1; + ++index; + } else { + value = 0; + } + } + size_ = (std::min)(max_words, (std::max)(index + 1, size_)); + } + } + + void AddWithCarry(int index, uint64_t value) { + if (value && index < max_words) { + uint32_t high = value >> 32; + uint32_t low = value & 0xffffffff; + words_[index] += low; + if (words_[index] < low) { + ++high; + if (high == 0) { + // Carry from the low word caused our high word to overflow. + // Short circuit here to do the right thing. + AddWithCarry(index + 2, static_cast<uint32_t>(1)); + return; + } + } + if (high > 0) { + AddWithCarry(index + 1, high); + } else { + // Normally 32-bit AddWithCarry() sets size_, but since we don't call + // it when `high` is 0, do it ourselves here. + size_ = (std::min)(max_words, (std::max)(index + 1, size_)); + } + } + } + + // Divide this in place by a constant divisor. Returns the remainder of the + // division. + template <uint32_t divisor> + uint32_t DivMod() { + uint64_t accumulator = 0; + for (int i = size_ - 1; i >= 0; --i) { + accumulator <<= 32; + accumulator += words_[i]; + // accumulator / divisor will never overflow an int32_t in this loop + words_[i] = static_cast<uint32_t>(accumulator / divisor); + accumulator = accumulator % divisor; + } + while (size_ > 0 && words_[size_ - 1] == 0) { + --size_; + } + return static_cast<uint32_t>(accumulator); + } + + // The number of elements in words_ that may carry significant values. + // All elements beyond this point are 0. + // + // When size_ is 0, this BigUnsigned stores the value 0. + // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is + // nonzero. This can occur due to overflow truncation. + // In particular, x.size_ != y.size_ does *not* imply x != y. + int size_; + uint32_t words_[max_words]; +}; + +// Compares two big integer instances. +// +// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. +template <int N, int M> +int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = (std::max)(lhs.size(), rhs.size()); + for (int i = limit - 1; i >= 0; --i) { + const uint32_t lhs_word = lhs.GetWord(i); + const uint32_t rhs_word = rhs.GetWord(i); + if (lhs_word < rhs_word) { + return -1; + } else if (lhs_word > rhs_word) { + return 1; + } + } + return 0; +} + +template <int N, int M> +bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = (std::max)(lhs.size(), rhs.size()); + for (int i = 0; i < limit; ++i) { + if (lhs.GetWord(i) != rhs.GetWord(i)) { + return false; + } + } + return true; +} + +template <int N, int M> +bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs == rhs); +} + +template <int N, int M> +bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return Compare(lhs, rhs) == -1; +} + +template <int N, int M> +bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return rhs < lhs; +} +template <int N, int M> +bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(rhs < lhs); +} +template <int N, int M> +bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs < rhs); +} + +// Output operator for BigUnsigned, for testing purposes only. +template <int N> +std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { + return os << num.ToString(); +} + +// Explicit instantiation declarations for the sizes of BigUnsigned that we +// are using. +// +// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is +// still bigger than an int128, and 84 is a large value we will want to use +// in the from_chars implementation. +// +// Comments justifying the use of 84 belong in the from_chars implementation, +// and will be added in a follow-up CL. +extern template class BigUnsigned<4>; +extern template class BigUnsigned<84>; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_bigint_test.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint_test.cc new file mode 100644 index 000000000000..363bcb03d938 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint_test.cc @@ -0,0 +1,205 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_bigint.h" + +#include <string> + +#include "gtest/gtest.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +TEST(BigUnsigned, ShiftLeft) { + { + // Check that 3 * 2**100 is calculated correctly + BigUnsigned<4> num(3u); + num.ShiftLeft(100); + EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128")); + } + { + // Test that overflow is truncated properly. + // 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint. + // Shifting left by 125 bits should truncate off the high bit, so that + // 15 << 125 == 7 << 125 + // after truncation. + BigUnsigned<4> a(15u); + BigUnsigned<4> b(7u); + BigUnsigned<4> c(3u); + a.ShiftLeft(125); + b.ShiftLeft(125); + c.ShiftLeft(125); + EXPECT_EQ(a, b); + EXPECT_NE(a, c); + } + { + // Same test, larger bigint: + BigUnsigned<84> a(15u); + BigUnsigned<84> b(7u); + BigUnsigned<84> c(3u); + a.ShiftLeft(84 * 32 - 3); + b.ShiftLeft(84 * 32 - 3); + c.ShiftLeft(84 * 32 - 3); + EXPECT_EQ(a, b); + EXPECT_NE(a, c); + } + { + // Check that incrementally shifting has the same result as doing it all at + // once (attempting to capture corner cases.) + const std::string seed = "1234567890123456789012345678901234567890"; + BigUnsigned<84> a(seed); + for (int i = 1; i <= 84 * 32; ++i) { + a.ShiftLeft(1); + BigUnsigned<84> b(seed); + b.ShiftLeft(i); + EXPECT_EQ(a, b); + } + // And we should have fully rotated all bits off by now: + EXPECT_EQ(a, BigUnsigned<84>(0u)); + } +} + +TEST(BigUnsigned, MultiplyByUint32) { + const BigUnsigned<84> factorial_100( + "933262154439441526816992388562667004907159682643816214685929638952175999" + "932299156089414639761565182862536979208272237582511852109168640000000000" + "00000000000000"); + BigUnsigned<84> a(1u); + for (uint32_t i = 1; i <= 100; ++i) { + a.MultiplyBy(i); + } + EXPECT_EQ(a, BigUnsigned<84>(factorial_100)); +} + +TEST(BigUnsigned, MultiplyByBigUnsigned) { + { + // Put the terms of factorial_200 into two bigints, and multiply them + // together. + const BigUnsigned<84> factorial_200( + "7886578673647905035523632139321850622951359776871732632947425332443594" + "4996340334292030428401198462390417721213891963883025764279024263710506" + "1926624952829931113462857270763317237396988943922445621451664240254033" + "2918641312274282948532775242424075739032403212574055795686602260319041" + "7032406235170085879617892222278962370389737472000000000000000000000000" + "0000000000000000000000000"); + BigUnsigned<84> evens(1u); + BigUnsigned<84> odds(1u); + for (uint32_t i = 1; i < 200; i += 2) { + odds.MultiplyBy(i); + evens.MultiplyBy(i + 1); + } + evens.MultiplyBy(odds); + EXPECT_EQ(evens, factorial_200); + } + { + // Multiply various powers of 10 together. + for (int a = 0 ; a < 700; a += 25) { + SCOPED_TRACE(a); + BigUnsigned<84> a_value("3" + std::string(a, '0')); + for (int b = 0; b < (700 - a); b += 25) { + SCOPED_TRACE(b); + BigUnsigned<84> b_value("2" + std::string(b, '0')); + BigUnsigned<84> expected_product("6" + std::string(a + b, '0')); + b_value.MultiplyBy(a_value); + EXPECT_EQ(b_value, expected_product); + } + } + } +} + +TEST(BigUnsigned, MultiplyByOverflow) { + { + // Check that multiplcation overflow predictably truncates. + + // A big int with all bits on. + BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455"); + // Modulo 2**128, this is equal to -1. Therefore the square of this, + // modulo 2**128, should be 1. + all_bits_on.MultiplyBy(all_bits_on); + EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u)); + } + { + // Try multiplying a large bigint by 2**50, and compare the result to + // shifting. + BigUnsigned<4> value_1("12345678901234567890123456789012345678"); + BigUnsigned<4> value_2("12345678901234567890123456789012345678"); + BigUnsigned<4> two_to_fiftieth(1u); + two_to_fiftieth.ShiftLeft(50); + + value_1.ShiftLeft(50); + value_2.MultiplyBy(two_to_fiftieth); + EXPECT_EQ(value_1, value_2); + } +} + +TEST(BigUnsigned, FiveToTheNth) { + { + // Sanity check that MultiplyByFiveToTheNth gives consistent answers, up to + // and including overflow. + for (int i = 0; i < 1160; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(123u); + BigUnsigned<84> value_2(123u); + value_1.MultiplyByFiveToTheNth(i); + for (int j = 0; j < i; j++) { + value_2.MultiplyBy(5u); + } + EXPECT_EQ(value_1, value_2); + } + } + { + // Check that the faster, table-lookup-based static method returns the same + // result that multiplying in-place would return, up to and including + // overflow. + for (int i = 0; i < 1160; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(1u); + value_1.MultiplyByFiveToTheNth(i); + BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i); + EXPECT_EQ(value_1, value_2); + } + } +} + +TEST(BigUnsigned, TenToTheNth) { + { + // Sanity check MultiplyByTenToTheNth. + for (int i = 0; i < 800; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(123u); + BigUnsigned<84> value_2(123u); + value_1.MultiplyByTenToTheNth(i); + for (int j = 0; j < i; j++) { + value_2.MultiplyBy(10u); + } + EXPECT_EQ(value_1, value_2); + } + } + { + // Alternate testing approach, taking advantage of the decimal parser. + for (int i = 0; i < 200; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(135u); + value_1.MultiplyByTenToTheNth(i); + BigUnsigned<84> value_2("135" + std::string(i, '0')); + EXPECT_EQ(value_1, value_2); + } + } +} + + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_parse.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.cc new file mode 100644 index 000000000000..fd6d9480fc04 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.cc @@ -0,0 +1,504 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/charconv.h" + +#include <cassert> +#include <cstdint> +#include <limits> + +#include "absl/strings/internal/memutil.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +// ParseFloat<10> will read the first 19 significant digits of the mantissa. +// This number was chosen for multiple reasons. +// +// (a) First, for whatever integer type we choose to represent the mantissa, we +// want to choose the largest possible number of decimal digits for that integer +// type. We are using uint64_t, which can express any 19-digit unsigned +// integer. +// +// (b) Second, we need to parse enough digits that the binary value of any +// mantissa we capture has more bits of resolution than the mantissa +// representation in the target float. Our algorithm requires at least 3 bits +// of headway, but 19 decimal digits give a little more than that. +// +// The following static assertions verify the above comments: +constexpr int kDecimalMantissaDigitsMax = 19; + +static_assert(std::numeric_limits<uint64_t>::digits10 == + kDecimalMantissaDigitsMax, + "(a) above"); + +// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. +static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed"); +static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact"); +static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); + +// The lowest valued 19-digit decimal mantissa we can read still contains +// sufficient information to reconstruct a binary mantissa. +static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); + +// ParseFloat<16> will read the first 15 significant digits of the mantissa. +// +// Because a base-16-to-base-2 conversion can be done exactly, we do not need +// to maximize the number of scanned hex digits to improve our conversion. What +// is required is to scan two more bits than the mantissa can represent, so that +// we always round correctly. +// +// (One extra bit does not suffice to perform correct rounding, since a number +// exactly halfway between two representable floats has unique rounding rules, +// so we need to differentiate between a "halfway between" number and a "closer +// to the larger value" number.) +constexpr int kHexadecimalMantissaDigitsMax = 15; + +// The minimum number of significant bits that will be read from +// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since +// the most significant digit can be a "1", which only contributes a single +// significant bit. +constexpr int kGuaranteedHexadecimalMantissaBitPrecision = + 4 * kHexadecimalMantissaDigitsMax - 3; + +static_assert(kGuaranteedHexadecimalMantissaBitPrecision > + std::numeric_limits<double>::digits + 2, + "kHexadecimalMantissaDigitsMax too small"); + +// We also impose a limit on the number of significant digits we will read from +// an exponent, to avoid having to deal with integer overflow. We use 9 for +// this purpose. +// +// If we read a 9 digit exponent, the end result of the conversion will +// necessarily be infinity or zero, depending on the sign of the exponent. +// Therefore we can just drop extra digits on the floor without any extra +// logic. +constexpr int kDecimalExponentDigitsMax = 9; +static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax, + "int type too small"); + +// To avoid incredibly large inputs causing integer overflow for our exponent, +// we impose an arbitrary but very large limit on the number of significant +// digits we will accept. The implementation refuses to match a string with +// more consecutive significant mantissa digits than this. +constexpr int kDecimalDigitLimit = 50000000; + +// Corresponding limit for hexadecimal digit inputs. This is one fourth the +// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires +// a binary exponent adjustment of 4. +constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4; + +// The largest exponent we can read is 999999999 (per +// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get +// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these +// comfortably fits in an integer. +// +// We count kDecimalDigitLimit twice because there are independent limits for +// numbers before and after the decimal point. (In the case where there are no +// significant digits before the decimal point, there are independent limits for +// post-decimal-point leading zeroes and for significant digits.) +static_assert(999999999 + 2 * kDecimalDigitLimit < + std::numeric_limits<int>::max(), + "int type too small"); +static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) < + std::numeric_limits<int>::max(), + "int type too small"); + +// Returns true if the provided bitfield allows parsing an exponent value +// (e.g., "1.5e100"). +bool AllowExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific || !fixed; +} + +// Returns true if the provided bitfield requires an exponent value be present. +bool RequireExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific && !fixed; +} + +const int8_t kAsciiToInt[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +// Returns true if `ch` is a digit in the given base +template <int base> +bool IsDigit(char ch); + +// Converts a valid `ch` to its digit value in the given base. +template <int base> +unsigned ToDigit(char ch); + +// Returns true if `ch` is the exponent delimiter for the given base. +template <int base> +bool IsExponentCharacter(char ch); + +// Returns the maximum number of significant digits we will read for a float +// in the given base. +template <int base> +constexpr int MantissaDigitsMax(); + +// Returns the largest consecutive run of digits we will accept when parsing a +// number in the given base. +template <int base> +constexpr int DigitLimit(); + +// Returns the amount the exponent must be adjusted by for each dropped digit. +// (For decimal this is 1, since the digits are in base 10 and the exponent base +// is also 10, but for hexadecimal this is 4, since the digits are base 16 but +// the exponent base is 2.) +template <int base> +constexpr int DigitMagnitude(); + +template <> +bool IsDigit<10>(char ch) { + return ch >= '0' && ch <= '9'; +} +template <> +bool IsDigit<16>(char ch) { + return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0; +} + +template <> +unsigned ToDigit<10>(char ch) { + return ch - '0'; +} +template <> +unsigned ToDigit<16>(char ch) { + return kAsciiToInt[static_cast<unsigned char>(ch)]; +} + +template <> +bool IsExponentCharacter<10>(char ch) { + return ch == 'e' || ch == 'E'; +} + +template <> +bool IsExponentCharacter<16>(char ch) { + return ch == 'p' || ch == 'P'; +} + +template <> +constexpr int MantissaDigitsMax<10>() { + return kDecimalMantissaDigitsMax; +} +template <> +constexpr int MantissaDigitsMax<16>() { + return kHexadecimalMantissaDigitsMax; +} + +template <> +constexpr int DigitLimit<10>() { + return kDecimalDigitLimit; +} +template <> +constexpr int DigitLimit<16>() { + return kHexadecimalDigitLimit; +} + +template <> +constexpr int DigitMagnitude<10>() { + return 1; +} +template <> +constexpr int DigitMagnitude<16>() { + return 4; +} + +// Reads decimal digits from [begin, end) into *out. Returns the number of +// digits consumed. +// +// After max_digits has been read, keeps consuming characters, but no longer +// adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit +// is set; otherwise, it is left unmodified. +// +// If no digits are matched, returns 0 and leaves *out unchanged. +// +// ConsumeDigits does not protect against overflow on *out; max_digits must +// be chosen with respect to type T to avoid the possibility of overflow. +template <int base, typename T> +std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, + T* out, bool* dropped_nonzero_digit) { + if (base == 10) { + assert(max_digits <= std::numeric_limits<T>::digits10); + } else if (base == 16) { + assert(max_digits * 4 <= std::numeric_limits<T>::digits); + } + const char* const original_begin = begin; + + // Skip leading zeros, but only if *out is zero. + // They don't cause an overflow so we don't have to count them for + // `max_digits`. + while (!*out && end != begin && *begin == '0') ++begin; + + T accumulator = *out; + const char* significant_digits_end = + (end - begin > max_digits) ? begin + max_digits : end; + while (begin < significant_digits_end && IsDigit<base>(*begin)) { + // Do not guard against *out overflow; max_digits was chosen to avoid this. + // Do assert against it, to detect problems in debug builds. + auto digit = static_cast<T>(ToDigit<base>(*begin)); + assert(accumulator * base >= accumulator); + accumulator *= base; + assert(accumulator + digit >= accumulator); + accumulator += digit; + ++begin; + } + bool dropped_nonzero = false; + while (begin < end && IsDigit<base>(*begin)) { + dropped_nonzero = dropped_nonzero || (*begin != '0'); + ++begin; + } + if (dropped_nonzero && dropped_nonzero_digit != nullptr) { + *dropped_nonzero_digit = true; + } + *out = accumulator; + return begin - original_begin; +} + +// Returns true if `v` is one of the chars allowed inside parentheses following +// a NaN. +bool IsNanChar(char v) { + return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') || + (v >= 'A' && v <= 'Z'); +} + +// Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If +// one is found, sets `out` appropriately and returns true. +bool ParseInfinityOrNan(const char* begin, const char* end, + strings_internal::ParsedFloat* out) { + if (end - begin < 3) { + return false; + } + switch (*begin) { + case 'i': + case 'I': { + // An infinity string consists of the characters "inf" or "infinity", + // case insensitive. + if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kInfinity; + if (end - begin >= 8 && + strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) { + out->end = begin + 8; + } else { + out->end = begin + 3; + } + return true; + } + case 'n': + case 'N': { + // A NaN consists of the characters "nan", case insensitive, optionally + // followed by a parenthesized sequence of zero or more alphanumeric + // characters and/or underscores. + if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kNan; + out->end = begin + 3; + // NaN is allowed to be followed by a parenthesized string, consisting of + // only the characters [a-zA-Z0-9_]. Match that if it's present. + begin += 3; + if (begin < end && *begin == '(') { + const char* nan_begin = begin + 1; + while (nan_begin < end && IsNanChar(*nan_begin)) { + ++nan_begin; + } + if (nan_begin < end && *nan_begin == ')') { + // We found an extra NaN specifier range + out->subrange_begin = begin + 1; + out->subrange_end = nan_begin; + out->end = nan_begin + 1; + } + } + return true; + } + default: + return false; + } +} +} // namespace + +namespace strings_internal { + +template <int base> +strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, + chars_format format_flags) { + strings_internal::ParsedFloat result; + + // Exit early if we're given an empty range. + if (begin == end) return result; + + // Handle the infinity and NaN cases. + if (ParseInfinityOrNan(begin, end, &result)) { + return result; + } + + const char* const mantissa_begin = begin; + while (begin < end && *begin == '0') { + ++begin; // skip leading zeros + } + uint64_t mantissa = 0; + + int exponent_adjustment = 0; + bool mantissa_is_inexact = false; + std::size_t pre_decimal_digits = ConsumeDigits<base>( + begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); + begin += pre_decimal_digits; + int digits_left; + if (pre_decimal_digits >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } else if (pre_decimal_digits > MantissaDigitsMax<base>()) { + // We dropped some non-fraction digits on the floor. Adjust our exponent + // to compensate. + exponent_adjustment = + static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>()); + digits_left = 0; + } else { + digits_left = + static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits); + } + if (begin < end && *begin == '.') { + ++begin; + if (mantissa == 0) { + // If we haven't seen any nonzero digits yet, keep skipping zeros. We + // have to adjust the exponent to reflect the changed place value. + const char* begin_zeros = begin; + while (begin < end && *begin == '0') { + ++begin; + } + std::size_t zeros_skipped = begin - begin_zeros; + if (zeros_skipped >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } + exponent_adjustment -= static_cast<int>(zeros_skipped); + } + std::size_t post_decimal_digits = ConsumeDigits<base>( + begin, end, digits_left, &mantissa, &mantissa_is_inexact); + begin += post_decimal_digits; + + // Since `mantissa` is an integer, each significant digit we read after + // the decimal point requires an adjustment to the exponent. "1.23e0" will + // be stored as `mantissa` == 123 and `exponent` == -2 (that is, + // "123e-2"). + if (post_decimal_digits >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } else if (post_decimal_digits > digits_left) { + exponent_adjustment -= digits_left; + } else { + exponent_adjustment -= post_decimal_digits; + } + } + // If we've found no mantissa whatsoever, this isn't a number. + if (mantissa_begin == begin) { + return result; + } + // A bare "." doesn't count as a mantissa either. + if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { + return result; + } + + if (mantissa_is_inexact) { + // We dropped significant digits on the floor. Handle this appropriately. + if (base == 10) { + // If we truncated significant decimal digits, store the full range of the + // mantissa for future big integer math for exact rounding. + result.subrange_begin = mantissa_begin; + result.subrange_end = begin; + } else if (base == 16) { + // If we truncated hex digits, reflect this fact by setting the low + // ("sticky") bit. This allows for correct rounding in all cases. + mantissa |= 1; + } + } + result.mantissa = mantissa; + + const char* const exponent_begin = begin; + result.literal_exponent = 0; + bool found_exponent = false; + if (AllowExponent(format_flags) && begin < end && + IsExponentCharacter<base>(*begin)) { + bool negative_exponent = false; + ++begin; + if (begin < end && *begin == '-') { + negative_exponent = true; + ++begin; + } else if (begin < end && *begin == '+') { + ++begin; + } + const char* const exponent_digits_begin = begin; + // Exponent is always expressed in decimal, even for hexadecimal floats. + begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, + &result.literal_exponent, nullptr); + if (begin == exponent_digits_begin) { + // there were no digits where we expected an exponent. We failed to read + // an exponent and should not consume the 'e' after all. Rewind 'begin'. + found_exponent = false; + begin = exponent_begin; + } else { + found_exponent = true; + if (negative_exponent) { + result.literal_exponent = -result.literal_exponent; + } + } + } + + if (!found_exponent && RequireExponent(format_flags)) { + // Provided flags required an exponent, but none was found. This results + // in a failure to scan. + return result; + } + + // Success! + result.type = strings_internal::FloatType::kNumber; + if (result.mantissa > 0) { + result.exponent = result.literal_exponent + + (DigitMagnitude<base>() * exponent_adjustment); + } else { + result.exponent = 0; + } + result.end = begin; + return result; +} + +template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + chars_format format_flags); +template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + chars_format format_flags); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_parse.h b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.h new file mode 100644 index 000000000000..505998b5394a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.h @@ -0,0 +1,99 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/charconv.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Enum indicating whether a parsed float is a number or special value. +enum class FloatType { kNumber, kInfinity, kNan }; + +// The decomposed parts of a parsed `float` or `double`. +struct ParsedFloat { + // Representation of the parsed mantissa, with the decimal point adjusted to + // make it an integer. + // + // During decimal scanning, this contains 19 significant digits worth of + // mantissa value. If digits beyond this point are found, they + // are truncated, and if any of these dropped digits are nonzero, then + // `mantissa` is inexact, and the full mantissa is stored in [subrange_begin, + // subrange_end). + // + // During hexadecimal scanning, this contains 15 significant hex digits worth + // of mantissa value. Digits beyond this point are sticky -- they are + // truncated, but if any dropped digits are nonzero, the low bit of mantissa + // will be set. (This allows for precise rounding, and avoids the need + // to store the full mantissa in [subrange_begin, subrange_end).) + uint64_t mantissa = 0; + + // Floating point expontent. This reflects any decimal point adjustments and + // any truncated digits from the mantissa. The absolute value of the parsed + // number is represented by mantissa * (base ** exponent), where base==10 for + // decimal floats, and base==2 for hexadecimal floats. + int exponent = 0; + + // The literal exponent value scanned from the input, or 0 if none was + // present. This does not reflect any adjustments applied to mantissa. + int literal_exponent = 0; + + // The type of number scanned. + FloatType type = FloatType::kNumber; + + // When non-null, [subrange_begin, subrange_end) marks a range of characters + // that require further processing. The meaning is dependent on float type. + // If type == kNumber and this is set, this is a "wide input": the input + // mantissa contained more than 19 digits. The range contains the full + // mantissa. It plus `literal_exponent` need to be examined to find the best + // floating point match. + // If type == kNan and this is set, the range marks the contents of a + // matched parenthesized character region after the NaN. + const char* subrange_begin = nullptr; + const char* subrange_end = nullptr; + + // One-past-the-end of the successfully parsed region, or nullptr if no + // matching pattern was found. + const char* end = nullptr; +}; + +// Read the floating point number in the provided range, and populate +// ParsedFloat accordingly. +// +// format_flags is a bitmask value specifying what patterns this API will match. +// `scientific` and `fixed` are honored per std::from_chars rules +// ([utility.from.chars], C++17): if exactly one of these bits is set, then an +// exponent is required, or dislallowed, respectively. +// +// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is +// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat. +template <int base> +ParsedFloat ParseFloat(const char* begin, const char* end, + absl::chars_format format_flags); + +extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + absl::chars_format format_flags); +extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + absl::chars_format format_flags); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_parse_test.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_parse_test.cc new file mode 100644 index 000000000000..bc2d11187651 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_parse_test.cc @@ -0,0 +1,357 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_parse.h" + +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/str_cat.h" + +using absl::chars_format; +using absl::strings_internal::FloatType; +using absl::strings_internal::ParsedFloat; +using absl::strings_internal::ParseFloat; + +namespace { + +// Check that a given string input is parsed to the expected mantissa and +// exponent. +// +// Input string `s` must contain a '$' character. It marks the end of the +// characters that should be consumed by the match. It is stripped from the +// input to ParseFloat. +// +// If input string `s` contains '[' and ']' characters, these mark the region +// of characters that should be marked as the "subrange". For NaNs, this is +// the location of the extended NaN string. For numbers, this is the location +// of the full, over-large mantissa. +template <int base> +void ExpectParsedFloat(std::string s, absl::chars_format format_flags, + FloatType expected_type, uint64_t expected_mantissa, + int expected_exponent, + int expected_literal_exponent = -999) { + SCOPED_TRACE(s); + + int begin_subrange = -1; + int end_subrange = -1; + // If s contains '[' and ']', then strip these characters and set the subrange + // indices appropriately. + std::string::size_type open_bracket_pos = s.find('['); + if (open_bracket_pos != std::string::npos) { + begin_subrange = static_cast<int>(open_bracket_pos); + s.replace(open_bracket_pos, 1, ""); + std::string::size_type close_bracket_pos = s.find(']'); + ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos, + "Test input contains [ without matching ]"); + end_subrange = static_cast<int>(close_bracket_pos); + s.replace(close_bracket_pos, 1, ""); + } + const std::string::size_type expected_characters_matched = s.find('$'); + ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, + "Input string must contain $"); + s.replace(expected_characters_matched, 1, ""); + + ParsedFloat parsed = + ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); + + EXPECT_NE(parsed.end, nullptr); + if (parsed.end == nullptr) { + return; // The following tests are not useful if we fully failed to parse + } + EXPECT_EQ(parsed.type, expected_type); + if (begin_subrange == -1) { + EXPECT_EQ(parsed.subrange_begin, nullptr); + EXPECT_EQ(parsed.subrange_end, nullptr); + } else { + EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange); + EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange); + } + if (parsed.type == FloatType::kNumber) { + EXPECT_EQ(parsed.mantissa, expected_mantissa); + EXPECT_EQ(parsed.exponent, expected_exponent); + if (expected_literal_exponent != -999) { + EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent); + } + } + auto characters_matched = static_cast<int>(parsed.end - s.data()); + EXPECT_EQ(characters_matched, expected_characters_matched); +} + +// Check that a given string input is parsed to the expected mantissa and +// exponent. +// +// Input string `s` must contain a '$' character. It marks the end of the +// characters that were consumed by the match. +template <int base> +void ExpectNumber(std::string s, absl::chars_format format_flags, + uint64_t expected_mantissa, int expected_exponent, + int expected_literal_exponent = -999) { + ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber, + expected_mantissa, expected_exponent, + expected_literal_exponent); +} + +// Check that a given string input is parsed to the given special value. +// +// This tests against both number bases, since infinities and NaNs have +// identical representations in both modes. +void ExpectSpecial(const std::string& s, absl::chars_format format_flags, + FloatType type) { + ExpectParsedFloat<10>(s, format_flags, type, 0, 0); + ExpectParsedFloat<16>(s, format_flags, type, 0, 0); +} + +// Check that a given input string is not matched by Float. +template <int base> +void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) { + ParsedFloat parsed = + ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); + EXPECT_EQ(parsed.end, nullptr); +} + +TEST(ParseFloat, SimpleValue) { + // Test that various forms of floating point numbers all parse correctly. + ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3); + ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8); + + // ExpectNumber does not attempt to drop trailing zeroes. + ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900, + -5); + ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general, + 0x1234abcdef000, -20); + + // Ensure non-matching characters after a number are ignored, even when they + // look like potentially matching characters. + ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789, + -3); + ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general, + 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8); + + // Ensure we can read a full resolution mantissa without overflow. + ExpectNumber<10>("9999999999999999999$", chars_format::general, + 9999999999999999999u, 0); + ExpectNumber<16>("fffffffffffffff$", chars_format::general, + 0xfffffffffffffffu, 0); + + // Check that zero is consistently read. + ExpectNumber<10>("0$", chars_format::general, 0, 0); + ExpectNumber<16>("0$", chars_format::general, 0, 0); + ExpectNumber<10>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0.00000000000000000000000000000000e123456$", + chars_format::general, 0, 0); + ExpectNumber<16>("0.00000000000000000000000000000000p123456$", + chars_format::general, 0, 0); +} + +TEST(ParseFloat, LargeDecimalMantissa) { + // After 19 significant decimal digits in the mantissa, ParsedFloat will + // truncate additional digits. We need to test that: + // 1) the truncation to 19 digits happens + // 2) the returned exponent reflects the dropped significant digits + // 3) a correct literal_exponent is set + // + // If and only if a significant digit is found after 19 digits, then the + // entirety of the mantissa in case the exact value is needed to make a + // rounding decision. The [ and ] characters below denote where such a + // subregion was marked by by ParseFloat. They are not part of the input. + + // Mark a capture group only if a dropped digit is significant (nonzero). + ExpectNumber<10>("100000000000000000000000000$", chars_format::general, + 1000000000000000000, + /* adjusted exponent */ 8); + + ExpectNumber<10>("123456789123456789100000000$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8); + + ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + // Leading zeroes should not count towards the 19 significant digit limit + ExpectNumber<10>("[00000000123456789123456789123456789]$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("00000000123456789123456789100000000$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8); + + // Truncated digits after the decimal point should not cause a further + // exponent adjustment. + ExpectNumber<10>("1.234567891234567891e123$", chars_format::general, + 1234567891234567891, 105); + ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 105, + /* literal exponent */ 123); + + // Ensure we truncate, and not round. (The from_chars algorithm we use + // depends on our guess missing low, if it misses, so we need the rounding + // error to be downward.) + ExpectNumber<10>("[1999999999999999999999]$", chars_format::general, + 1999999999999999999, + /* adjusted exponent */ 3, + /* literal exponent */ 0); +} + +TEST(ParseFloat, LargeHexadecimalMantissa) { + // After 15 significant hex digits in the mantissa, ParsedFloat will treat + // additional digits as sticky, We need to test that: + // 1) The truncation to 15 digits happens + // 2) The returned exponent reflects the dropped significant digits + // 3) If a nonzero digit is dropped, the low bit of mantissa is set. + + ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general, + 0x123456789abcdef, 60); + + // Leading zeroes should not count towards the 15 significant digit limit + ExpectNumber<16>("000000123456789abcdef123456789abcdef$", + chars_format::general, 0x123456789abcdef, 60); + + // Truncated digits after the radix point should not cause a further + // exponent adjustment. + ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general, + 0x123456789abcdef, 44); + ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", + chars_format::general, 0x123456789abcdef, 44); + + // test sticky digit behavior. The low bit should be set iff any dropped + // digit is nonzero. + ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general, + 0x123456789abcdee, 60); +} + +TEST(ParseFloat, ScientificVsFixed) { + // In fixed mode, an exponent is never matched (but the remainder of the + // number will be matched.) + ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8); + ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3); + ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36); + ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8); + + // In scientific mode, numbers don't match *unless* they have an exponent. + ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3); + ExpectFailedParse<10>("-123456.789$", chars_format::scientific); + ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef, + -8); + ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific); +} + +TEST(ParseFloat, Infinity) { + ExpectFailedParse<10>("in", chars_format::general); + ExpectFailedParse<16>("in", chars_format::general); + ExpectFailedParse<10>("inx", chars_format::general); + ExpectFailedParse<16>("inx", chars_format::general); + ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity); + ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity); +} + +TEST(ParseFloat, NaN) { + ExpectFailedParse<10>("na", chars_format::general); + ExpectFailedParse<16>("na", chars_format::general); + ExpectFailedParse<10>("nah", chars_format::general); + ExpectFailedParse<16>("nah", chars_format::general); + ExpectSpecial("nan$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$", chars_format::general, FloatType::kNan); + ExpectSpecial("nAn$", chars_format::general, FloatType::kNan); + ExpectSpecial("NAN$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan); + + // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to + // appear after an NaN. Check that this is allowed, and that the correct + // characters are grouped. + // + // (The characters [ and ] in the pattern below delimit the expected matched + // subgroup; they are not part of the input passed to ParseFloat.) + ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan); + // If the subgroup contains illegal characters, don't match it at all. + ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan); + // Also cope with a missing close paren. + ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/cord_internal.h b/third_party/abseil_cpp/absl/strings/internal/cord_internal.h new file mode 100644 index 000000000000..830ceaf473bd --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/cord_internal.h @@ -0,0 +1,150 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ + +#include <atomic> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <type_traits> + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Wraps std::atomic for reference counting. +class Refcount { + public: + Refcount() : count_{1} {} + ~Refcount() {} + + // Increments the reference count by 1. Imposes no memory ordering. + inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + + // Asserts that the current refcount is greater than 0. If the refcount is + // greater than 1, decrements the reference count by 1. + // + // Returns false if there are no references outstanding; true otherwise. + // Inserts barriers to ensure that state written before this method returns + // false will be visible to a thread that just observed this method returning + // false. + inline bool Decrement() { + int32_t refcount = count_.load(std::memory_order_acquire); + assert(refcount > 0); + return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + } + + // Same as Decrement but expect that refcount is greater than 1. + inline bool DecrementExpectHighRefcount() { + int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); + assert(refcount > 0); + return refcount != 1; + } + + // Returns the current reference count using acquire semantics. + inline int32_t Get() const { return count_.load(std::memory_order_acquire); } + + // Returns whether the atomic integer is 1. + // If the reference count is used in the conventional way, a + // reference count of 1 implies that the current thread owns the + // reference and no other thread shares it. + // This call performs the test for a reference count of one, and + // performs the memory barrier needed for the owning thread + // to act on the object, knowing that it has exclusive access to the + // object. + inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + + private: + std::atomic<int32_t> count_; +}; + +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. + +struct CordRepConcat; +struct CordRepSubstring; +struct CordRepExternal; + +struct CordRep { + // The following three fields have to be less than 32 bytes since + // that is the smallest supported flat node size. + size_t length; + Refcount refcount; + // If tag < FLAT, it represents CordRepKind and indicates the type of node. + // Otherwise, the node type is CordRepFlat and the tag is the encoded size. + uint8_t tag; + char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + + inline CordRepConcat* concat(); + inline const CordRepConcat* concat() const; + inline CordRepSubstring* substring(); + inline const CordRepSubstring* substring() const; + inline CordRepExternal* external(); + inline const CordRepExternal* external() const; +}; + +struct CordRepConcat : public CordRep { + CordRep* left; + CordRep* right; + + uint8_t depth() const { return static_cast<uint8_t>(data[0]); } + void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } +}; + +struct CordRepSubstring : public CordRep { + size_t start; // Starting offset of substring in child + CordRep* child; +}; + +// TODO(strel): replace the following logic (and related functions in cord.cc) +// with container_internal::Layout. + +// Alignment requirement for CordRepExternal so that the type erased releaser +// will be stored at a suitably aligned address. +constexpr size_t ExternalRepAlignment() { +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + return __STDCPP_DEFAULT_NEW_ALIGNMENT__; +#else + return alignof(max_align_t); +#endif +} + +// Type for function pointer that will invoke and destroy the type-erased +// releaser function object. Accepts a pointer to the releaser and the +// `string_view` that were passed in to `NewExternalRep` below. The return value +// is the size of the `Releaser` type. +using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); + +// External CordReps are allocated together with a type erased releaser. The +// releaser is stored in the memory directly following the CordRepExternal. +struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { + const char* base; + // Pointer to function that knows how to call and destroy the releaser. + ExternalReleaserInvoker releaser_invoker; +}; + +// TODO(strel): look into removing, it doesn't seem like anything relies on this +static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/escaping.cc b/third_party/abseil_cpp/absl/strings/internal/escaping.cc new file mode 100644 index 000000000000..c5271286ad00 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/escaping.cc @@ -0,0 +1,180 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/escaping.h" + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +const char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from https://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from https://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from https://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from https://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. + while (cur_src < limit_src - 3) { // While we have >= 32 bits. + uint32_t in = absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. + szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. + if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/escaping.h b/third_party/abseil_cpp/absl/strings/internal/escaping.h new file mode 100644 index 000000000000..6a9ce602d9ed --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/escaping.h @@ -0,0 +1,58 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_H_ + +#include <cassert> + +#include "absl/strings/internal/resize_uninitialized.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +ABSL_CONST_INIT extern const char kBase64Chars[]; + +// Calculates how long a string will be when it is base64 encoded given its +// length and whether or not the result should be padded. +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. Returns the length of `dest`. +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. +template <typename String> +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/escaping_test_common.h b/third_party/abseil_cpp/absl/strings/internal/escaping_test_common.h new file mode 100644 index 000000000000..7b18017a0890 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/escaping_test_common.h @@ -0,0 +1,133 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test contains common things needed by both escaping_test.cc and +// escaping_benchmark.cc. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ + +#include <array> +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +struct base64_testcase { + absl::string_view plaintext; + absl::string_view cyphertext; +}; + +inline const std::array<base64_testcase, 5>& base64_strings() { + static const std::array<base64_testcase, 5> testcase{{ + // Some google quotes + // Cyphertext created with "uuencode (GNU sharutils) 4.6.3" + // (Note that we're testing the websafe encoding, though, so if + // you add messages, be sure to run "tr -- '+/' '-_'" on the output) + { "I was always good at math and science, and I never realized " + "that was unusual or somehow undesirable. So one of the things " + "I care a lot about is helping to remove that stigma, " + "to show girls that you can be feminine, you can like the things " + "that girls like, but you can also be really good at technology. " + "You can be really good at building things." + " - Marissa Meyer, Newsweek, 2010-12-22" "\n", + + "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg" + "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu" + "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg" + "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo" + "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp" + "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs" + "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy" + "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll" + "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" }, + + { "Typical first year for a new cluster: " + "~0.5 overheating " + "~1 PDU failure " + "~1 rack-move " + "~1 network rewiring " + "~20 rack failures " + "~5 racks go wonky " + "~8 network maintenances " + "~12 router reloads " + "~3 router failures " + "~dozens of minor 30-second blips for dns " + "~1000 individual machine failures " + "~thousands of hard drive failures " + "slow disks, bad memory, misconfigured machines, flaky machines, etc." + " - Jeff Dean, The Joys of Real Hardware" "\n", + + "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92" + "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3" + "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv" + "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk" + "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv" + "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp" + "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg" + "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs" + "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS" + "ZWFsIEhhcmR3YXJlCg" }, + + { "I'm the head of the webspam team at Google. " + "That means that if you type your name into Google and get porn back, " + "it's my fault. Unless you're a porn star, in which case porn is a " + "completely reasonable response." + " - Matt Cutts, Google Plus" "\n", + + "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg" + "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv" + "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz" + "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg" + "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs" + "IEdvb2dsZSBQbHVzCg" }, + + { "It will still be a long time before machines approach human " + "intelligence. " + "But luckily, machines don't actually have to be intelligent; " + "they just have to fake it. Access to a wealth of information, " + "combined with a rudimentary decision-making capacity, " + "can often be almost as useful. Of course, the results are better yet " + "when coupled with intelligence. A reference librarian with access to " + "a good search engine is a formidable tool." + " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" + "\n", + + "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg" + "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj" + "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg" + "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo" + "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg" + "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0" + "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy" + "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl" + "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu" + "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp" + "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw" + "NAo" }, + + // Degenerate edge case + { "", + "" }, + }}; + + return testcase; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil.cc b/third_party/abseil_cpp/absl/strings/internal/memutil.cc new file mode 100644 index 000000000000..2519c6881e35 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil.cc @@ -0,0 +1,112 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <cstdlib> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +int memcasecmp(const char* s1, const char* s2, size_t len) { + const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1); + const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2); + + for (size_t i = 0; i < len; i++) { + const int diff = + int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} - + int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))}; + if (diff != 0) return diff; + } + return 0; +} + +char* memdup(const char* s, size_t slen) { + void* copy; + if ((copy = malloc(slen)) == nullptr) return nullptr; + memcpy(copy, s, slen); + return reinterpret_cast<char*>(copy); +} + +char* memrchr(const char* s, int c, size_t slen) { + for (const char* e = s + slen - 1; e >= s; e--) { + if (*e == c) return const_cast<char*>(e); + } + return nullptr; +} + +size_t memspn(const char* s, size_t slen, const char* accept) { + const char* p = s; + const char* spanp; + char c, sc; + +cont: + c = *p++; + if (slen-- == 0) return p - 1 - s; + for (spanp = accept; (sc = *spanp++) != '\0';) + if (sc == c) goto cont; + return p - 1 - s; +} + +size_t memcspn(const char* s, size_t slen, const char* reject) { + const char* p = s; + const char* spanp; + char c, sc; + + while (slen-- != 0) { + c = *p++; + for (spanp = reject; (sc = *spanp++) != '\0';) + if (sc == c) return p - 1 - s; + } + return p - s; +} + +char* mempbrk(const char* s, size_t slen, const char* accept) { + const char* scanp; + int sc; + + for (; slen; ++s, --slen) { + for (scanp = accept; (sc = *scanp++) != '\0';) + if (sc == *s) return const_cast<char*>(s); + } + return nullptr; +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ((match = static_cast<const char*>( + memchr(phaystack, pneedle[0], hayend - phaystack)))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil.h b/third_party/abseil_cpp/absl/strings/internal/memutil.h new file mode 100644 index 000000000000..9ad05358086c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil.h @@ -0,0 +1,148 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// These routines provide mem versions of standard C string routines, +// such as strpbrk. They function exactly the same as the str versions, +// so if you wonder what they are, replace the word "mem" by +// "str" and check out the man page. I could return void*, as the +// strutil.h mem*() routines tend to do, but I return char* instead +// since this is by far the most common way these functions are called. +// +// The difference between the mem and str versions is the mem version +// takes a pointer and a length, rather than a '\0'-terminated string. +// The memcase* routines defined here assume the locale is "C" +// (they use absl::ascii_tolower instead of tolower). +// +// These routines are based on the BSD library. +// +// Here's a list of routines from string.h, and their mem analogues. +// Functions in lowercase are defined in string.h; those in UPPERCASE +// are defined here: +// +// strlen -- +// strcat strncat MEMCAT +// strcpy strncpy memcpy +// -- memccpy (very cool function, btw) +// -- memmove +// -- memset +// strcmp strncmp memcmp +// strcasecmp strncasecmp MEMCASECMP +// strchr memchr +// strcoll -- +// strxfrm -- +// strdup strndup MEMDUP +// strrchr MEMRCHR +// strspn MEMSPN +// strcspn MEMCSPN +// strpbrk MEMPBRK +// strstr MEMSTR MEMMEM +// (g)strcasestr MEMCASESTR MEMCASEMEM +// strtok -- +// strprefix MEMPREFIX (strprefix is from strutil.h) +// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) +// strsuffix MEMSUFFIX (strsuffix is from strutil.h) +// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) +// -- MEMIS +// -- MEMCASEIS +// strcount MEMCOUNT (strcount is from strutil.h) + +#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ +#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ + +#include <cstddef> +#include <cstring> + +#include "absl/base/port.h" // disable some warnings on Windows +#include "absl/strings/ascii.h" // for absl::ascii_tolower + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +inline char* memcat(char* dest, size_t destlen, const char* src, + size_t srclen) { + return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen)); +} + +int memcasecmp(const char* s1, const char* s2, size_t len); +char* memdup(const char* s, size_t slen); +char* memrchr(const char* s, int c, size_t slen); +size_t memspn(const char* s, size_t slen, const char* accept); +size_t memcspn(const char* s, size_t slen, const char* reject); +char* mempbrk(const char* s, size_t slen, const char* accept); + +// This is for internal use only. Don't call this directly +template <bool case_sensitive> +const char* int_memmatch(const char* haystack, size_t haylen, + const char* needle, size_t neelen) { + if (0 == neelen) { + return haystack; // even if haylen is 0 + } + const char* hayend = haystack + haylen; + const char* needlestart = needle; + const char* needleend = needlestart + neelen; + + for (; haystack < hayend; ++haystack) { + char hay = case_sensitive + ? *haystack + : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); + char nee = case_sensitive + ? *needle + : absl::ascii_tolower(static_cast<unsigned char>(*needle)); + if (hay == nee) { + if (++needle == needleend) { + return haystack + 1 - neelen; + } + } else if (needle != needlestart) { + // must back up haystack in case a prefix matched (find "aab" in "aaab") + haystack -= needle - needlestart; // for loop will advance one more + needle = needlestart; + } + } + return nullptr; +} + +// These are the guys you can call directly +inline const char* memstr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memcasestr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memmem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); +} + +inline const char* memcasemem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil_benchmark.cc b/third_party/abseil_cpp/absl/strings/internal/memutil_benchmark.cc new file mode 100644 index 000000000000..dc95c3e5e55a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil_benchmark.cc @@ -0,0 +1,323 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <algorithm> +#include <cstdlib> + +#include "benchmark/benchmark.h" +#include "absl/strings/ascii.h" + +// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab. +// That gives us: +// - an easy search: 'b' +// - a medium search: 'ab'. That means every letter is a possible match. +// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack) +// We benchmark case-sensitive and case-insensitive versions of +// three memmem implementations: +// - memmem() from memutil.h +// - search() from STL +// - memmatch(), a custom implementation using memchr and memcmp. +// Here are sample results: +// +// Run on (12 X 3800 MHz CPU s) +// CPU Caches: +// L1 Data 32K (x6) +// L1 Instruction 32K (x6) +// L2 Unified 256K (x6) +// L3 Unified 15360K (x1) +// ---------------------------------------------------------------- +// Benchmark Time CPU Iterations +// ---------------------------------------------------------------- +// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s +// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s +// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s +// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s +// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s +// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s +// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s +// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s +// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s +// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s +// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s +// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s +// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s +// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s +// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s +// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s +// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s +// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s +// BM_MemmemStartup 5 ns 5 ns 150821500 +// BM_SearchStartup 5 ns 5 ns 150644203 +// BM_MemmatchStartup 7 ns 7 ns 97068802 +// +// Conclusions: +// +// The following recommendations are based on the sample results above. However, +// we have found that the performance of STL search can vary significantly +// depending on compiler and standard library implementation. We recommend you +// run the benchmarks for yourself on relevant platforms. +// +// If you need case-insensitive, STL search is slightly better than memmem for +// all cases. +// +// Case-sensitive is more subtle: +// Custom memmatch is _very_ fast at scanning, so if you have very few possible +// matches in your haystack, that's the way to go. Performance drops +// significantly with more matches. +// +// STL search is slightly faster than memmem in the medium and pathological +// benchmarks. However, the performance of memmem is currently more dependable +// across platforms and build configurations. + +namespace { + +constexpr int kHaystackSize = 10000; +constexpr int64_t kHaystackSize64 = kHaystackSize; +const char* MakeHaystack() { + char* haystack = new char[kHaystackSize]; + for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a'; + haystack[kHaystackSize - 1] = 'b'; + return haystack; +} +const char* const kHaystack = MakeHaystack(); + +void BM_Memmem(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memmem); + +void BM_MemmemMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmemMedium); + +void BM_MemmemPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmem( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmemPathological); + +void BM_Memcasemem(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memcasemem); + +void BM_MemcasememMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasememMedium); + +void BM_MemcasememPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memcasemem( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasememPathological); + +bool case_eq(const char a, const char b) { + return absl::ascii_tolower(a) == absl::ascii_tolower(b); +} + +void BM_Search(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Search); + +void BM_SearchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 2, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchMedium); + +void BM_SearchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchPathological); + +void BM_Searchcase(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Searchcase); + +void BM_SearchcaseMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 2, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchcaseMedium); + +void BM_SearchcasePathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchcasePathological); + +char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +void BM_Memmatch(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memmatch); + +void BM_MemmatchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmatchMedium); + +void BM_MemmatchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmatch( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmatchPathological); + +void BM_Memcasematch(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memcasematch); + +void BM_MemcasematchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasematchMedium); + +void BM_MemcasematchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasematchPathological); + +void BM_MemmemStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmem( + kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); + } +} +BENCHMARK(BM_MemmemStartup); + +void BM_SearchStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize)); + } +} +BENCHMARK(BM_SearchStartup); + +void BM_MemmatchStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmatch( + kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); + } +} +BENCHMARK(BM_MemmatchStartup); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil_test.cc b/third_party/abseil_cpp/absl/strings/internal/memutil_test.cc new file mode 100644 index 000000000000..d8681ddf4e3b --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil_test.cc @@ -0,0 +1,179 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Unit test for memutil.cc + +#include "absl/strings/internal/memutil.h" + +#include <cstdlib> + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" + +namespace { + +static char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +static const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +TEST(MemUtilTest, AllTests) { + // check memutil functions + char a[1000]; + absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1); + absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1); + + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there", + sizeof("hello there") - 1), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 1), + -1); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 2), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0); + + char* p = absl::strings_internal::memdup("hello", 5); + free(p); + + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 1); + EXPECT_TRUE(p && p[-1] == 'r'); + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 2); + EXPECT_TRUE(p && p[-1] == 'h'); + p = absl::strings_internal::memrchr("hello there", 'u', + sizeof("hello there") - 1); + EXPECT_TRUE(p == nullptr); + + int len = absl::strings_internal::memspn("hello there", + sizeof("hello there") - 1, "hole"); + EXPECT_EQ(len, sizeof("hello") - 1); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "u"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + ""); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 1, "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 2, "trole h!"); + EXPECT_EQ(len, sizeof("hello there!") - 2); + + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "leho"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "u"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, ""); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, " "); + EXPECT_EQ(len, 5); + + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "leho"); + EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l'); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "nu"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there!", + sizeof("hello there!") - 2, "!"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + " t "); + EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't'); + + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "aBcDeFgHiJ"; + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) == + nullptr); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/numbers_test_common.h b/third_party/abseil_cpp/absl/strings/internal/numbers_test_common.h new file mode 100644 index 000000000000..eaa88a88975b --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/numbers_test_common.h @@ -0,0 +1,184 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file contains common things needed by numbers_test.cc, +// numbers_legacy_test.cc and numbers_benchmark.cc. + +#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ +#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ + +#include <array> +#include <cstdint> +#include <limits> +#include <string> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +template <typename IntType> +inline bool Itoa(IntType value, int base, std::string* destination) { + destination->clear(); + if (base <= 1 || base > 36) { + return false; + } + + if (value == 0) { + destination->push_back('0'); + return true; + } + + bool negative = value < 0; + while (value != 0) { + const IntType next_value = value / base; + // Can't use std::abs here because of problems when IntType is unsigned. + int remainder = + static_cast<int>(value > next_value * base ? value - next_value * base + : next_value * base - value); + char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10; + destination->insert(0, 1, c); + value = next_value; + } + + if (negative) { + destination->insert(0, 1, '-'); + } + return true; +} + +struct uint32_test_case { + const char* str; + bool expect_ok; + int base; // base to pass to the conversion function + uint32_t expected; +}; + +inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() { + static const std::array<uint32_test_case, 27> test_cases{{ + {"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()}, + {"0x34234324", true, 16, 0x34234324}, + {"34234324", true, 16, 0x34234324}, + {"0", true, 16, 0}, + {" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()}, + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + {" \t\n 72717222", true, 8, 072717222}, + {" \t\n 072717222", true, 8, 072717222}, + {" \t\n 072717228", false, 8, 07271722}, + {"0", true, 0, 0}, + + // Base-10 version. + {"34234324", true, 0, 34234324}, + {"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()}, + {"34234324 \n\t", true, 10, 34234324}, + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, // would be valid hex, but prefix is missing + {"34234324a", false, 0, 34234324}, + {"34234.3", false, 0, 34234}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()}, + {"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()}, + {nullptr, false, 0, 0}, + }}; + return test_cases; +} + +struct uint64_test_case { + const char* str; + bool expect_ok; + int base; + uint64_t expected; +}; + +inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { + static const std::array<uint64_test_case, 34> test_cases{{ + {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}}, + {"3423432448783446", true, 16, int64_t{0x3423432448783446}}, + + {"0", true, 16, 0}, + {"000", true, 0, 0}, + {"0", true, 0, 0}, + {" \t\n 0xffffffffffffffff", true, 16, + (std::numeric_limits<uint64_t>::max)()}, + + {"012345670123456701234", true, 8, int64_t{012345670123456701234}}, + {"12345670123456701234", true, 8, int64_t{012345670123456701234}}, + + {"12845670123456701234", false, 8, 0}, + + // Base-10 version. + {"34234324487834466", true, 0, int64_t{34234324487834466}}, + + {" \t\n 18446744073709551615", true, 0, + (std::numeric_limits<uint64_t>::max)()}, + + {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}}, + + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + {"0", true, 0, 0}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, + {"34234324487834466a", false, 0, 0}, + {"34234487834466.3", false, 0, 0}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"18446744073709551616", false, 10, 0}, + {"18446744073709551616", false, 0, 0}, + {"0x10000000000000000", false, 16, + (std::numeric_limits<uint64_t>::max)()}, + {"0X10000000000000000", false, 16, + (std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x. + {"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()}, + {"0X10000000000000000", false, 0, + (std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x. + + {"0x1234", true, 16, 0x1234}, + + // Base-10 string version. + {"1234", true, 0, 1234}, + {nullptr, false, 0, 0}, + }}; + return test_cases; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream.cc b/third_party/abseil_cpp/absl/strings/internal/ostringstream.cc new file mode 100644 index 000000000000..05324c780c60 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream.cc @@ -0,0 +1,36 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/ostringstream.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +OStringStream::Buf::int_type OStringStream::overflow(int c) { + assert(s_); + if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof())) + s_->push_back(static_cast<char>(c)); + return 1; +} + +std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) { + assert(s_); + s_->append(s, n); + return n; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream.h b/third_party/abseil_cpp/absl/strings/internal/ostringstream.h new file mode 100644 index 000000000000..d25d60473f6d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream.h @@ -0,0 +1,89 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ +#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ + +#include <cassert> +#include <ostream> +#include <streambuf> +#include <string> + +#include "absl/base/port.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// The same as std::ostringstream but appends to a user-specified std::string, +// and is faster. It is ~70% faster to create, ~50% faster to write to, and +// completely free to extract the result std::string. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42 << ' ' << 3.14; // appends to `s` +// +// The stream object doesn't have to be named. Starting from C++11 operator<< +// works with rvalues of std::ostream. +// +// std::string s; +// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s` +// +// OStringStream is faster to create than std::ostringstream but it's still +// relatively slow. Avoid creating multiple streams where a single stream will +// do. +// +// Creates unnecessary instances of OStringStream: slow. +// +// std::string s; +// OStringStream(&s) << 42; +// OStringStream(&s) << ' '; +// OStringStream(&s) << 3.14; +// +// Creates a single instance of OStringStream and reuses it: fast. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42; +// strm << ' '; +// strm << 3.14; +// +// Note: flush() has no effect. No reason to call it. +class OStringStream : private std::basic_streambuf<char>, public std::ostream { + public: + // The argument can be null, in which case you'll need to call str(p) with a + // non-null argument before you can write to the stream. + // + // The destructor of OStringStream doesn't use the std::string. It's OK to + // destroy the std::string before the stream. + explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {} + + std::string* str() { return s_; } + const std::string* str() const { return s_; } + void str(std::string* s) { s_ = s; } + + private: + using Buf = std::basic_streambuf<char>; + + Buf::int_type overflow(int c) override; + std::streamsize xsputn(const char* s, std::streamsize n) override; + + std::string* s_; +}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream_benchmark.cc b/third_party/abseil_cpp/absl/strings/internal/ostringstream_benchmark.cc new file mode 100644 index 000000000000..5979f18236e3 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream_benchmark.cc @@ -0,0 +1,106 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/ostringstream.h" + +#include <sstream> +#include <string> + +#include "benchmark/benchmark.h" + +namespace { + +enum StringType { + kNone, + kStdString, +}; + +// Benchmarks for std::ostringstream. +template <StringType kOutput> +void BM_StdStream(benchmark::State& state) { + const int num_writes = state.range(0); + const int bytes_per_write = state.range(1); + const std::string payload(bytes_per_write, 'x'); + for (auto _ : state) { + std::ostringstream strm; + benchmark::DoNotOptimize(strm); + for (int i = 0; i != num_writes; ++i) { + strm << payload; + } + switch (kOutput) { + case kNone: { + break; + } + case kStdString: { + std::string s = strm.str(); + benchmark::DoNotOptimize(s); + break; + } + } + } +} + +// Create the stream, optionally write to it, then destroy it. +BENCHMARK_TEMPLATE(BM_StdStream, kNone) + ->ArgPair(0, 0) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); +// Create the stream, write to it, get std::string out, then destroy. +BENCHMARK_TEMPLATE(BM_StdStream, kStdString) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); + +// Benchmarks for OStringStream. +template <StringType kOutput> +void BM_CustomStream(benchmark::State& state) { + const int num_writes = state.range(0); + const int bytes_per_write = state.range(1); + const std::string payload(bytes_per_write, 'x'); + for (auto _ : state) { + std::string out; + absl::strings_internal::OStringStream strm(&out); + benchmark::DoNotOptimize(strm); + for (int i = 0; i != num_writes; ++i) { + strm << payload; + } + switch (kOutput) { + case kNone: { + break; + } + case kStdString: { + std::string s = out; + benchmark::DoNotOptimize(s); + break; + } + } + } +} + +// Create the stream, optionally write to it, then destroy it. +BENCHMARK_TEMPLATE(BM_CustomStream, kNone) + ->ArgPair(0, 0) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); +// Create the stream, write to it, get std::string out, then destroy. +// It's not useful in practice to extract std::string from OStringStream; we +// measure it for completeness. +BENCHMARK_TEMPLATE(BM_CustomStream, kStdString) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream_test.cc b/third_party/abseil_cpp/absl/strings/internal/ostringstream_test.cc new file mode 100644 index 000000000000..2879e50eb38d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream_test.cc @@ -0,0 +1,102 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/ostringstream.h" + +#include <memory> +#include <ostream> +#include <string> +#include <type_traits> + +#include "gtest/gtest.h" + +namespace { + +TEST(OStringStream, IsOStream) { + static_assert( + std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(), + ""); +} + +TEST(OStringStream, ConstructDestroy) { + { + absl::strings_internal::OStringStream strm(nullptr); + EXPECT_EQ(nullptr, strm.str()); + } + { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ(&s, strm.str()); + } + EXPECT_EQ("abc", s); + } + { + std::unique_ptr<std::string> s(new std::string); + absl::strings_internal::OStringStream strm(s.get()); + s.reset(); + } +} + +TEST(OStringStream, Str) { + std::string s1; + absl::strings_internal::OStringStream strm(&s1); + const absl::strings_internal::OStringStream& c_strm(strm); + + static_assert(std::is_same<decltype(strm.str()), std::string*>(), ""); + static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), ""); + + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + strm.str(&s1); + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + std::string s2; + strm.str(&s2); + EXPECT_EQ(&s2, strm.str()); + EXPECT_EQ(&s2, c_strm.str()); + + strm.str(nullptr); + EXPECT_EQ(nullptr, strm.str()); + EXPECT_EQ(nullptr, c_strm.str()); +} + +TEST(OStreamStream, WriteToLValue) { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ("abc", s); + strm << ""; + EXPECT_EQ("abc", s); + strm << 42; + EXPECT_EQ("abc42", s); + strm << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); + } + EXPECT_EQ("abc42xy", s); +} + +TEST(OStreamStream, WriteToRValue) { + std::string s = "abc"; + absl::strings_internal::OStringStream(&s) << ""; + EXPECT_EQ("abc", s); + absl::strings_internal::OStringStream(&s) << 42; + EXPECT_EQ("abc42", s); + absl::strings_internal::OStringStream(&s) << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/pow10_helper.cc b/third_party/abseil_cpp/absl/strings/internal/pow10_helper.cc new file mode 100644 index 000000000000..42e96c3425d2 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/pow10_helper.cc @@ -0,0 +1,122 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/pow10_helper.h" + +#include <cmath> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +namespace { + +// The exact value of 1e23 falls precisely halfway between two representable +// doubles. Furthermore, the rounding rules we prefer (break ties by rounding +// to the nearest even) dictate in this case that the number should be rounded +// down, but this is not completely specified for floating-point literals in +// C++. (It just says to use the default rounding mode of the standard +// library.) We ensure the result we want by using a number that has an +// unambiguous correctly rounded answer. +constexpr double k1e23 = 9999999999999999e7; + +constexpr double kPowersOfTen[] = { + 0.0, 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316, + 1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307, + 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298, + 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289, + 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280, + 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271, + 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, + 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253, + 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, + 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235, + 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226, + 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217, + 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208, + 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199, + 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190, + 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181, + 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, + 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, + 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, + 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145, + 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136, + 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127, + 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118, + 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109, + 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100, + 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, + 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, + 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, + 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, + 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, + 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, + 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, + 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, + 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, + 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, + 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, + 1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, + 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, + 1e+18, 1e+19, 1e+20, 1e+21, 1e+22, k1e23, 1e+24, 1e+25, 1e+26, + 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, + 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44, + 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, + 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62, + 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, + 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, + 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, + 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, + 1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, + 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, + 1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, + 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134, + 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143, + 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, + 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161, + 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170, + 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179, + 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, + 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, + 1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, + 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, + 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224, + 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233, + 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242, + 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, + 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260, + 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, + 1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, + 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, + 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, + 1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, + 1e+306, 1e+307, 1e+308, +}; + +} // namespace + +double Pow10(int exp) { + if (exp < -324) { + return 0.0; + } else if (exp > 308) { + return INFINITY; + } else { + return kPowersOfTen[exp + 324]; + } +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/pow10_helper.h b/third_party/abseil_cpp/absl/strings/internal/pow10_helper.h new file mode 100644 index 000000000000..c37c2c3ffee5 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/pow10_helper.h @@ -0,0 +1,40 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test helper library contains a table of powers of 10, to guarantee +// precise values are computed across the full range of doubles. We can't rely +// on the pow() function, because not all standard libraries ship a version +// that is precise. +#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ +#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ + +#include <vector> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Computes the precise value of 10^exp. (I.e. the nearest representable +// double to the exact value, rounding to nearest-even in the (single) case of +// being exactly halfway between.) +double Pow10(int exp); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/pow10_helper_test.cc b/third_party/abseil_cpp/absl/strings/internal/pow10_helper_test.cc new file mode 100644 index 000000000000..a4ff76d31e10 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/pow10_helper_test.cc @@ -0,0 +1,122 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/pow10_helper.h" + +#include <cmath> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +namespace { + +struct TestCase { + int power; // Testing Pow10(power) + uint64_t significand; // Raw bits of the expected value + int radix; // significand is adjusted by 2^radix +}; + +TEST(Pow10HelperTest, Works) { + // The logic in pow10_helper.cc is so simple that theoretically we don't even + // need a test. However, we're paranoid and believe that there may be + // compilers that don't round floating-point literals correctly, even though + // it is specified by the standard. We check various edge cases, just to be + // sure. + constexpr TestCase kTestCases[] = { + // Subnormals + {-323, 0x2, -1074}, + {-322, 0x14, -1074}, + {-321, 0xca, -1074}, + {-320, 0x7e8, -1074}, + {-319, 0x4f10, -1074}, + {-318, 0x316a2, -1074}, + {-317, 0x1ee257, -1074}, + {-316, 0x134d761, -1074}, + {-315, 0xc1069cd, -1074}, + {-314, 0x78a42205, -1074}, + {-313, 0x4b6695433, -1074}, + {-312, 0x2f201d49fb, -1074}, + {-311, 0x1d74124e3d1, -1074}, + {-310, 0x12688b70e62b, -1074}, + {-309, 0xb8157268fdaf, -1074}, + {-308, 0x730d67819e8d2, -1074}, + // Values that are very close to rounding the other way. + // Comment shows difference of significand from the true value. + {-307, 0x11fa182c40c60d, -1072}, // -.4588 + {-290, 0x18f2b061aea072, -1016}, // .4854 + {-276, 0x11BA03F5B21000, -969}, // .4709 + {-259, 0x1899C2F6732210, -913}, // .4830 + {-252, 0x1D53844EE47DD1, -890}, // -.4743 + {-227, 0x1E5297287C2F45, -807}, // -.4708 + {-198, 0x1322E220A5B17E, -710}, // -.4714 + {-195, 0x12B010D3E1CF56, -700}, // .4928 + {-192, 0x123FF06EEA847A, -690}, // .4968 + {-163, 0x1708D0F84D3DE7, -594}, // -.4977 + {-145, 0x13FAAC3E3FA1F3, -534}, // -.4785 + {-111, 0x133D4032C2C7F5, -421}, // .4774 + {-106, 0x1D5B561574765B, -405}, // -.4869 + {-104, 0x16EF5B40C2FC77, -398}, // -.4741 + {-88, 0x197683DF2F268D, -345}, // -.4738 + {-86, 0x13E497065CD61F, -338}, // .4736 + {-76, 0x17288E1271F513, -305}, // -.4761 + {-63, 0x1A53FC9631D10D, -262}, // .4929 + {-30, 0x14484BFEEBC2A0, -152}, // .4758 + {-21, 0x12E3B40A0E9B4F, -122}, // -.4916 + {-5, 0x14F8B588E368F1, -69}, // .4829 + {23, 0x152D02C7E14AF6, 24}, // -.5000 (exactly, round-to-even) + {29, 0x1431E0FAE6D721, 44}, // -.4870 + {34, 0x1ED09BEAD87C03, 60}, // -.4721 + {70, 0x172EBAD6DDC73D, 180}, // .4733 + {105, 0x1BE7ABD3781ECA, 296}, // -.4850 + {126, 0x17A2ECC414A03F, 366}, // -.4999 + {130, 0x1CDA62055B2D9E, 379}, // .4855 + {165, 0x115D847AD00087, 496}, // -.4913 + {172, 0x14B378469B6732, 519}, // .4818 + {187, 0x1262DFEEBBB0F9, 569}, // -.4805 + {210, 0x18557F31326BBB, 645}, // -.4992 + {212, 0x1302CB5E6F642A, 652}, // -.4838 + {215, 0x1290BA9A38C7D1, 662}, // -.4881 + {236, 0x1F736F9B3494E9, 731}, // .4707 + {244, 0x176EC98994F489, 758}, // .4924 + {250, 0x1658E3AB795204, 778}, // -.4963 + {252, 0x117571DDF6C814, 785}, // .4873 + {254, 0x1B4781EAD1989E, 791}, // -.4887 + {260, 0x1A03FDE214CAF1, 811}, // .4784 + {284, 0x1585041B2C477F, 891}, // .4798 + {304, 0x1D2A1BE4048F90, 957}, // -.4987 + // Out-of-range values + {-324, 0x0, 0}, + {-325, 0x0, 0}, + {-326, 0x0, 0}, + {309, 1, 2000}, + {310, 1, 2000}, + {311, 1, 2000}, + }; + for (const TestCase& test_case : kTestCases) { + EXPECT_EQ(Pow10(test_case.power), + std::ldexp(test_case.significand, test_case.radix)) + << absl::StrFormat("Failure for Pow10(%d): %a vs %a", test_case.power, + Pow10(test_case.power), + std::ldexp(test_case.significand, test_case.radix)); + } +} + +} // namespace +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized.h b/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized.h new file mode 100644 index 000000000000..e42628e3949a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized.h @@ -0,0 +1,73 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ + +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" // for void_t + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Is a subclass of true_type or false_type, depending on whether or not +// T has a __resize_default_init member. +template <typename string_type, typename = void> +struct ResizeUninitializedTraits { + using HasMember = std::false_type; + static void Resize(string_type* s, size_t new_size) { s->resize(new_size); } +}; + +// __resize_default_init is provided by libc++ >= 8.0 +template <typename string_type> +struct ResizeUninitializedTraits< + string_type, absl::void_t<decltype(std::declval<string_type&>() + .__resize_default_init(237))> > { + using HasMember = std::true_type; + static void Resize(string_type* s, size_t new_size) { + s->__resize_default_init(new_size); + } +}; + +// Returns true if the std::string implementation supports a resize where +// the new characters added to the std::string are left untouched. +// +// (A better name might be "STLStringSupportsUninitializedResize", alluding to +// the previous function.) +template <typename string_type> +inline constexpr bool STLStringSupportsNontrashingResize(string_type*) { + return ResizeUninitializedTraits<string_type>::HasMember::value; +} + +// Like str->resize(new_size), except any new characters added to "*str" as a +// result of resizing may be left uninitialized, rather than being filled with +// '0' bytes. Typically used when code is then going to overwrite the backing +// store of the std::string with known data. +template <typename string_type, typename = void> +inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { + ResizeUninitializedTraits<string_type>::Resize(s, new_size); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized_test.cc b/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized_test.cc new file mode 100644 index 000000000000..0f8b3c2a95b8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized_test.cc @@ -0,0 +1,82 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/resize_uninitialized.h" + +#include "gtest/gtest.h" + +namespace { + +int resize_call_count = 0; + +// A mock string class whose only purpose is to track how many times its +// resize() method has been called. +struct resizable_string { + size_t size() const { return 0; } + char& operator[](size_t) { + static char c = '\0'; + return c; + } + void resize(size_t) { resize_call_count += 1; } +}; + +int resize_default_init_call_count = 0; + +// A mock string class whose only purpose is to track how many times its +// resize() and __resize_default_init() methods have been called. +struct resize_default_init_string { + size_t size() const { return 0; } + char& operator[](size_t) { + static char c = '\0'; + return c; + } + void resize(size_t) { resize_call_count += 1; } + void __resize_default_init(size_t) { resize_default_init_call_count += 1; } +}; + +TEST(ResizeUninit, WithAndWithout) { + resize_call_count = 0; + resize_default_init_call_count = 0; + { + resizable_string rs; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_FALSE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rs, 237); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(resize_default_init_call_count, 0); + } + + resize_call_count = 0; + resize_default_init_call_count = 0; + { + resize_default_init_string rus; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_TRUE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rus, 237); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 1); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/stl_type_traits.h b/third_party/abseil_cpp/absl/strings/internal/stl_type_traits.h new file mode 100644 index 000000000000..6035ca45cbd2 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/stl_type_traits.h @@ -0,0 +1,248 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Thie file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type +// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug +// wrappers of STL containers. +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_split.h. +// +// IWYU pragma: private, include "absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ +#define ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ + +#include <array> +#include <bitset> +#include <deque> +#include <forward_list> +#include <list> +#include <map> +#include <set> +#include <type_traits> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "absl/meta/type_traits.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +template <typename C, template <typename...> class T> +struct IsSpecializationImpl : std::false_type {}; +template <template <typename...> class T, typename... Args> +struct IsSpecializationImpl<T<Args...>, T> : std::true_type {}; +template <typename C, template <typename...> class T> +using IsSpecialization = IsSpecializationImpl<absl::decay_t<C>, T>; + +template <typename C> +struct IsArrayImpl : std::false_type {}; +template <template <typename, size_t> class A, typename T, size_t N> +struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {}; +template <typename C> +using IsArray = IsArrayImpl<absl::decay_t<C>>; + +template <typename C> +struct IsBitsetImpl : std::false_type {}; +template <template <size_t> class B, size_t N> +struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {}; +template <typename C> +using IsBitset = IsBitsetImpl<absl::decay_t<C>>; + +template <typename C> +struct IsSTLContainer + : absl::disjunction< + IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>, + IsSpecialization<C, std::forward_list>, + IsSpecialization<C, std::list>, IsSpecialization<C, std::map>, + IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>, + IsSpecialization<C, std::multiset>, + IsSpecialization<C, std::unordered_map>, + IsSpecialization<C, std::unordered_multimap>, + IsSpecialization<C, std::unordered_set>, + IsSpecialization<C, std::unordered_multiset>, + IsSpecialization<C, std::vector>> {}; + +template <typename C, template <typename...> class T, typename = void> +struct IsBaseOfSpecializationImpl : std::false_type {}; +// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE +// on the existence of container dependent types and plug them into the STL +// template. +template <typename C, template <typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, absl::void_t<typename C::value_type, typename C::allocator_type>> + : std::is_base_of<C, + T<typename C::value_type, typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> + : std::is_base_of<C, T<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> + : std::is_base_of<C, + T<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> { +}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::hasher, + typename C::key_equal, typename C::allocator_type>> + : std::is_base_of<C, T<typename C::key_type, typename C::hasher, + typename C::key_equal, typename C::allocator_type>> { +}; +template <typename C, + template <typename, typename, typename, typename, typename> class T> +struct IsBaseOfSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> + : std::is_base_of<C, T<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> {}; +template <typename C, template <typename...> class T> +using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<absl::decay_t<C>, T>; + +template <typename C> +struct IsBaseOfArrayImpl : std::false_type {}; +template <template <typename, size_t> class A, typename T, size_t N> +struct IsBaseOfArrayImpl<A<T, N>> : std::is_base_of<A<T, N>, std::array<T, N>> { +}; +template <typename C> +using IsBaseOfArray = IsBaseOfArrayImpl<absl::decay_t<C>>; + +template <typename C> +struct IsBaseOfBitsetImpl : std::false_type {}; +template <template <size_t> class B, size_t N> +struct IsBaseOfBitsetImpl<B<N>> : std::is_base_of<B<N>, std::bitset<N>> {}; +template <typename C> +using IsBaseOfBitset = IsBaseOfBitsetImpl<absl::decay_t<C>>; + +template <typename C> +struct IsBaseOfSTLContainer + : absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>, + IsBaseOfSpecialization<C, std::deque>, + IsBaseOfSpecialization<C, std::forward_list>, + IsBaseOfSpecialization<C, std::list>, + IsBaseOfSpecialization<C, std::map>, + IsBaseOfSpecialization<C, std::multimap>, + IsBaseOfSpecialization<C, std::set>, + IsBaseOfSpecialization<C, std::multiset>, + IsBaseOfSpecialization<C, std::unordered_map>, + IsBaseOfSpecialization<C, std::unordered_multimap>, + IsBaseOfSpecialization<C, std::unordered_set>, + IsBaseOfSpecialization<C, std::unordered_multiset>, + IsBaseOfSpecialization<C, std::vector>> {}; + +template <typename C, template <typename...> class T, typename = void> +struct IsConvertibleToSpecializationImpl : std::false_type {}; +// IsConvertibleToSpecializationImpl needs multiple partial specializations to +// SFINAE on the existence of container dependent types and plug them into the +// STL template. +template <typename C, template <typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, absl::void_t<typename C::value_type, typename C::allocator_type>> + : std::is_convertible< + C, T<typename C::value_type, typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> + : std::is_convertible<C, T<typename C::key_type, typename C::key_compare, + typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> + : std::is_convertible< + C, T<typename C::key_type, typename C::mapped_type, + typename C::key_compare, typename C::allocator_type>> {}; +template <typename C, template <typename, typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::hasher, + typename C::key_equal, typename C::allocator_type>> + : std::is_convertible< + C, T<typename C::key_type, typename C::hasher, typename C::key_equal, + typename C::allocator_type>> {}; +template <typename C, + template <typename, typename, typename, typename, typename> class T> +struct IsConvertibleToSpecializationImpl< + C, T, + absl::void_t<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> + : std::is_convertible<C, T<typename C::key_type, typename C::mapped_type, + typename C::hasher, typename C::key_equal, + typename C::allocator_type>> {}; +template <typename C, template <typename...> class T> +using IsConvertibleToSpecialization = + IsConvertibleToSpecializationImpl<absl::decay_t<C>, T>; + +template <typename C> +struct IsConvertibleToArrayImpl : std::false_type {}; +template <template <typename, size_t> class A, typename T, size_t N> +struct IsConvertibleToArrayImpl<A<T, N>> + : std::is_convertible<A<T, N>, std::array<T, N>> {}; +template <typename C> +using IsConvertibleToArray = IsConvertibleToArrayImpl<absl::decay_t<C>>; + +template <typename C> +struct IsConvertibleToBitsetImpl : std::false_type {}; +template <template <size_t> class B, size_t N> +struct IsConvertibleToBitsetImpl<B<N>> + : std::is_convertible<B<N>, std::bitset<N>> {}; +template <typename C> +using IsConvertibleToBitset = IsConvertibleToBitsetImpl<absl::decay_t<C>>; + +template <typename C> +struct IsConvertibleToSTLContainer + : absl::disjunction< + IsConvertibleToArray<C>, IsConvertibleToBitset<C>, + IsConvertibleToSpecialization<C, std::deque>, + IsConvertibleToSpecialization<C, std::forward_list>, + IsConvertibleToSpecialization<C, std::list>, + IsConvertibleToSpecialization<C, std::map>, + IsConvertibleToSpecialization<C, std::multimap>, + IsConvertibleToSpecialization<C, std::set>, + IsConvertibleToSpecialization<C, std::multiset>, + IsConvertibleToSpecialization<C, std::unordered_map>, + IsConvertibleToSpecialization<C, std::unordered_multimap>, + IsConvertibleToSpecialization<C, std::unordered_set>, + IsConvertibleToSpecialization<C, std::unordered_multiset>, + IsConvertibleToSpecialization<C, std::vector>> {}; + +template <typename C> +struct IsStrictlyBaseOfAndConvertibleToSTLContainer + : absl::conjunction<absl::negation<IsSTLContainer<C>>, + IsBaseOfSTLContainer<C>, + IsConvertibleToSTLContainer<C>> {}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/arg.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.cc new file mode 100644 index 000000000000..9feb22487932 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.cc @@ -0,0 +1,474 @@ +// +// POSIX spec: +// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html +// +#include "absl/strings/internal/str_format/arg.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/strings/internal/str_format/float_conversion.h" +#include "absl/strings/numbers.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +// Reduce *capacity by s.size(), clipped to a 0 minimum. +void ReducePadding(string_view s, size_t *capacity) { + *capacity = Excess(s.size(), *capacity); +} + +// Reduce *capacity by n, clipped to a 0 minimum. +void ReducePadding(size_t n, size_t *capacity) { + *capacity = Excess(n, *capacity); +} + +template <typename T> +struct MakeUnsigned : std::make_unsigned<T> {}; +template <> +struct MakeUnsigned<absl::int128> { + using type = absl::uint128; +}; +template <> +struct MakeUnsigned<absl::uint128> { + using type = absl::uint128; +}; + +template <typename T> +struct IsSigned : std::is_signed<T> {}; +template <> +struct IsSigned<absl::int128> : std::true_type {}; +template <> +struct IsSigned<absl::uint128> : std::false_type {}; + +// Integral digit printer. +// Call one of the PrintAs* routines after construction once. +// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results. +class IntDigits { + public: + // Print the unsigned integer as octal. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsOct(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + do { + *--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7)); + v >>= 3; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the signed or unsigned integer as decimal. + // Supports all integral types. + template <typename T> + void PrintAsDec(T v) { + static_assert(std::is_integral<T>::value, ""); + start_ = storage_; + size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_; + } + + void PrintAsDec(int128 v) { + auto u = static_cast<uint128>(v); + bool add_neg = false; + if (v < 0) { + add_neg = true; + u = uint128{} - u; + } + PrintAsDec(u, add_neg); + } + + void PrintAsDec(uint128 v, bool add_neg = false) { + // This function can be sped up if needed. We can call FastIntToBuffer + // twice, or fix FastIntToBuffer to support uint128. + char *p = storage_ + sizeof(storage_); + do { + p -= 2; + numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + v /= 100; + } while (v); + if (p[0] == '0') { + // We printed one too many hexits. + ++p; + } + if (add_neg) { + *--p = '-'; + } + size_ = storage_ + sizeof(storage_) - p; + start_ = p; + } + + // Print the unsigned integer as hex using lowercase. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsHexLower(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + do { + p -= 2; + constexpr const char* table = numbers_internal::kHexTable; + std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2); + if (sizeof(T) == 1) break; + v >>= 8; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. + ++p; + } + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the unsigned integer as hex using uppercase. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsHexUpper(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + // kHexTable is only lowercase, so do it manually for uppercase. + do { + *--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15]; + v >>= 4; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // The printed value including the '-' sign if available. + // For inputs of value `0`, this will return "0" + string_view with_neg_and_zero() const { return {start_, size_}; } + + // The printed value not including the '-' sign. + // For inputs of value `0`, this will return "". + string_view without_neg_or_zero() const { + static_assert('-' < '0', "The check below verifies both."); + size_t advance = start_[0] <= '0' ? 1 : 0; + return {start_ + advance, size_ - advance}; + } + + bool is_negative() const { return start_[0] == '-'; } + + private: + const char *start_; + size_t size_; + // Max size: 128 bit value as octal -> 43 digits, plus sign char + char storage_[128 / 3 + 1 + 1]; +}; + +// Note: 'o' conversions do not have a base indicator, it's just that +// the '#' flag is specified to modify the precision for 'o' conversions. +string_view BaseIndicator(const IntDigits &as_digits, + const FormatConversionSpecImpl conv) { + // always show 0x for %p. + bool alt = conv.has_alt_flag() || + conv.conversion_char() == FormatConversionCharInternal::p; + bool hex = (conv.conversion_char() == FormatConversionCharInternal::x || + conv.conversion_char() == FormatConversionCharInternal::X || + conv.conversion_char() == FormatConversionCharInternal::p); + // From the POSIX description of '#' flag: + // "For x or X conversion specifiers, a non-zero result shall have + // 0x (or 0X) prefixed to it." + if (alt && hex && !as_digits.without_neg_or_zero().empty()) { + return conv.conversion_char() == FormatConversionCharInternal::X ? "0X" + : "0x"; + } + return {}; +} + +string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) { + if (conv.conversion_char() == FormatConversionCharInternal::d || + conv.conversion_char() == FormatConversionCharInternal::i) { + if (neg) return "-"; + if (conv.has_show_pos_flag()) return "+"; + if (conv.has_sign_col_flag()) return " "; + } + return {}; +} + +bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + ReducePadding(1, &fill); + if (!conv.has_left_flag()) sink->Append(fill, ' '); + sink->Append(1, v); + if (conv.has_left_flag()) sink->Append(fill, ' '); + return true; +} + +bool ConvertIntImplInnerSlow(const IntDigits &as_digits, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + // Print as a sequence of Substrings: + // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + + string_view formatted = as_digits.without_neg_or_zero(); + ReducePadding(formatted, &fill); + + string_view sign = SignColumn(as_digits.is_negative(), conv); + ReducePadding(sign, &fill); + + string_view base_indicator = BaseIndicator(as_digits, conv); + ReducePadding(base_indicator, &fill); + + int precision = conv.precision(); + bool precision_specified = precision >= 0; + if (!precision_specified) + precision = 1; + + if (conv.has_alt_flag() && + conv.conversion_char() == FormatConversionCharInternal::o) { + // From POSIX description of the '#' (alt) flag: + // "For o conversion, it increases the precision (if necessary) to + // force the first digit of the result to be zero." + if (formatted.empty() || *formatted.begin() != '0') { + int needed = static_cast<int>(formatted.size()) + 1; + precision = std::max(precision, needed); + } + } + + size_t num_zeroes = Excess(formatted.size(), precision); + ReducePadding(num_zeroes, &fill); + + size_t num_left_spaces = !conv.has_left_flag() ? fill : 0; + size_t num_right_spaces = conv.has_left_flag() ? fill : 0; + + // From POSIX description of the '0' (zero) flag: + // "For d, i, o, u, x, and X conversion specifiers, if a precision + // is specified, the '0' flag is ignored." + if (!precision_specified && conv.has_zero_flag()) { + num_zeroes += num_left_spaces; + num_left_spaces = 0; + } + + sink->Append(num_left_spaces, ' '); + sink->Append(sign); + sink->Append(base_indicator); + sink->Append(num_zeroes, '0'); + sink->Append(formatted); + sink->Append(num_right_spaces, ' '); + return true; +} + +template <typename T> +bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + using U = typename MakeUnsigned<T>::type; + IntDigits as_digits; + + // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes + // it to complain about a switch/case type mismatch, even though both are + // FormatConverionChar. Likely this is because at this point + // FormatConversionChar is declared, but not defined. + switch (static_cast<uint8_t>(conv.conversion_char())) { + case static_cast<uint8_t>(FormatConversionCharInternal::c): + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + + case static_cast<uint8_t>(FormatConversionCharInternal::o): + as_digits.PrintAsOct(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::x): + as_digits.PrintAsHexLower(static_cast<U>(v)); + break; + case static_cast<uint8_t>(FormatConversionCharInternal::X): + as_digits.PrintAsHexUpper(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::u): + as_digits.PrintAsDec(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::d): + case static_cast<uint8_t>(FormatConversionCharInternal::i): + as_digits.PrintAsDec(v); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::a): + case static_cast<uint8_t>(FormatConversionCharInternal::e): + case static_cast<uint8_t>(FormatConversionCharInternal::f): + case static_cast<uint8_t>(FormatConversionCharInternal::g): + case static_cast<uint8_t>(FormatConversionCharInternal::A): + case static_cast<uint8_t>(FormatConversionCharInternal::E): + case static_cast<uint8_t>(FormatConversionCharInternal::F): + case static_cast<uint8_t>(FormatConversionCharInternal::G): + return ConvertFloatImpl(static_cast<double>(v), conv, sink); + + default: + ABSL_INTERNAL_ASSUME(false); + } + + if (conv.is_basic()) { + sink->Append(as_digits.with_neg_and_zero()); + return true; + } + return ConvertIntImplInnerSlow(as_digits, conv, sink); +} + +template <typename T> +bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return FormatConversionCharIsFloat(conv.conversion_char()) && + ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.is_basic()) { + sink->Append(v); + return true; + } + return sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.has_left_flag()); +} + +} // namespace + +// ==================== Strings ==================== +StringConvertResult FormatConvertImpl(const std::string &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +StringConvertResult FormatConvertImpl(string_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) + return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; + size_t len; + if (v == nullptr) { + len = 0; + } else if (conv.precision() < 0) { + len = std::strlen(v); + } else { + // If precision is set, we look for the NUL-terminator on the valid range. + len = std::find(v, v + conv.precision(), '\0') - v; + } + return {ConvertStringArg(string_view(v, len), conv, sink)}; +} + +// ==================== Raw pointers ==================== +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { + if (!v.value) { + sink->Append("(nil)"); + return {true}; + } + IntDigits as_digits; + as_digits.PrintAsHexLower(v.value); + return {ConvertIntImplInnerSlow(as_digits, conv, sink)}; +} + +// ==================== Floats ==================== +FloatingConvertResult FormatConvertImpl(float v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(double v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(long double v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} + +// ==================== Chars ==================== +IntegralConvertResult FormatConvertImpl(char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(signed char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +// ==================== Ints ==================== +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(int v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(absl::int128 v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(absl::uint128 v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(); + + + +} // namespace str_format_internal + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/arg.h b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.h new file mode 100644 index 000000000000..d441e87fff33 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.h @@ -0,0 +1,474 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ + +#include <string.h> +#include <wchar.h> + +#include <cstdio> +#include <iomanip> +#include <limits> +#include <memory> +#include <sstream> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/int128.h" +#include "absl/strings/internal/str_format/extension.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class Cord; +class FormatCountCapture; +class FormatSink; + +namespace str_format_internal { + +class FormatConversionSpec; + +template <typename T, typename = void> +struct HasUserDefinedConvert : std::false_type {}; + +template <typename T> +struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>()))>> + : std::true_type {}; + +template <typename T> +class StreamedWrapper; + +// If 'v' can be converted (in the printf sense) according to 'conv', +// then convert it, appending to `sink` and return `true`. +// Otherwise fail and return `false`. + +// Raw pointers. +struct VoidPtr { + VoidPtr() = default; + template <typename T, + decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0> + VoidPtr(T* ptr) // NOLINT + : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} + uintptr_t value; +}; + +template <FormatConversionCharSet C> +struct ArgConvertResult { + bool value; +}; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { + return C; +} + +using StringConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::s>; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +// Strings. +StringConvertResult FormatConvertImpl(const std::string& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +template <class AbslCord, typename std::enable_if<std::is_same< + AbslCord, absl::Cord>::value>::type* = nullptr> +StringConvertResult FormatConvertImpl(const AbslCord& value, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + bool is_left = conv.has_left_flag(); + size_t space_remaining = 0; + + int width = conv.width(); + if (width >= 0) space_remaining = width; + + size_t to_write = value.size(); + + int precision = conv.precision(); + if (precision >= 0) + to_write = (std::min)(to_write, static_cast<size_t>(precision)); + + space_remaining = Excess(to_write, space_remaining); + + if (space_remaining > 0 && !is_left) sink->Append(space_remaining, ' '); + + for (string_view piece : value.Chunks()) { + if (piece.size() > to_write) { + piece.remove_suffix(piece.size() - to_write); + to_write = 0; + } else { + to_write -= piece.size(); + } + sink->Append(piece); + if (to_write == 0) { + break; + } + } + + if (space_remaining > 0 && is_left) sink->Append(space_remaining, ' '); + return {true}; +} + +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; +using FloatingConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::kFloating>; + +// Floats. +FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(long double v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +// Chars. +IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +// Ints. +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(uint128 v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> +IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(static_cast<int>(v), conv, sink); +} + +// We provide this function to help the checker, but it is never defined. +// FormatArgImpl will use the underlying Convert functions instead. +template <typename T> +typename std::enable_if<std::is_enum<T>::value && + !HasUserDefinedConvert<T>::value, + IntegralConvertResult>::type +FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +template <typename T> +StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* out) { + std::ostringstream oss; + oss << v.v_; + if (!oss) return {false}; + return str_format_internal::FormatConvertImpl(oss.str(), conv, out); +} + +// Use templates and dependent types to delay evaluation of the function +// until after FormatCountCapture is fully defined. +struct FormatCountCaptureHelper { + template <class T = int> + static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; + + if (conv.conversion_char() != + str_format_internal::FormatConversionCharInternal::n) { + return {false}; + } + *v2.p_ = static_cast<int>(sink->size()); + return {true}; + } +}; + +template <class T = int> +ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); +} + +// Helper friend struct to hide implementation details from the public API of +// FormatArgImpl. +struct FormatArgImplFriend { + template <typename Arg> + static bool ToInt(Arg arg, int* out) { + // A value initialized FormatConversionSpecImpl has a `none` conv, which + // tells the dispatcher to run the `int` conversion. + return arg.dispatcher_(arg.data_, {}, out); + } + + template <typename Arg> + static bool Convert(Arg arg, FormatConversionSpecImpl conv, + FormatSinkImpl* out) { + return arg.dispatcher_(arg.data_, conv, out); + } + + template <typename Arg> + static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) { + return arg.dispatcher_; + } +}; + +template <typename Arg> +constexpr FormatConversionCharSet ArgumentToConv() { + return absl::str_format_internal::ExtractCharSet( + decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), + std::declval<const FormatConversionSpecImpl&>(), + std::declval<FormatSinkImpl*>())){}); +} + +// A type-erased handle to a format argument. +class FormatArgImpl { + private: + enum { kInlinedSpace = 8 }; + + using VoidPtr = str_format_internal::VoidPtr; + + union Data { + const void* ptr; + const volatile void* volatile_ptr; + char buf[kInlinedSpace]; + }; + + using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out); + + template <typename T> + struct store_by_value + : std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) && + (std::is_integral<T>::value || + std::is_floating_point<T>::value || + std::is_pointer<T>::value || + std::is_same<VoidPtr, T>::value)> {}; + + enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue }; + template <typename T> + struct storage_policy + : std::integral_constant<StoragePolicy, + (std::is_volatile<T>::value + ? ByVolatilePointer + : (store_by_value<T>::value ? ByValue + : ByPointer))> { + }; + + // To reduce the number of vtables we will decay values before hand. + // Anything with a user-defined Convert will get its own vtable. + // For everything else: + // - Decay char* and char arrays into `const char*` + // - Decay any other pointer to `const void*` + // - Decay all enums to their underlying type. + // - Decay function pointers to void*. + template <typename T, typename = void> + struct DecayType { + static constexpr bool kHasUserDefined = + str_format_internal::HasUserDefinedConvert<T>::value; + using type = typename std::conditional< + !kHasUserDefined && std::is_convertible<T, const char*>::value, + const char*, + typename std::conditional<!kHasUserDefined && + std::is_convertible<T, VoidPtr>::value, + VoidPtr, const T&>::type>::type; + }; + template <typename T> + struct DecayType<T, + typename std::enable_if< + !str_format_internal::HasUserDefinedConvert<T>::value && + std::is_enum<T>::value>::type> { + using type = typename std::underlying_type<T>::type; + }; + + public: + template <typename T> + explicit FormatArgImpl(const T& value) { + using D = typename DecayType<T>::type; + static_assert( + std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue, + "Decayed types must be stored by value"); + Init(static_cast<D>(value)); + } + + private: + friend struct str_format_internal::FormatArgImplFriend; + template <typename T, StoragePolicy = storage_policy<T>::value> + struct Manager; + + template <typename T> + struct Manager<T, ByPointer> { + static Data SetValue(const T& value) { + Data data; + data.ptr = std::addressof(value); + return data; + } + + static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); } + }; + + template <typename T> + struct Manager<T, ByVolatilePointer> { + static Data SetValue(const T& value) { + Data data; + data.volatile_ptr = &value; + return data; + } + + static const T& Value(Data arg) { + return *static_cast<const T*>(arg.volatile_ptr); + } + }; + + template <typename T> + struct Manager<T, ByValue> { + static Data SetValue(const T& value) { + Data data; + memcpy(data.buf, &value, sizeof(value)); + return data; + } + + static T Value(Data arg) { + T value; + memcpy(&value, arg.buf, sizeof(T)); + return value; + } + }; + + template <typename T> + void Init(const T& value) { + data_ = Manager<T>::SetValue(value); + dispatcher_ = &Dispatch<T>; + } + + template <typename T> + static int ToIntVal(const T& val) { + using CommonType = typename std::conditional<std::is_signed<T>::value, + int64_t, uint64_t>::type; + if (static_cast<CommonType>(val) > + static_cast<CommonType>((std::numeric_limits<int>::max)())) { + return (std::numeric_limits<int>::max)(); + } else if (std::is_signed<T>::value && + static_cast<CommonType>(val) < + static_cast<CommonType>((std::numeric_limits<int>::min)())) { + return (std::numeric_limits<int>::min)(); + } + return static_cast<int>(val); + } + + template <typename T> + static bool ToInt(Data arg, int* out, std::true_type /* is_integral */, + std::false_type) { + *out = ToIntVal(Manager<T>::Value(arg)); + return true; + } + + template <typename T> + static bool ToInt(Data arg, int* out, std::false_type, + std::true_type /* is_enum */) { + *out = ToIntVal(static_cast<typename std::underlying_type<T>::type>( + Manager<T>::Value(arg))); + return true; + } + + template <typename T> + static bool ToInt(Data, int*, std::false_type, std::false_type) { + return false; + } + + template <typename T> + static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) { + // A `none` conv indicates that we want the `int` conversion. + if (ABSL_PREDICT_FALSE(spec.conversion_char() == + FormatConversionCharInternal::kNone)) { + return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), + std::is_enum<T>()); + } + if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), + spec.conversion_char()))) { + return false; + } + return str_format_internal::FormatConvertImpl( + Manager<T>::Value(arg), spec, + static_cast<FormatSinkImpl*>(out)) + .value; + } + + Data data_; + Dispatcher dispatcher_; +}; + +#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ + E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ + void*) + +#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__) + +ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern); + + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/arg_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/arg_test.cc new file mode 100644 index 000000000000..bf3d7e8e3777 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/arg_test.cc @@ -0,0 +1,114 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +#include "absl/strings/internal/str_format/arg.h" + +#include <ostream> +#include <string> +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +class FormatArgImplTest : public ::testing::Test { + public: + enum Color { kRed, kGreen, kBlue }; + + static const char *hi() { return "hi"; } +}; + +TEST_F(FormatArgImplTest, ToInt) { + int out = 0; + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out)); + EXPECT_EQ(1, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out)); + EXPECT_EQ(-1, out); + EXPECT_TRUE( + FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out)); + EXPECT_EQ(64, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<unsigned long long>(123456)), &out)); // NOLINT + EXPECT_EQ(123456, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<unsigned long long>( // NOLINT + std::numeric_limits<int>::max()) + + 1), + &out)); + EXPECT_EQ(std::numeric_limits<int>::max(), out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<long long>( // NOLINT + std::numeric_limits<int>::min()) - + 10), + &out)); + EXPECT_EQ(std::numeric_limits<int>::min(), out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out)); + EXPECT_EQ(0, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out)); + EXPECT_EQ(1, out); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<int *>(nullptr)), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out)); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out)); + EXPECT_EQ(2, out); +} + +extern const char kMyArray[]; + +TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) { + const char* a = ""; + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(""))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray))); +} + +TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { + auto expected = FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<void *>(nullptr))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<int *>(nullptr))), + expected); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<volatile int *>(nullptr))), + expected); + + auto p = static_cast<void (*)()>([] {}); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)), + expected); +} + +TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { + std::string s; + FormatSinkImpl sink(&s); + FormatConversionSpecImpl conv; + FormatConversionSpecImplFriend::SetConversionChar( + FormatConversionCharInternal::s, &conv); + FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); + FormatConversionSpecImplFriend::SetWidth(-1, &conv); + FormatConversionSpecImplFriend::SetPrecision(-1, &conv); + EXPECT_TRUE( + FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink)); + sink.Flush(); + EXPECT_EQ("ABCDE", s); +} +const char kMyArray[] = "ABCDE"; + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/bind.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.cc new file mode 100644 index 000000000000..6980ed1d8f0e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.cc @@ -0,0 +1,245 @@ +#include "absl/strings/internal/str_format/bind.h" + +#include <cerrno> +#include <limits> +#include <sstream> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +inline bool BindFromPosition(int position, int* value, + absl::Span<const FormatArgImpl> pack) { + assert(position > 0); + if (static_cast<size_t>(position) > pack.size()) { + return false; + } + // -1 because positions are 1-based + return FormatArgImplFriend::ToInt(pack[position - 1], value); +} + +class ArgContext { + public: + explicit ArgContext(absl::Span<const FormatArgImpl> pack) : pack_(pack) {} + + // Fill 'bound' with the results of applying the context's argument pack + // to the specified 'unbound'. We synthesize a BoundConversion by + // lining up a UnboundConversion with a user argument. We also + // resolve any '*' specifiers for width and precision, so after + // this call, 'bound' has all the information it needs to be formatted. + // Returns false on failure. + bool Bind(const UnboundConversion* unbound, BoundConversion* bound); + + private: + absl::Span<const FormatArgImpl> pack_; +}; + +inline bool ArgContext::Bind(const UnboundConversion* unbound, + BoundConversion* bound) { + const FormatArgImpl* arg = nullptr; + int arg_position = unbound->arg_position; + if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; + arg = &pack_[arg_position - 1]; // 1-based + + if (!unbound->flags.basic) { + int width = unbound->width.value(); + bool force_left = false; + if (unbound->width.is_from_arg()) { + if (!BindFromPosition(unbound->width.get_from_arg(), &width, pack_)) + return false; + if (width < 0) { + // "A negative field width is taken as a '-' flag followed by a + // positive field width." + force_left = true; + // Make sure we don't overflow the width when negating it. + width = -std::max(width, -std::numeric_limits<int>::max()); + } + } + + int precision = unbound->precision.value(); + if (unbound->precision.is_from_arg()) { + if (!BindFromPosition(unbound->precision.get_from_arg(), &precision, + pack_)) + return false; + } + + FormatConversionSpecImplFriend::SetWidth(width, bound); + FormatConversionSpecImplFriend::SetPrecision(precision, bound); + + if (force_left) { + Flags flags = unbound->flags; + flags.left = true; + FormatConversionSpecImplFriend::SetFlags(flags, bound); + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + } + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + FormatConversionSpecImplFriend::SetWidth(-1, bound); + FormatConversionSpecImplFriend::SetPrecision(-1, bound); + } + FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound); + bound->set_arg(arg); + return true; +} + +template <typename Converter> +class ConverterConsumer { + public: + ConverterConsumer(Converter converter, absl::Span<const FormatArgImpl> pack) + : converter_(converter), arg_context_(pack) {} + + bool Append(string_view s) { + converter_.Append(s); + return true; + } + bool ConvertOne(const UnboundConversion& conv, string_view conv_string) { + BoundConversion bound; + if (!arg_context_.Bind(&conv, &bound)) return false; + return converter_.ConvertOne(bound, conv_string); + } + + private: + Converter converter_; + ArgContext arg_context_; +}; + +template <typename Converter> +bool ConvertAll(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args, Converter converter) { + if (format.has_parsed_conversion()) { + return format.parsed_conversion()->ProcessFormat( + ConverterConsumer<Converter>(converter, args)); + } else { + return ParseFormatString(format.str(), + ConverterConsumer<Converter>(converter, args)); + } +} + +class DefaultConverter { + public: + explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_); + } + + private: + FormatSinkImpl* sink_; +}; + +class SummarizingConverter { + public: + explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + UntypedFormatSpecImpl spec("%d"); + + std::ostringstream ss; + ss << "{" << Streamable(spec, {*bound.arg()}) << ":" + << FormatConversionSpecImplFriend::FlagsToString(bound); + if (bound.width() >= 0) ss << bound.width(); + if (bound.precision() >= 0) ss << "." << bound.precision(); + ss << bound.conversion_char() << "}"; + Append(ss.str()); + return true; + } + + private: + FormatSinkImpl* sink_; +}; + +} // namespace + +bool BindWithPack(const UnboundConversion* props, + absl::Span<const FormatArgImpl> pack, + BoundConversion* bound) { + return ArgContext(pack).Bind(props, bound); +} + +std::string Summarize(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + typedef SummarizingConverter Converter; + std::string out; + { + // inner block to destroy sink before returning out. It ensures a last + // flush. + FormatSinkImpl sink(&out); + if (!ConvertAll(format, args, Converter(&sink))) { + return ""; + } + } + return out; +} + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + FormatSinkImpl sink(raw_sink); + using Converter = DefaultConverter; + return ConvertAll(format, args, Converter(&sink)); +} + +std::ostream& Streamable::Print(std::ostream& os) const { + if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit); + return os; +} + +std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + size_t orig = out->size(); + if (ABSL_PREDICT_FALSE(!FormatUntyped(out, format, args))) { + out->erase(orig); + } + return *out; +} + +std::string FormatPack(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + std::string out; + if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) { + out.clear(); + } + return out; +} + +int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + FILERawSink sink(output); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + if (sink.error()) { + errno = sink.error(); + return -1; + } + if (sink.count() > std::numeric_limits<int>::max()) { + errno = EFBIG; + return -1; + } + return static_cast<int>(sink.count()); +} + +int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + BufferRawSink sink(output, size ? size - 1 : 0); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + size_t total = sink.total_written(); + if (size) output[std::min(total, size - 1)] = 0; + return static_cast<int>(total); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/bind.h b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.h new file mode 100644 index 000000000000..585246e77e56 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.h @@ -0,0 +1,202 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ + +#include <array> +#include <cstdio> +#include <sstream> +#include <string> + +#include "absl/base/port.h" +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/parser.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class UntypedFormatSpec; + +namespace str_format_internal { + +class BoundConversion : public FormatConversionSpecImpl { + public: + const FormatArgImpl* arg() const { return arg_; } + void set_arg(const FormatArgImpl* a) { arg_ = a; } + + private: + const FormatArgImpl* arg_; +}; + +// This is the type-erased class that the implementation uses. +class UntypedFormatSpecImpl { + public: + UntypedFormatSpecImpl() = delete; + + explicit UntypedFormatSpecImpl(string_view s) + : data_(s.data()), size_(s.size()) {} + explicit UntypedFormatSpecImpl( + const str_format_internal::ParsedFormatBase* pc) + : data_(pc), size_(~size_t{}) {} + + bool has_parsed_conversion() const { return size_ == ~size_t{}; } + + string_view str() const { + assert(!has_parsed_conversion()); + return string_view(static_cast<const char*>(data_), size_); + } + const str_format_internal::ParsedFormatBase* parsed_conversion() const { + assert(has_parsed_conversion()); + return static_cast<const str_format_internal::ParsedFormatBase*>(data_); + } + + template <typename T> + static const UntypedFormatSpecImpl& Extract(const T& s) { + return s.spec_; + } + + private: + const void* data_; + size_t size_; +}; + +template <typename T, FormatConversionCharSet...> +struct MakeDependent { + using type = T; +}; + +// Implicitly convertible from `const char*`, `string_view`, and the +// `ExtendedParsedFormat` type. This abstraction allows all format functions to +// operate on any without providing too many overloads. +template <FormatConversionCharSet... Args> +class FormatSpecTemplate + : public MakeDependent<UntypedFormatSpec, Args...>::type { + using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; + + public: +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + // Honeypot overload for when the string is not constexpr. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + FormatSpecTemplate(...) // NOLINT + __attribute__((unavailable("Format string is not constexpr."))); + + // Honeypot overload for when the format is constexpr and invalid. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + // To avoid checking the format twice, we just check that the format is + // constexpr. If is it valid, then the overload below will kick in. + // We add the template here to make this overload have lower priority. + template <typename = void> + FormatSpecTemplate(const char* s) // NOLINT + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"), + unavailable( + "Format specified does not match the arguments passed."))); + + template <typename T = void> + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(str_format_internal::EnsureConstexpr(s), + "constexpr trap"))) { + static_assert(sizeof(T*) == 0, + "Format specified does not match the arguments passed."); + } + + // Good format overload. + FormatSpecTemplate(const char* s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) + : Base(s) {} + + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) + : Base(s) {} + +#else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + FormatSpecTemplate(const char* s) : Base(s) {} // NOLINT + FormatSpecTemplate(string_view s) : Base(s) {} // NOLINT + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + template <FormatConversionCharSet... C, + typename = typename std::enable_if< + AllOf(sizeof...(C) == sizeof...(Args), Contains(Args, + C)...)>::type> + FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT + : Base(&pc) {} +}; + +class Streamable { + public: + Streamable(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) + : format_(format) { + if (args.size() <= ABSL_ARRAYSIZE(few_args_)) { + for (size_t i = 0; i < args.size(); ++i) { + few_args_[i] = args[i]; + } + args_ = absl::MakeSpan(few_args_, args.size()); + } else { + many_args_.assign(args.begin(), args.end()); + args_ = many_args_; + } + } + + std::ostream& Print(std::ostream& os) const; + + friend std::ostream& operator<<(std::ostream& os, const Streamable& l) { + return l.Print(os); + } + + private: + const UntypedFormatSpecImpl& format_; + absl::Span<const FormatArgImpl> args_; + // if args_.size() is 4 or less: + FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0), + FormatArgImpl(0), FormatArgImpl(0)}; + // if args_.size() is more than 4: + std::vector<FormatArgImpl> many_args_; +}; + +// for testing +std::string Summarize(UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); +bool BindWithPack(const UnboundConversion* props, + absl::Span<const FormatArgImpl> pack, BoundConversion* bound); + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +std::string FormatPack(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +int FprintF(std::FILE* output, UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); +int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +// Returned by Streamed(v). Converts via '%s' to the std::string created +// by std::ostream << v. +template <typename T> +class StreamedWrapper { + public: + explicit StreamedWrapper(const T& v) : v_(v) { } + + private: + template <typename S> + friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl( + const StreamedWrapper<S>& v, FormatConversionSpecImpl conv, + FormatSinkImpl* out); + const T& v_; +}; + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/bind_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/bind_test.cc new file mode 100644 index 000000000000..64790a85fd23 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/bind_test.cc @@ -0,0 +1,143 @@ +#include "absl/strings/internal/str_format/bind.h" + +#include <string.h> +#include <limits> + +#include "gtest/gtest.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +class FormatBindTest : public ::testing::Test { + public: + bool Extract(const char *s, UnboundConversion *props, int *next) const { + return ConsumeUnboundConversion(s, s + strlen(s), props, next) == + s + strlen(s); + } +}; + +TEST_F(FormatBindTest, BindSingle) { + struct Expectation { + int line; + const char *fmt; + int ok_phases; + const FormatArgImpl *arg; + int width; + int precision; + int next_arg; + }; + const int no = -1; + const int ia[] = { 10, 20, 30, 40}; + const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]), + FormatArgImpl(ia[2]), FormatArgImpl(ia[3])}; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + const Expectation kExpect[] = { + {__LINE__, "d", 2, &args[0], no, no, 2}, + {__LINE__, "4d", 2, &args[0], 4, no, 2}, + {__LINE__, ".5d", 2, &args[0], no, 5, 2}, + {__LINE__, "4.5d", 2, &args[0], 4, 5, 2}, + {__LINE__, "*d", 2, &args[1], 10, no, 3}, + {__LINE__, ".*d", 2, &args[1], no, 10, 3}, + {__LINE__, "*.*d", 2, &args[2], 10, 20, 4}, + {__LINE__, "1$d", 2, &args[0], no, no, 0}, + {__LINE__, "2$d", 2, &args[1], no, no, 0}, + {__LINE__, "3$d", 2, &args[2], no, no, 0}, + {__LINE__, "4$d", 2, &args[3], no, no, 0}, + {__LINE__, "2$*1$d", 2, &args[1], 10, no, 0}, + {__LINE__, "2$*2$d", 2, &args[1], 20, no, 0}, + {__LINE__, "2$*3$d", 2, &args[1], 30, no, 0}, + {__LINE__, "2$.*1$d", 2, &args[1], no, 10, 0}, + {__LINE__, "2$.*2$d", 2, &args[1], no, 20, 0}, + {__LINE__, "2$.*3$d", 2, &args[1], no, 30, 0}, + {__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0}, + {__LINE__, "2$*2$.*2$d", 2, &args[1], 20, 20, 0}, + {__LINE__, "2$*1$.*3$d", 2, &args[1], 10, 30, 0}, + {__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0}, + {__LINE__, "1$*d", 0}, // indexed, then positional + {__LINE__, "*2$d", 0}, // positional, then indexed + {__LINE__, "6$d", 1}, // arg position out of bounds + {__LINE__, "1$6$d", 0}, // width position incorrectly specified + {__LINE__, "1$.6$d", 0}, // precision position incorrectly specified + {__LINE__, "1$*6$d", 1}, // width position out of bounds + {__LINE__, "1$.*6$d", 1}, // precision position out of bounds + }; +#pragma GCC diagnostic pop + for (const Expectation &e : kExpect) { + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UnboundConversion props; + BoundConversion bound; + int ok_phases = 0; + int next = 0; + if (Extract(e.fmt, &props, &next)) { + ++ok_phases; + if (BindWithPack(&props, args, &bound)) { + ++ok_phases; + } + } + EXPECT_EQ(e.ok_phases, ok_phases); + if (e.ok_phases < 2) continue; + if (e.arg != nullptr) { + EXPECT_EQ(e.arg, bound.arg()); + } + EXPECT_EQ(e.width, bound.width()); + EXPECT_EQ(e.precision, bound.precision()); + } +} + +TEST_F(FormatBindTest, WidthUnderflowRegression) { + UnboundConversion props; + BoundConversion bound; + int next = 0; + const int args_i[] = {std::numeric_limits<int>::min(), 17}; + const FormatArgImpl args[] = {FormatArgImpl(args_i[0]), + FormatArgImpl(args_i[1])}; + ASSERT_TRUE(Extract("*d", &props, &next)); + ASSERT_TRUE(BindWithPack(&props, args, &bound)); + + EXPECT_EQ(bound.width(), std::numeric_limits<int>::max()); + EXPECT_EQ(bound.arg(), args + 1); +} + +TEST_F(FormatBindTest, FormatPack) { + struct Expectation { + int line; + const char *fmt; + const char *summary; + }; + const int ia[] = { 10, 20, 30, 40, -10 }; + const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]), + FormatArgImpl(ia[2]), FormatArgImpl(ia[3]), + FormatArgImpl(ia[4])}; + const Expectation kExpect[] = { + {__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"}, + {__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"}, + {__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"}, + {__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"}, + {__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"}, + {__LINE__, "a%.*fb", "a{20:.10f}b"}, + {__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"}, + {__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"}, + {__LINE__, "a%04ldb", "a{10:04d}b"}, + {__LINE__, "a%-#04lldb", "a{10:-#04d}b"}, + {__LINE__, "a%1$*5$db", "a{10:-10d}b"}, + {__LINE__, "a%1$.*5$db", "a{10:d}b"}, + }; + for (const Expectation &e : kExpect) { + absl::string_view fmt = e.fmt; + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UntypedFormatSpecImpl format(fmt); + EXPECT_EQ(e.summary, + str_format_internal::Summarize(format, absl::MakeSpan(args))) + << "line:" << e.line; + } +} + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/checker.h b/third_party/abseil_cpp/absl/strings/internal/str_format/checker.h new file mode 100644 index 000000000000..424c51f74f17 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/checker.h @@ -0,0 +1,319 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ + +#include "absl/base/attributes.h" +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/extension.h" + +// Compile time check support for entry points. + +#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1 +#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +constexpr bool AllOf() { return true; } + +template <typename... T> +constexpr bool AllOf(bool b, T... t) { + return b && AllOf(t...); +} + +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +constexpr bool ContainsChar(const char* chars, char c) { + return *chars == c || (*chars && ContainsChar(chars + 1, c)); +} + +// A constexpr compatible list of Convs. +struct ConvList { + const FormatConversionCharSet* array; + int count; + + // We do the bound check here to avoid having to do it on the callers. + // Returning an empty FormatConversionCharSet has the same effect as + // short circuiting because it will never match any conversion. + constexpr FormatConversionCharSet operator[](int i) const { + return i < count ? array[i] : FormatConversionCharSet{}; + } + + constexpr ConvList without_front() const { + return count != 0 ? ConvList{array + 1, count - 1} : *this; + } +}; + +template <size_t count> +struct ConvListT { + // Make sure the array has size > 0. + FormatConversionCharSet list[count ? count : 1]; +}; + +constexpr char GetChar(string_view str, size_t index) { + return index < str.size() ? str[index] : char{}; +} + +constexpr string_view ConsumeFront(string_view str, size_t len = 1) { + return len <= str.size() ? string_view(str.data() + len, str.size() - len) + : string_view(); +} + +constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { + return ContainsChar(chars, GetChar(format, 0)) + ? ConsumeAnyOf(ConsumeFront(format), chars) + : format; +} + +constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } + +// Helper class for the ParseDigits function. +// It encapsulates the two return values we need there. +struct Integer { + string_view format; + int value; + + // If the next character is a '$', consume it. + // Otherwise, make `this` an invalid positional argument. + constexpr Integer ConsumePositionalDollar() const { + return GetChar(format, 0) == '$' ? Integer{ConsumeFront(format), value} + : Integer{format, 0}; + } +}; + +constexpr Integer ParseDigits(string_view format, int value = 0) { + return IsDigit(GetChar(format, 0)) + ? ParseDigits(ConsumeFront(format), + 10 * value + GetChar(format, 0) - '0') + : Integer{format, value}; +} + +// Parse digits for a positional argument. +// The parsing also consumes the '$'. +constexpr Integer ParsePositional(string_view format) { + return ParseDigits(format).ConsumePositionalDollar(); +} + +// Parses a single conversion specifier. +// See ConvParser::Run() for post conditions. +class ConvParser { + constexpr ConvParser SetFormat(string_view format) const { + return ConvParser(format, args_, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetArgs(ConvList args) const { + return ConvParser(format_, args, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetError(bool error) const { + return ConvParser(format_, args_, error_ || error, arg_position_, + is_positional_); + } + + constexpr ConvParser SetArgPosition(int arg_position) const { + return ConvParser(format_, args_, error_, arg_position, is_positional_); + } + + // Consumes the next arg and verifies that it matches `conv`. + // `error_` is set if there is no next arg or if it doesn't match `conv`. + constexpr ConvParser ConsumeNextArg(char conv) const { + return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); + } + + // Verify that positional argument `i.value` matches `conv`. + // `error_` is set if `i.value` is not a valid argument or if it doesn't + // match. + constexpr ConvParser VerifyPositional(Integer i, char conv) const { + return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); + } + + // Parse the position of the arg and store it in `arg_position_`. + constexpr ConvParser ParseArgPosition(Integer arg) const { + return SetFormat(arg.format).SetArgPosition(arg.value); + } + + // Consume the flags. + constexpr ConvParser ParseFlags() const { + return SetFormat(ConsumeAnyOf(format_, "-+ #0")); + } + + // Consume the width. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParseWidth() const { + return IsDigit(GetChar(format_, 0)) + ? SetFormat(ParseDigits(format_).format) + : GetChar(format_, 0) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_)), '*') + : SetFormat(ConsumeFront(format_)) + .ConsumeNextArg('*') + : *this; + } + + // Consume the precision. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParsePrecision() const { + return GetChar(format_, 0) != '.' + ? *this + : GetChar(format_, 1) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_, 2)), '*') + : SetFormat(ConsumeFront(format_, 2)) + .ConsumeNextArg('*') + : SetFormat(ParseDigits(ConsumeFront(format_)).format); + } + + // Consume the length characters. + constexpr ConvParser ParseLength() const { + return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); + } + + // Consume the conversion character and verify that it matches `args_`. + // `error_` is set if it doesn't match. + constexpr ConvParser ParseConversion() const { + return is_positional_ + ? VerifyPositional({ConsumeFront(format_), arg_position_}, + GetChar(format_, 0)) + : ConsumeNextArg(GetChar(format_, 0)) + .SetFormat(ConsumeFront(format_)); + } + + constexpr ConvParser(string_view format, ConvList args, bool error, + int arg_position, bool is_positional) + : format_(format), + args_(args), + error_(error), + arg_position_(arg_position), + is_positional_(is_positional) {} + + public: + constexpr ConvParser(string_view format, ConvList args, bool is_positional) + : format_(format), + args_(args), + error_(false), + arg_position_(0), + is_positional_(is_positional) {} + + // Consume the whole conversion specifier. + // `format()` will be set to the character after the conversion character. + // `error()` will be set if any of the arguments do not match. + constexpr ConvParser Run() const { + return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this) + .ParseFlags() + .ParseWidth() + .ParsePrecision() + .ParseLength() + .ParseConversion(); + } + + constexpr string_view format() const { return format_; } + constexpr ConvList args() const { return args_; } + constexpr bool error() const { return error_; } + constexpr bool is_positional() const { return is_positional_; } + + private: + string_view format_; + // Current list of arguments. If we are not in positional mode we will consume + // from the front. + ConvList args_; + bool error_; + // Holds the argument position of the conversion character, if we are in + // positional mode. Otherwise, it is unspecified. + int arg_position_; + // Whether we are in positional mode. + // It changes the behavior of '*' and where to find the converted argument. + bool is_positional_; +}; + +// Parses a whole format expression. +// See FormatParser::Run(). +class FormatParser { + static constexpr bool FoundPercent(string_view format) { + return format.empty() || + (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); + } + + // We use an inner function to increase the recursion limit. + // The inner function consumes up to `limit` characters on every run. + // This increases the limit from 512 to ~512*limit. + static constexpr string_view ConsumeNonPercentInner(string_view format, + int limit = 20) { + return FoundPercent(format) || !limit + ? format + : ConsumeNonPercentInner( + ConsumeFront(format, GetChar(format, 0) == '%' && + GetChar(format, 1) == '%' + ? 2 + : 1), + limit - 1); + } + + // Consume characters until the next conversion spec %. + // It skips %%. + static constexpr string_view ConsumeNonPercent(string_view format) { + return FoundPercent(format) + ? format + : ConsumeNonPercent(ConsumeNonPercentInner(format)); + } + + static constexpr bool IsPositional(string_view format) { + return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format)) + : GetChar(format, 0) == '$'; + } + + constexpr bool RunImpl(bool is_positional) const { + // In non-positional mode we require all arguments to be consumed. + // In positional mode just reaching the end of the format without errors is + // enough. + return (format_.empty() && (is_positional || args_.count == 0)) || + (!format_.empty() && + ValidateArg( + ConvParser(ConsumeFront(format_), args_, is_positional).Run())); + } + + constexpr bool ValidateArg(ConvParser conv) const { + return !conv.error() && FormatParser(conv.format(), conv.args()) + .RunImpl(conv.is_positional()); + } + + public: + constexpr FormatParser(string_view format, ConvList args) + : format_(ConsumeNonPercent(format)), args_(args) {} + + // Runs the parser for `format` and `args`. + // It verifies that the format is valid and that all conversion specifiers + // match the arguments passed. + // In non-positional mode it also verfies that all arguments are consumed. + constexpr bool Run() const { + return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); + } + + private: + string_view format_; + // Current list of arguments. + // If we are not in positional mode we will consume from the front and will + // have to be empty in the end. + ConvList args_; +}; + +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + return FormatParser(format, + {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) + .Run(); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/checker_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/checker_test.cc new file mode 100644 index 000000000000..a76d70b0586c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/checker_test.cc @@ -0,0 +1,156 @@ +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +std::string ConvToString(FormatConversionCharSet conv) { + std::string out; +#define CONV_SET_CASE(c) \ + if (Contains(conv, FormatConversionCharSetInternal::c)) { \ + out += #c; \ + } + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) +#undef CONV_SET_CASE + if (Contains(conv, FormatConversionCharSetInternal::kStar)) { + out += "*"; + } + return out; +} + +TEST(StrFormatChecker, ArgumentToConv) { + FormatConversionCharSet conv = ArgumentToConv<std::string>(); + EXPECT_EQ(ConvToString(conv), "s"); + + conv = ArgumentToConv<const char*>(); + EXPECT_EQ(ConvToString(conv), "sp"); + + conv = ArgumentToConv<double>(); + EXPECT_EQ(ConvToString(conv), "fFeEgGaA"); + + conv = ArgumentToConv<int>(); + EXPECT_EQ(ConvToString(conv), "cdiouxXfFeEgGaA*"); + + conv = ArgumentToConv<std::string*>(); + EXPECT_EQ(ConvToString(conv), "p"); +} + +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +struct Case { + bool result; + const char* format; +}; + +template <typename... Args> +constexpr Case ValidFormat(const char* format) { + return {ValidFormatImpl<ArgumentToConv<Args>()...>(format), format}; +} + +TEST(StrFormatChecker, ValidFormat) { + // We want to make sure these expressions are constexpr and they have the + // expected value. + // If they are not constexpr the attribute will just ignore them and not give + // a compile time error. + enum e {}; + enum class e2 {}; + constexpr Case trues[] = { + ValidFormat<>("abc"), // + + ValidFormat<e>("%d"), // + ValidFormat<e2>("%d"), // + ValidFormat<int>("%% %d"), // + ValidFormat<int>("%ld"), // + ValidFormat<int>("%lld"), // + ValidFormat<std::string>("%s"), // + ValidFormat<std::string>("%10s"), // + ValidFormat<int>("%.10x"), // + ValidFormat<int, int>("%*.3x"), // + ValidFormat<int>("%1.d"), // + ValidFormat<int>("%.d"), // + ValidFormat<int, double>("%d %g"), // + ValidFormat<int, std::string>("%*s"), // + ValidFormat<int, double>("%.*f"), // + ValidFormat<void (*)(), volatile int*>("%p %p"), // + ValidFormat<string_view, const char*, double, void*>( + "string_view=%s const char*=%s double=%f void*=%p)"), + + ValidFormat<int>("%% %1$d"), // + ValidFormat<int>("%1$ld"), // + ValidFormat<int>("%1$lld"), // + ValidFormat<std::string>("%1$s"), // + ValidFormat<std::string>("%1$10s"), // + ValidFormat<int>("%1$.10x"), // + ValidFormat<int>("%1$*1$.*1$d"), // + ValidFormat<int, int>("%1$*2$.3x"), // + ValidFormat<int>("%1$1.d"), // + ValidFormat<int>("%1$.d"), // + ValidFormat<double, int>("%2$d %1$g"), // + ValidFormat<int, std::string>("%2$*1$s"), // + ValidFormat<int, double>("%2$.*1$f"), // + ValidFormat<void*, string_view, const char*, double>( + "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p " + "repeat=%3$s)")}; + + for (Case c : trues) { + EXPECT_TRUE(c.result) << c.format; + } + + constexpr Case falses[] = { + ValidFormat<int>(""), // + + ValidFormat<e>("%s"), // + ValidFormat<e2>("%s"), // + ValidFormat<>("%s"), // + ValidFormat<>("%r"), // + ValidFormat<int>("%s"), // + ValidFormat<int>("%.1.d"), // + ValidFormat<int>("%*1d"), // + ValidFormat<int>("%1-d"), // + ValidFormat<std::string, int>("%*s"), // + ValidFormat<int>("%*d"), // + ValidFormat<std::string>("%p"), // + ValidFormat<int (*)(int)>("%d"), // + + ValidFormat<>("%3$d"), // + ValidFormat<>("%1$r"), // + ValidFormat<int>("%1$s"), // + ValidFormat<int>("%1$.1.d"), // + ValidFormat<int>("%1$*2$1d"), // + ValidFormat<int>("%1$1-d"), // + ValidFormat<std::string, int>("%2$*1$s"), // + ValidFormat<std::string>("%1$p"), + + ValidFormat<int, int>("%d %2$d"), // + }; + + for (Case c : falses) { + EXPECT_FALSE(c.result) << c.format; + } +} + +TEST(StrFormatChecker, LongFormat) { +#define CHARS_X_40 "1234567890123456789012345678901234567890" +#define CHARS_X_400 \ + CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 \ + CHARS_X_40 CHARS_X_40 CHARS_X_40 +#define CHARS_X_4000 \ + CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 \ + CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 + constexpr char long_format[] = + CHARS_X_4000 "%d" CHARS_X_4000 "%s" CHARS_X_4000; + constexpr bool is_valid = ValidFormat<int, std::string>(long_format).result; + EXPECT_TRUE(is_valid); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/convert_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/convert_test.cc new file mode 100644 index 000000000000..20c6229fcb37 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/convert_test.cc @@ -0,0 +1,865 @@ +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> + +#include <cctype> +#include <cmath> +#include <limits> +#include <string> +#include <thread> // NOLINT + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/str_format/bind.h" +#include "absl/types/optional.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +template <typename T, size_t N> +size_t ArraySize(T (&)[N]) { + return N; +} + +std::string LengthModFor(float) { return ""; } +std::string LengthModFor(double) { return ""; } +std::string LengthModFor(long double) { return "L"; } +std::string LengthModFor(char) { return "hh"; } +std::string LengthModFor(signed char) { return "hh"; } +std::string LengthModFor(unsigned char) { return "hh"; } +std::string LengthModFor(short) { return "h"; } // NOLINT +std::string LengthModFor(unsigned short) { return "h"; } // NOLINT +std::string LengthModFor(int) { return ""; } +std::string LengthModFor(unsigned) { return ""; } +std::string LengthModFor(long) { return "l"; } // NOLINT +std::string LengthModFor(unsigned long) { return "l"; } // NOLINT +std::string LengthModFor(long long) { return "ll"; } // NOLINT +std::string LengthModFor(unsigned long long) { return "ll"; } // NOLINT + +std::string EscCharImpl(int v) { + if (std::isprint(static_cast<unsigned char>(v))) { + return std::string(1, static_cast<char>(v)); + } + char buf[64]; + int n = snprintf(buf, sizeof(buf), "\\%#.2x", + static_cast<unsigned>(v & 0xff)); + assert(n > 0 && n < sizeof(buf)); + return std::string(buf, n); +} + +std::string Esc(char v) { return EscCharImpl(v); } +std::string Esc(signed char v) { return EscCharImpl(v); } +std::string Esc(unsigned char v) { return EscCharImpl(v); } + +template <typename T> +std::string Esc(const T &v) { + std::ostringstream oss; + oss << v; + return oss.str(); +} + +void StrAppendV(std::string *dst, const char *format, va_list ap) { + // First try with a small fixed size buffer + static const int kSpaceLength = 1024; + char space[kSpaceLength]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, kSpaceLength, format, backup_ap); + va_end(backup_ap); + if (result < kSpaceLength) { + if (result >= 0) { + // Normal case -- everything fit. + dst->append(space, result); + return; + } + if (result < 0) { + // Just an error. + return; + } + } + + // Increase the buffer size to the size requested by vsnprintf, + // plus one for the closing \0. + int length = result + 1; + char *buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + result = vsnprintf(buf, length, format, backup_ap); + va_end(backup_ap); + + if (result >= 0 && result < length) { + // It fit + dst->append(buf, result); + } + delete[] buf; +} + +void StrAppend(std::string *out, const char *format, ...) { + va_list ap; + va_start(ap, format); + StrAppendV(out, format, ap); + va_end(ap); +} + +std::string StrPrint(const char *format, ...) { + va_list ap; + va_start(ap, format); + std::string result; + StrAppendV(&result, format, ap); + va_end(ap); + return result; +} + +class FormatConvertTest : public ::testing::Test { }; + +template <typename T> +void TestStringConvert(const T& str) { + const FormatArgImpl args[] = {FormatArgImpl(str)}; + struct Expectation { + const char *out; + const char *fmt; + }; + const Expectation kExpect[] = { + {"hello", "%1$s" }, + {"", "%1$.s" }, + {"", "%1$.0s" }, + {"h", "%1$.1s" }, + {"he", "%1$.2s" }, + {"hello", "%1$.10s" }, + {" hello", "%1$6s" }, + {" he", "%1$5.2s" }, + {"he ", "%1$-5.2s" }, + {"hello ", "%1$-6.10s" }, + }; + for (const Expectation &e : kExpect) { + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, BasicString) { + TestStringConvert("hello"); // As char array. + TestStringConvert(static_cast<const char*>("hello")); + TestStringConvert(std::string("hello")); + TestStringConvert(string_view("hello")); +} + +TEST_F(FormatConvertTest, NullString) { + const char* p = nullptr; + UntypedFormatSpecImpl format("%s"); + EXPECT_EQ("", FormatPack(format, {FormatArgImpl(p)})); +} + +TEST_F(FormatConvertTest, StringPrecision) { + // We cap at the precision. + char c = 'a'; + const char* p = &c; + UntypedFormatSpecImpl format("%.1s"); + EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)})); + + // We cap at the NUL-terminator. + p = "ABC"; + UntypedFormatSpecImpl format2("%.10s"); + EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)})); +} + +// Pointer formatting is implementation defined. This checks that the argument +// can be matched to `ptr`. +MATCHER_P(MatchesPointerString, ptr, "") { + if (ptr == nullptr && arg == "(nil)") { + return true; + } + void* parsed = nullptr; + if (sscanf(arg.c_str(), "%p", &parsed) != 1) { + ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str()); + } + return ptr == parsed; +} + +TEST_F(FormatConvertTest, Pointer) { + static int x = 0; + const int *xp = &x; + char c = 'h'; + char *mcp = &c; + const char *cp = "hi"; + const char *cnil = nullptr; + const int *inil = nullptr; + using VoidF = void (*)(); + VoidF fp = [] {}, fnil = nullptr; + volatile char vc; + volatile char *vcp = &vc; + volatile char *vcnil = nullptr; + const FormatArgImpl args_array[] = { + FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(inil), + FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp), + FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil), + }; + auto args = absl::MakeConstSpan(args_array); + + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.1p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%30.20p"), args), + MatchesPointerString(&x)); + + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.1p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args), + MatchesPointerString(&x)); + + // const char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args), + MatchesPointerString(cp)); + // null const int* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args), + MatchesPointerString(nullptr)); + // null const char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args), + MatchesPointerString(nullptr)); + // nonconst char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args), + MatchesPointerString(mcp)); + + // function pointers + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args), + MatchesPointerString(reinterpret_cast<const void*>(fp))); + EXPECT_THAT( + FormatPack(UntypedFormatSpecImpl("%8$p"), args), + MatchesPointerString(reinterpret_cast<volatile const void *>(vcp))); + + // null function pointers + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args), + MatchesPointerString(nullptr)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args), + MatchesPointerString(nullptr)); +} + +struct Cardinal { + enum Pos { k1 = 1, k2 = 2, k3 = 3 }; + enum Neg { kM1 = -1, kM2 = -2, kM3 = -3 }; +}; + +TEST_F(FormatConvertTest, Enum) { + const Cardinal::Pos k3 = Cardinal::k3; + const Cardinal::Neg km3 = Cardinal::kM3; + const FormatArgImpl args[] = {FormatArgImpl(k3), FormatArgImpl(km3)}; + UntypedFormatSpecImpl format("%1$d"); + UntypedFormatSpecImpl format2("%2$d"); + EXPECT_EQ("3", FormatPack(format, absl::MakeSpan(args))); + EXPECT_EQ("-3", FormatPack(format2, absl::MakeSpan(args))); +} + +template <typename T> +class TypedFormatConvertTest : public FormatConvertTest { }; + +TYPED_TEST_SUITE_P(TypedFormatConvertTest); + +std::vector<std::string> AllFlagCombinations() { + const char kFlags[] = {'-', '#', '0', '+', ' '}; + std::vector<std::string> result; + for (size_t fsi = 0; fsi < (1ull << ArraySize(kFlags)); ++fsi) { + std::string flag_set; + for (size_t fi = 0; fi < ArraySize(kFlags); ++fi) + if (fsi & (1ull << fi)) + flag_set += kFlags[fi]; + result.push_back(flag_set); + } + return result; +} + +TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) { + typedef TypeParam T; + typedef typename std::make_unsigned<T>::type UnsignedT; + using remove_volatile_t = typename std::remove_volatile<T>::type; + const T kMin = std::numeric_limits<remove_volatile_t>::min(); + const T kMax = std::numeric_limits<remove_volatile_t>::max(); + const T kVals[] = { + remove_volatile_t(1), + remove_volatile_t(2), + remove_volatile_t(3), + remove_volatile_t(123), + remove_volatile_t(-1), + remove_volatile_t(-2), + remove_volatile_t(-3), + remove_volatile_t(-123), + remove_volatile_t(0), + kMax - remove_volatile_t(1), + kMax, + kMin + remove_volatile_t(1), + kMin, + }; + const char kConvChars[] = {'d', 'i', 'u', 'o', 'x', 'X'}; + const std::string kWid[] = {"", "4", "10"}; + const std::string kPrec[] = {"", ".", ".0", ".4", ".10"}; + + const std::vector<std::string> flag_sets = AllFlagCombinations(); + + for (size_t vi = 0; vi < ArraySize(kVals); ++vi) { + const T val = kVals[vi]; + SCOPED_TRACE(Esc(val)); + const FormatArgImpl args[] = {FormatArgImpl(val)}; + for (size_t ci = 0; ci < ArraySize(kConvChars); ++ci) { + const char conv_char = kConvChars[ci]; + for (size_t fsi = 0; fsi < flag_sets.size(); ++fsi) { + const std::string &flag_set = flag_sets[fsi]; + for (size_t wi = 0; wi < ArraySize(kWid); ++wi) { + const std::string &wid = kWid[wi]; + for (size_t pi = 0; pi < ArraySize(kPrec); ++pi) { + const std::string &prec = kPrec[pi]; + + const bool is_signed_conv = (conv_char == 'd' || conv_char == 'i'); + const bool is_unsigned_to_signed = + !std::is_signed<T>::value && is_signed_conv; + // Don't consider sign-related flags '+' and ' ' when doing + // unsigned to signed conversions. + if (is_unsigned_to_signed && + flag_set.find_first_of("+ ") != std::string::npos) { + continue; + } + + std::string new_fmt("%"); + new_fmt += flag_set; + new_fmt += wid; + new_fmt += prec; + // old and new always agree up to here. + std::string old_fmt = new_fmt; + new_fmt += conv_char; + std::string old_result; + if (is_unsigned_to_signed) { + // don't expect agreement on unsigned formatted as signed, + // as printf can't do that conversion properly. For those + // cases, we do expect agreement with printf with a "%u" + // and the unsigned equivalent of 'val'. + UnsignedT uval = val; + old_fmt += LengthModFor(uval); + old_fmt += "u"; + old_result = StrPrint(old_fmt.c_str(), uval); + } else { + old_fmt += LengthModFor(val); + old_fmt += conv_char; + old_result = StrPrint(old_fmt.c_str(), val); + } + + SCOPED_TRACE(std::string() + " old_fmt: \"" + old_fmt + + "\"'" + " new_fmt: \"" + + new_fmt + "\""); + UntypedFormatSpecImpl format(new_fmt); + EXPECT_EQ(old_result, FormatPack(format, absl::MakeSpan(args))); + } + } + } + } + } +} + +TYPED_TEST_P(TypedFormatConvertTest, Char) { + typedef TypeParam T; + using remove_volatile_t = typename std::remove_volatile<T>::type; + static const T kMin = std::numeric_limits<remove_volatile_t>::min(); + static const T kMax = std::numeric_limits<remove_volatile_t>::max(); + T kVals[] = { + remove_volatile_t(1), remove_volatile_t(2), remove_volatile_t(10), + remove_volatile_t(-1), remove_volatile_t(-2), remove_volatile_t(-10), + remove_volatile_t(0), + kMin + remove_volatile_t(1), kMin, + kMax - remove_volatile_t(1), kMax + }; + for (const T &c : kVals) { + const FormatArgImpl args[] = {FormatArgImpl(c)}; + UntypedFormatSpecImpl format("%c"); + EXPECT_EQ(StrPrint("%c", c), FormatPack(format, absl::MakeSpan(args))); + } +} + +REGISTER_TYPED_TEST_CASE_P(TypedFormatConvertTest, AllIntsWithFlags, Char); + +typedef ::testing::Types< + int, unsigned, volatile int, + short, unsigned short, + long, unsigned long, + long long, unsigned long long, + signed char, unsigned char, char> + AllIntTypes; +INSTANTIATE_TYPED_TEST_CASE_P(TypedFormatConvertTestWithAllIntTypes, + TypedFormatConvertTest, AllIntTypes); +TEST_F(FormatConvertTest, VectorBool) { + // Make sure vector<bool>'s values behave as bools. + std::vector<bool> v = {true, false}; + const std::vector<bool> cv = {true, false}; + EXPECT_EQ("1,0,1,0", + FormatPack(UntypedFormatSpecImpl("%d,%d,%d,%d"), + absl::Span<const FormatArgImpl>( + {FormatArgImpl(v[0]), FormatArgImpl(v[1]), + FormatArgImpl(cv[0]), FormatArgImpl(cv[1])}))); +} + + +TEST_F(FormatConvertTest, Int128) { + absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979; + absl::int128 negative = -positive; + absl::int128 max = absl::Int128Max(), min = absl::Int128Min(); + const FormatArgImpl args[] = {FormatArgImpl(positive), + FormatArgImpl(negative), FormatArgImpl(max), + FormatArgImpl(min)}; + + struct Case { + const char* format; + const char* expected; + } cases[] = { + {"%1$d", "2595989796776606496405"}, + {"%1$30d", " 2595989796776606496405"}, + {"%1$-30d", "2595989796776606496405 "}, + {"%1$u", "2595989796776606496405"}, + {"%1$x", "8cba9876066020f695"}, + {"%2$d", "-2595989796776606496405"}, + {"%2$30d", " -2595989796776606496405"}, + {"%2$-30d", "-2595989796776606496405 "}, + {"%2$u", "340282366920938460867384810655161715051"}, + {"%2$x", "ffffffffffffff73456789f99fdf096b"}, + {"%3$d", "170141183460469231731687303715884105727"}, + {"%3$u", "170141183460469231731687303715884105727"}, + {"%3$x", "7fffffffffffffffffffffffffffffff"}, + {"%4$d", "-170141183460469231731687303715884105728"}, + {"%4$x", "80000000000000000000000000000000"}, + }; + + for (auto c : cases) { + UntypedFormatSpecImpl format(c.format); + EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, Uint128) { + absl::uint128 v = static_cast<absl::uint128>(0x1234567890abcdef) * 1979; + absl::uint128 max = absl::Uint128Max(); + const FormatArgImpl args[] = {FormatArgImpl(v), FormatArgImpl(max)}; + + struct Case { + const char* format; + const char* expected; + } cases[] = { + {"%1$d", "2595989796776606496405"}, + {"%1$30d", " 2595989796776606496405"}, + {"%1$-30d", "2595989796776606496405 "}, + {"%1$u", "2595989796776606496405"}, + {"%1$x", "8cba9876066020f695"}, + {"%2$d", "340282366920938463463374607431768211455"}, + {"%2$u", "340282366920938463463374607431768211455"}, + {"%2$x", "ffffffffffffffffffffffffffffffff"}, + }; + + for (auto c : cases) { + UntypedFormatSpecImpl format(c.format); + EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, Float) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + + const char *const kFormats[] = { + "%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03", + "%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + + std::vector<double> doubles = {0.0, + -0.0, + .99999999999999, + 99999999999999., + std::numeric_limits<double>::max(), + -std::numeric_limits<double>::max(), + std::numeric_limits<double>::min(), + -std::numeric_limits<double>::min(), + std::numeric_limits<double>::lowest(), + -std::numeric_limits<double>::lowest(), + std::numeric_limits<double>::epsilon(), + std::numeric_limits<double>::epsilon() + 1, + std::numeric_limits<double>::infinity(), + -std::numeric_limits<double>::infinity()}; + + // Some regression tests. + doubles.push_back(0.99999999999999989); + + if (std::numeric_limits<double>::has_denorm != std::denorm_absent) { + doubles.push_back(std::numeric_limits<double>::denorm_min()); + doubles.push_back(-std::numeric_limits<double>::denorm_min()); + } + + for (double base : + {1., 12., 123., 1234., 12345., 123456., 1234567., 12345678., 123456789., + 1234567890., 12345678901., 123456789012., 1234567890123.}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + } + } + } + + // Workaround libc bug. + // https://sourceware.org/bugzilla/show_bug.cgi?id=22142 + const bool gcc_bug_22142 = + StrPrint("%f", std::numeric_limits<double>::max()) != + "1797693134862315708145274237317043567980705675258449965989174768031" + "5726078002853876058955863276687817154045895351438246423432132688946" + "4182768467546703537516986049910576551282076245490090389328944075868" + "5084551339423045832369032229481658085593321233482747978262041447231" + "68738177180919299881250404026184124858368.000000"; + + if (!gcc_bug_22142) { + for (int exp = -300; exp <= 300; ++exp) { + const double all_ones_mantissa = 0x1fffffffffffff; + doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + } + } + + if (gcc_bug_22142) { + for (auto &d : doubles) { + using L = std::numeric_limits<double>; + double d2 = std::abs(d); + if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) { + d = 0; + } + } + } + + // Remove duplicates to speed up the logic below. + std::sort(doubles.begin(), doubles.end()); + doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); + +#ifndef __APPLE__ + // Apple formats NaN differently (+nan) vs. (nan) + doubles.push_back(std::nan("")); +#endif + + // Reserve the space to ensure we don't allocate memory in the output itself. + std::string str_format_result; + str_format_result.reserve(1 << 20); + std::string string_printf_result; + string_printf_result.reserve(1 << 20); + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + for (double d : doubles) { + int i = -10; + FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + UntypedFormatSpecImpl format(fmt_str); + + string_printf_result.clear(); + StrAppend(&string_printf_result, fmt_str.c_str(), d, i); + str_format_result.clear(); + + { + AppendPack(&str_format_result, format, absl::MakeSpan(args)); + } + + if (string_printf_result != str_format_result) { + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test + // to time out. + ASSERT_EQ(string_printf_result, str_format_result) + << fmt_str << " " << StrPrint("%.18g", d) << " " + << StrPrint("%a", d) << " " << StrPrint("%.1080f", d); + } + } + } + } +} + +TEST_F(FormatConvertTest, FloatRound) { + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +#if !defined(_MSC_VER) + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + EXPECT_EQ(StrPrint(fmt, d), s); +#endif // _MSC_VER + + return s; + }; + // All of these values have to be exactly represented. + // Otherwise we might not be testing what we think we are testing. + + // These values can fit in a 64bit "fast" representation. + const double exact_value = 0.00000000000005684341886080801486968994140625; + assert(exact_value == std::pow(2, -44)); + // Round up at a 5xx. + EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001"); + // Round up at a >5 + EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006"); + // Round down at a <5 + EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568"); + // Nine handling + EXPECT_EQ(format("%.35f", exact_value), + "0.00000000000005684341886080801486969"); + EXPECT_EQ(format("%.36f", exact_value), + "0.000000000000056843418860808014869690"); + // Round down the last nine. + EXPECT_EQ(format("%.37f", exact_value), + "0.0000000000000568434188608080148696899"); + EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147"); + // Round up the last nine + EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470"); + EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697"); + + // Round to even (down) + EXPECT_EQ(format("%.43f", exact_value), + "0.0000000000000568434188608080148696899414062"); + // Exact + EXPECT_EQ(format("%.44f", exact_value), + "0.00000000000005684341886080801486968994140625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)), + "0.0000000000001705302565824240446090698242188"); + // Exact, just to check. + EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)), + "0.00000000000017053025658242404460906982421875"); + + // This value has to be small enough that it won't fit in the uint128 + // representation for printing. + const double small_exact_value = + 0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT + assert(small_exact_value == std::pow(2, -120)); + // Round up at a 5xx. + EXPECT_EQ(format("%.37f", small_exact_value), + "0.0000000000000000000000000000000000008"); + // Round down at a <5 + EXPECT_EQ(format("%.38f", small_exact_value), + "0.00000000000000000000000000000000000075"); + // Round up at a >5 + EXPECT_EQ(format("%.41f", small_exact_value), + "0.00000000000000000000000000000000000075232"); + // Nine handling + EXPECT_EQ(format("%.55f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051"); + EXPECT_EQ(format("%.56f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400510"); + EXPECT_EQ(format("%.57f", small_exact_value), + "0.000000000000000000000000000000000000752316384526264005100"); + EXPECT_EQ(format("%.58f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051000"); + // Round down the last nine + EXPECT_EQ(format("%.59f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400509999"); + // Round up the last nine + EXPECT_EQ(format("%.79f", small_exact_value), + "0.000000000000000000000000000000000000" + "7523163845262640050999913838222372338039460"); + + // Round to even (down) + EXPECT_EQ(format("%.119f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "394595633413601376560109201818704605102539062"); + // Exact + EXPECT_EQ(format("%.120f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "3945956334136013765601092018187046051025390625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "183786900240804129680327605456113815307617188"); + // Exact, just to check. + EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "1837869002408041296803276054561138153076171875"); +} + +// We don't actually store the results. This is just to exercise the rest of the +// machinery. +struct NullSink { + friend void AbslFormatFlush(NullSink *sink, string_view str) {} +}; + +template <typename... T> +bool FormatWithNullSink(absl::string_view fmt, const T &... a) { + NullSink sink; + FormatArgImpl args[] = {FormatArgImpl(a)...}; + return FormatUntyped(&sink, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +} + +TEST_F(FormatConvertTest, ExtremeWidthPrecision) { + for (const char *fmt : {"f"}) { + for (double d : {1e-100, 1.0, 1e100}) { + constexpr int max = std::numeric_limits<int>::max(); + EXPECT_TRUE(FormatWithNullSink(std::string("%.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%1.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*.*") + fmt, max, max, d)); + } + } +} + +TEST_F(FormatConvertTest, LongDouble) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000", + "%.60", "%+", "% ", "%-10"}; + + std::vector<long double> doubles = { + 0.0, + -0.0, + std::numeric_limits<long double>::max(), + -std::numeric_limits<long double>::max(), + std::numeric_limits<long double>::min(), + -std::numeric_limits<long double>::min(), + std::numeric_limits<long double>::infinity(), + -std::numeric_limits<long double>::infinity()}; + + for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L, + 1234567.L, 12345678.L, 123456789.L, 1234567890.L, + 12345678901.L, 123456789012.L, 1234567890123.L, + // This value is not representable in double, but it + // is in long double that uses the extended format. + // This is to verify that we are not truncating the + // value mistakenly through a double. + 10000000000000000.25L}) { + for (int exp : {-1000, -500, 0, 500, 1000}) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + doubles.push_back(sign / std::ldexp(base, exp)); + } + } + } + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + 'L' + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + for (auto d : doubles) { + FormatArgImpl arg(d); + UntypedFormatSpecImpl format(fmt_str); + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test to + // time out. + ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1})) + << fmt_str << " " << StrPrint("%.18Lg", d) << " " + << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d); + } + } + } +} + +TEST_F(FormatConvertTest, IntAsFloat) { + const int kMin = std::numeric_limits<int>::min(); + const int kMax = std::numeric_limits<int>::max(); + const int ia[] = { + 1, 2, 3, 123, + -1, -2, -3, -123, + 0, kMax - 1, kMax, kMin + 1, kMin }; + for (const int fx : ia) { + SCOPED_TRACE(fx); + const FormatArgImpl args[] = {FormatArgImpl(fx)}; + struct Expectation { + int line; + std::string out; + const char *fmt; + }; + const double dx = static_cast<double>(fx); + const Expectation kExpect[] = { + { __LINE__, StrPrint("%f", dx), "%f" }, + { __LINE__, StrPrint("%12f", dx), "%12f" }, + { __LINE__, StrPrint("%.12f", dx), "%.12f" }, + { __LINE__, StrPrint("%12a", dx), "%12a" }, + { __LINE__, StrPrint("%.12a", dx), "%.12a" }, + }; + for (const Expectation &e : kExpect) { + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))); + } + } +} + +template <typename T> +bool FormatFails(const char* test_format, T value) { + std::string format_string = std::string("<<") + test_format + ">>"; + UntypedFormatSpecImpl format(format_string); + + int one = 1; + const FormatArgImpl args[] = {FormatArgImpl(value), FormatArgImpl(one)}; + EXPECT_EQ(FormatPack(format, absl::MakeSpan(args)), "") + << "format=" << test_format << " value=" << value; + return FormatPack(format, absl::MakeSpan(args)).empty(); +} + +TEST_F(FormatConvertTest, ExpectedFailures) { + // Int input + EXPECT_TRUE(FormatFails("%p", 1)); + EXPECT_TRUE(FormatFails("%s", 1)); + EXPECT_TRUE(FormatFails("%n", 1)); + + // Double input + EXPECT_TRUE(FormatFails("%p", 1.)); + EXPECT_TRUE(FormatFails("%s", 1.)); + EXPECT_TRUE(FormatFails("%n", 1.)); + EXPECT_TRUE(FormatFails("%c", 1.)); + EXPECT_TRUE(FormatFails("%d", 1.)); + EXPECT_TRUE(FormatFails("%x", 1.)); + EXPECT_TRUE(FormatFails("%*d", 1.)); + + // String input + EXPECT_TRUE(FormatFails("%n", "")); + EXPECT_TRUE(FormatFails("%c", "")); + EXPECT_TRUE(FormatFails("%d", "")); + EXPECT_TRUE(FormatFails("%x", "")); + EXPECT_TRUE(FormatFails("%f", "")); + EXPECT_TRUE(FormatFails("%*d", "")); +} + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/extension.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.cc new file mode 100644 index 000000000000..94f2b9c209aa --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.cc @@ -0,0 +1,52 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/extension.h" + +#include <errno.h> +#include <algorithm> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +std::string Flags::ToString() const { + std::string s; + s.append(left ? "-" : ""); + s.append(show_pos ? "+" : ""); + s.append(sign_col ? " " : ""); + s.append(alt ? "#" : ""); + s.append(zero ? "0" : ""); + return s; +} + +bool FormatSinkImpl::PutPaddedString(string_view value, int width, + int precision, bool left) { + size_t space_remaining = 0; + if (width >= 0) space_remaining = width; + size_t n = value.size(); + if (precision >= 0) n = std::min(n, static_cast<size_t>(precision)); + string_view shown(value.data(), n); + space_remaining = Excess(shown.size(), space_remaining); + if (!left) Append(space_remaining, ' '); + Append(shown); + if (left) Append(space_remaining, ' '); + return true; +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/extension.h b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.h new file mode 100644 index 000000000000..6c60c6c3a379 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.h @@ -0,0 +1,429 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ + +#include <limits.h> + +#include <cstddef> +#include <cstring> +#include <ostream> + +#include "absl/base/config.h" +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/internal/str_format/output.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace str_format_internal { + +enum class FormatConversionChar : uint8_t; +enum class FormatConversionCharSet : uint64_t; + +class FormatRawSinkImpl { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, decltype(str_format_internal::InvokeFlush( + std::declval<T*>(), string_view()))* = nullptr> + FormatRawSinkImpl(T* raw) // NOLINT + : sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {} + + void Write(string_view s) { write_(sink_, s); } + + template <typename T> + static FormatRawSinkImpl Extract(T s) { + return s.sink_; + } + + private: + template <typename T> + static void Flush(void* r, string_view s) { + str_format_internal::InvokeFlush(static_cast<T*>(r), s); + } + + void* sink_; + void (*write_)(void*, string_view); +}; + +// An abstraction to which conversions write their string data. +class FormatSinkImpl { + public: + explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {} + + ~FormatSinkImpl() { Flush(); } + + void Flush() { + raw_.Write(string_view(buf_, pos_ - buf_)); + pos_ = buf_; + } + + void Append(size_t n, char c) { + if (n == 0) return; + size_ += n; + auto raw_append = [&](size_t count) { + memset(pos_, c, count); + pos_ += count; + }; + while (n > Avail()) { + n -= Avail(); + if (Avail() > 0) { + raw_append(Avail()); + } + Flush(); + } + raw_append(n); + } + + void Append(string_view v) { + size_t n = v.size(); + if (n == 0) return; + size_ += n; + if (n >= Avail()) { + Flush(); + raw_.Write(v); + return; + } + memcpy(pos_, v.data(), n); + pos_ += n; + } + + size_t size() const { return size_; } + + // Put 'v' to 'sink' with specified width, precision, and left flag. + bool PutPaddedString(string_view v, int width, int precision, bool left); + + template <typename T> + T Wrap() { + return T(this); + } + + template <typename T> + static FormatSinkImpl* Extract(T* s) { + return s->sink_; + } + + private: + size_t Avail() const { return buf_ + sizeof(buf_) - pos_; } + + FormatRawSinkImpl raw_; + size_t size_ = 0; + char* pos_ = buf_; + char buf_[1024]; +}; + +struct Flags { + bool basic : 1; // fastest conversion: no flags, width, or precision + bool left : 1; // "-" + bool show_pos : 1; // "+" + bool sign_col : 1; // " " + bool alt : 1; // "#" + bool zero : 1; // "0" + std::string ToString() const; + friend std::ostream& operator<<(std::ostream& os, const Flags& v) { + return os << v.ToString(); + } +}; + +// clang-format off +#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ + /* text */ \ + X_VAL(c) X_SEP X_VAL(s) X_SEP \ + /* ints */ \ + X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ + X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ + /* floats */ \ + X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \ + X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ + /* misc */ \ + X_VAL(n) X_SEP X_VAL(p) +// clang-format on + +// This type should not be referenced, it exists only to provide labels +// internally that match the values declared in FormatConversionChar in +// str_format.h. This is meant to allow internal libraries to use the same +// declared interface type as the public interface +// (absl::StrFormatConversionChar) while keeping the definition in a public +// header. +// Internal libraries should use the form +// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for +// comparisons. Use in switch statements is not recommended due to a bug in how +// gcc 4.9 -Wswitch handles declared but undefined enums. +struct FormatConversionCharInternal { + FormatConversionCharInternal() = delete; + + private: + // clang-format off + enum class Enum : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p, // misc + kNone + }; + // clang-format on + public: +#define ABSL_INTERNAL_X_VAL(id) \ + static constexpr FormatConversionChar id = \ + static_cast<FormatConversionChar>(Enum::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + static constexpr FormatConversionChar kNone = + static_cast<FormatConversionChar>(Enum::kNone); +}; +// clang-format on + +inline FormatConversionChar FormatConversionCharFromChar(char c) { + switch (c) { +#define ABSL_INTERNAL_X_VAL(id) \ + case #id[0]: \ + return FormatConversionCharInternal::id; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + } + return FormatConversionCharInternal::kNone; +} + +inline bool FormatConversionCharIsUpper(FormatConversionChar c) { + if (c == FormatConversionCharInternal::X || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::G || + c == FormatConversionCharInternal::A) { + return true; + } else { + return false; + } +} + +inline bool FormatConversionCharIsFloat(FormatConversionChar c) { + if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::A || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::G) { + return true; + } else { + return false; + } +} + +inline char FormatConversionCharToChar(FormatConversionChar c) { + if (c == FormatConversionCharInternal::kNone) { + return '\0'; + +#define ABSL_INTERNAL_X_VAL(e) \ + } else if (c == FormatConversionCharInternal::e) { \ + return #e[0]; +#define ABSL_INTERNAL_X_SEP + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + } else { + return '\0'; + } + +#undef ABSL_INTERNAL_X_VAL +#undef ABSL_INTERNAL_X_SEP +} + +// The associated char. +inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { + char c = FormatConversionCharToChar(v); + if (!c) c = '?'; + return os << c; +} + +struct FormatConversionSpecImplFriend; + +class FormatConversionSpecImpl { + public: + // Width and precison are not specified, no flags are set. + bool is_basic() const { return flags_.basic; } + bool has_left_flag() const { return flags_.left; } + bool has_show_pos_flag() const { return flags_.show_pos; } + bool has_sign_col_flag() const { return flags_.sign_col; } + bool has_alt_flag() const { return flags_.alt; } + bool has_zero_flag() const { return flags_.zero; } + + FormatConversionChar conversion_char() const { + // Keep this field first in the struct . It generates better code when + // accessing it when ConversionSpec is passed by value in registers. + static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, ""); + return conv_; + } + + // Returns the specified width. If width is unspecfied, it returns a negative + // value. + int width() const { return width_; } + // Returns the specified precision. If precision is unspecfied, it returns a + // negative value. + int precision() const { return precision_; } + + template <typename T> + T Wrap() { + return T(*this); + } + + private: + friend struct str_format_internal::FormatConversionSpecImplFriend; + FormatConversionChar conv_ = FormatConversionCharInternal::kNone; + Flags flags_; + int width_; + int precision_; +}; + +struct FormatConversionSpecImplFriend final { + static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { + conv->flags_ = f; + } + static void SetConversionChar(FormatConversionChar c, + FormatConversionSpecImpl* conv) { + conv->conv_ = c; + } + static void SetWidth(int w, FormatConversionSpecImpl* conv) { + conv->width_ = w; + } + static void SetPrecision(int p, FormatConversionSpecImpl* conv) { + conv->precision_ = p; + } + static std::string FlagsToString(const FormatConversionSpecImpl& spec) { + return spec.flags_.ToString(); + } +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a) { + return a; +} + +template <typename... CharSet> +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a, CharSet... rest) { + return static_cast<FormatConversionCharSet>( + static_cast<uint64_t>(a) | + static_cast<uint64_t>(FormatConversionCharSetUnion(rest...))); +} + +constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) { + return uint64_t{1} << (1 + static_cast<uint8_t>(c)); +} + +constexpr uint64_t FormatConversionCharToConvInt(char conv) { + return +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \ + : + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + conv == '*' + ? 1 + : 0; +} + +constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvInt(conv)); +} + +struct FormatConversionCharSetInternal { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + static constexpr FormatConversionCharSet c = \ + FormatConversionCharToConvValue(#c[0]); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + + // Used for width/precision '*' specification. + static constexpr FormatConversionCharSet kStar = + FormatConversionCharToConvValue('*'); + + // Some predefined values (TODO(matthewbr), delete any that are unused). + static constexpr FormatConversionCharSet kIntegral = + FormatConversionCharSetUnion(d, i, u, o, x, X); + static constexpr FormatConversionCharSet kFloating = + FormatConversionCharSetUnion(a, e, f, g, A, E, F, G); + static constexpr FormatConversionCharSet kNumeric = + FormatConversionCharSetUnion(kIntegral, kFloating); + static constexpr FormatConversionCharSet kString = s; + static constexpr FormatConversionCharSet kPointer = p; +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return FormatConversionCharSetUnion(a, b); +} + +// Overloaded conversion functions to support absl::ParsedFormat. +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvValue(c)); +} + +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet( + FormatConversionCharSet c) { + return c; +} + +template <typename T> +void ToFormatConversionCharSet(T) = delete; + +// Checks whether `c` exists in `set`. +constexpr bool Contains(FormatConversionCharSet set, char c) { + return (static_cast<uint64_t>(set) & + static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0; +} + +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(FormatConversionCharSet set, + FormatConversionCharSet c) { + return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) == + static_cast<uint64_t>(c); +} + +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) { + return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0; +} + +// Return capacity - used, clipped to a minimum of 0. +inline size_t Excess(size_t used, size_t capacity) { + return used < capacity ? capacity - used : 0; +} + +} // namespace str_format_internal + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/extension_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/extension_test.cc new file mode 100644 index 000000000000..0a023f9c0333 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/extension_test.cc @@ -0,0 +1,83 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "absl/strings/internal/str_format/extension.h" + +#include <random> +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" + +namespace my_namespace { +class UserDefinedType { + public: + UserDefinedType() = default; + + void Append(absl::string_view str) { value_.append(str.data(), str.size()); } + const std::string& Value() const { return value_; } + + friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) { + x->Append(str); + } + + private: + std::string value_; +}; +} // namespace my_namespace + +namespace { + +std::string MakeRandomString(size_t len) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis('a', 'z'); + std::string s(len, '0'); + for (char& c : s) { + c = dis(gen); + } + return s; +} + +TEST(FormatExtensionTest, SinkAppendSubstring) { + for (size_t chunk_size : {1, 10, 100, 1000, 10000}) { + std::string expected, actual; + absl::str_format_internal::FormatSinkImpl sink(&actual); + for (size_t chunks = 0; chunks < 10; ++chunks) { + std::string rand = MakeRandomString(chunk_size); + expected += rand; + sink.Append(rand); + } + sink.Flush(); + EXPECT_EQ(actual, expected); + } +} + +TEST(FormatExtensionTest, SinkAppendChars) { + for (size_t chunk_size : {1, 10, 100, 1000, 10000}) { + std::string expected, actual; + absl::str_format_internal::FormatSinkImpl sink(&actual); + for (size_t chunks = 0; chunks < 10; ++chunks) { + std::string rand = MakeRandomString(1); + expected.append(chunk_size, rand[0]); + sink.Append(chunk_size, rand[0]); + } + sink.Flush(); + EXPECT_EQ(actual, expected); + } +} +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.cc new file mode 100644 index 000000000000..a761a5a5f9ce --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.cc @@ -0,0 +1,1144 @@ +#include "absl/strings/internal/str_format/float_conversion.h" + +#include <string.h> + +#include <algorithm> +#include <cassert> +#include <cmath> +#include <limits> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/bits.h" +#include "absl/base/optimization.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/int128.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +// The code below wants to avoid heap allocations. +// To do so it needs to allocate memory on the stack. +// `StackArray` will allocate memory on the stack in the form of a uint32_t +// array and call the provided callback with said memory. +// It will allocate memory in increments of 512 bytes. We could allocate the +// largest needed unconditionally, but that is more than we need in most of +// cases. This way we use less stack in the common cases. +class StackArray { + using Func = absl::FunctionRef<void(absl::Span<uint32_t>)>; + static constexpr size_t kStep = 512 / sizeof(uint32_t); + // 5 steps is 2560 bytes, which is enough to hold a long double with the + // largest/smallest exponents. + // The operations below will static_assert their particular maximum. + static constexpr size_t kNumSteps = 5; + + // We do not want this function to be inlined. + // Otherwise the caller will allocate the stack space unnecessarily for all + // the variants even though it only calls one. + template <size_t steps> + ABSL_ATTRIBUTE_NOINLINE static void RunWithCapacityImpl(Func f) { + uint32_t values[steps * kStep]{}; + f(absl::MakeSpan(values)); + } + + public: + static constexpr size_t kMaxCapacity = kStep * kNumSteps; + + static void RunWithCapacity(size_t capacity, Func f) { + assert(capacity <= kMaxCapacity); + const size_t step = (capacity + kStep - 1) / kStep; + assert(step <= kNumSteps); + switch (step) { + case 1: + return RunWithCapacityImpl<1>(f); + case 2: + return RunWithCapacityImpl<2>(f); + case 3: + return RunWithCapacityImpl<3>(f); + case 4: + return RunWithCapacityImpl<4>(f); + case 5: + return RunWithCapacityImpl<5>(f); + } + + assert(false && "Invalid capacity"); + } +}; + +// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns +// the carry. +template <typename Int> +inline Int MultiplyBy10WithCarry(Int *v, Int carry) { + using BiggerInt = absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>; + BiggerInt tmp = 10 * static_cast<BiggerInt>(*v) + carry; + *v = static_cast<Int>(tmp); + return static_cast<Int>(tmp >> (sizeof(Int) * 8)); +} + +// Calculates `(2^64 * carry + *v) / 10`. +// Stores the quotient in `*v` and returns the remainder. +// Requires: `0 <= carry <= 9` +inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) { + constexpr uint64_t divisor = 10; + // 2^64 / divisor = chunk_quotient + chunk_remainder / divisor + constexpr uint64_t chunk_quotient = (uint64_t{1} << 63) / (divisor / 2); + constexpr uint64_t chunk_remainder = uint64_t{} - chunk_quotient * divisor; + + const uint64_t mod = *v % divisor; + const uint64_t next_carry = chunk_remainder * carry + mod; + *v = *v / divisor + carry * chunk_quotient + next_carry / divisor; + return next_carry % divisor; +} + +// Generates the decimal representation for an integer of the form `v * 2^exp`, +// where `v` and `exp` are both positive integers. +// It generates the digits from the left (ie the most significant digit first) +// to allow for direct printing into the sink. +// +// Requires `0 <= exp` and `exp <= numeric_limits<long double>::max_exponent`. +class BinaryToDecimal { + static constexpr int ChunksNeeded(int exp) { + // We will left shift a uint128 by `exp` bits, so we need `128+exp` total + // bits. Round up to 32. + // See constructor for details about adding `10%` to the value. + return (128 + exp + 31) / 32 * 11 / 10; + } + + public: + // Run the conversion for `v * 2^exp` and call `f(binary_to_decimal)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion(uint128 v, int exp, + absl::FunctionRef<void(BinaryToDecimal)> f) { + assert(exp > 0); + assert(exp <= std::numeric_limits<long double>::max_exponent); + static_assert( + StackArray::kMaxCapacity >= + ChunksNeeded(std::numeric_limits<long double>::max_exponent), + ""); + + StackArray::RunWithCapacity( + ChunksNeeded(exp), + [=](absl::Span<uint32_t> input) { f(BinaryToDecimal(input, v, exp)); }); + } + + int TotalDigits() const { + return static_cast<int>((decimal_end_ - decimal_start_) * kDigitsPerChunk + + CurrentDigits().size()); + } + + // See the current block of digits. + absl::string_view CurrentDigits() const { + return absl::string_view(digits_ + kDigitsPerChunk - size_, size_); + } + + // Advance the current view of digits. + // Returns `false` when no more digits are available. + bool AdvanceDigits() { + if (decimal_start_ >= decimal_end_) return false; + + uint32_t w = data_[decimal_start_++]; + for (size_ = 0; size_ < kDigitsPerChunk; w /= 10) { + digits_[kDigitsPerChunk - ++size_] = w % 10 + '0'; + } + return true; + } + + private: + BinaryToDecimal(absl::Span<uint32_t> data, uint128 v, int exp) : data_(data) { + // We need to print the digits directly into the sink object without + // buffering them all first. To do this we need two things: + // - to know the total number of digits to do padding when necessary + // - to generate the decimal digits from the left. + // + // In order to do this, we do a two pass conversion. + // On the first pass we convert the binary representation of the value into + // a decimal representation in which each uint32_t chunk holds up to 9 + // decimal digits. In the second pass we take each decimal-holding-uint32_t + // value and generate the ascii decimal digits into `digits_`. + // + // The binary and decimal representations actually share the same memory + // region. As we go converting the chunks from binary to decimal we free + // them up and reuse them for the decimal representation. One caveat is that + // the decimal representation is around 7% less efficient in space than the + // binary one. We allocate an extra 10% memory to account for this. See + // ChunksNeeded for this calculation. + int chunk_index = exp / 32; + decimal_start_ = decimal_end_ = ChunksNeeded(exp); + const int offset = exp % 32; + // Left shift v by exp bits. + data_[chunk_index] = static_cast<uint32_t>(v << offset); + for (v >>= (32 - offset); v; v >>= 32) + data_[++chunk_index] = static_cast<uint32_t>(v); + + while (chunk_index >= 0) { + // While we have more than one chunk available, go in steps of 1e9. + // `data_[chunk_index]` holds the highest non-zero binary chunk, so keep + // the variable updated. + uint32_t carry = 0; + for (int i = chunk_index; i >= 0; --i) { + uint64_t tmp = uint64_t{data_[i]} + (uint64_t{carry} << 32); + data_[i] = static_cast<uint32_t>(tmp / uint64_t{1000000000}); + carry = static_cast<uint32_t>(tmp % uint64_t{1000000000}); + } + + // If the highest chunk is now empty, remove it from view. + if (data_[chunk_index] == 0) --chunk_index; + + --decimal_start_; + assert(decimal_start_ != chunk_index); + data_[decimal_start_] = carry; + } + + // Fill the first set of digits. The first chunk might not be complete, so + // handle differently. + for (uint32_t first = data_[decimal_start_++]; first != 0; first /= 10) { + digits_[kDigitsPerChunk - ++size_] = first % 10 + '0'; + } + } + + private: + static constexpr size_t kDigitsPerChunk = 9; + + int decimal_start_; + int decimal_end_; + + char digits_[kDigitsPerChunk]; + int size_ = 0; + + absl::Span<uint32_t> data_; +}; + +// Converts a value of the form `x * 2^-exp` into a sequence of decimal digits. +// Requires `-exp < 0` and +// `-exp >= limits<long double>::min_exponent - limits<long double>::digits`. +class FractionalDigitGenerator { + public: + // Run the conversion for `v * 2^exp` and call `f(generator)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion( + uint128 v, int exp, absl::FunctionRef<void(FractionalDigitGenerator)> f) { + assert(-exp < 0); + assert(-exp >= std::numeric_limits<long double>::min_exponent - 128); + static_assert( + StackArray::kMaxCapacity >= + (128 - std::numeric_limits<long double>::min_exponent + 31) / 32, + ""); + StackArray::RunWithCapacity((exp + 31) / 32, + [=](absl::Span<uint32_t> input) { + f(FractionalDigitGenerator(input, v, exp)); + }); + } + + // Returns true if there are any more non-zero digits left. + bool HasMoreDigits() const { return next_digit_ != 0 || chunk_index_ >= 0; } + + // Returns true if the remainder digits are greater than 5000... + bool IsGreaterThanHalf() const { + return next_digit_ > 5 || (next_digit_ == 5 && chunk_index_ >= 0); + } + // Returns true if the remainder digits are exactly 5000... + bool IsExactlyHalf() const { return next_digit_ == 5 && chunk_index_ < 0; } + + struct Digits { + int digit_before_nine; + int num_nines; + }; + + // Get the next set of digits. + // They are composed by a non-9 digit followed by a runs of zero or more 9s. + Digits GetDigits() { + Digits digits{next_digit_, 0}; + + next_digit_ = GetOneDigit(); + while (next_digit_ == 9) { + ++digits.num_nines; + next_digit_ = GetOneDigit(); + } + + return digits; + } + + private: + // Return the next digit. + int GetOneDigit() { + if (chunk_index_ < 0) return 0; + + uint32_t carry = 0; + for (int i = chunk_index_; i >= 0; --i) { + carry = MultiplyBy10WithCarry(&data_[i], carry); + } + // If the lowest chunk is now empty, remove it from view. + if (data_[chunk_index_] == 0) --chunk_index_; + return carry; + } + + FractionalDigitGenerator(absl::Span<uint32_t> data, uint128 v, int exp) + : chunk_index_(exp / 32), data_(data) { + const int offset = exp % 32; + // Right shift `v` by `exp` bits. + data_[chunk_index_] = static_cast<uint32_t>(v << (32 - offset)); + v >>= offset; + // Make sure we don't overflow the data. We already calculated that + // non-zero bits fit, so we might not have space for leading zero bits. + for (int pos = chunk_index_; v; v >>= 32) + data_[--pos] = static_cast<uint32_t>(v); + + // Fill next_digit_, as GetDigits expects it to be populated always. + next_digit_ = GetOneDigit(); + } + + int next_digit_; + int chunk_index_; + absl::Span<uint32_t> data_; +}; + +// Count the number of leading zero bits. +int LeadingZeros(uint64_t v) { return base_internal::CountLeadingZeros64(v); } +int LeadingZeros(uint128 v) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + return high != 0 ? base_internal::CountLeadingZeros64(high) + : 64 + base_internal::CountLeadingZeros64(low); +} + +// Round up the text digits starting at `p`. +// The buffer must have an extra digit that is known to not need rounding. +// This is done below by having an extra '0' digit on the left. +void RoundUp(char *p) { + while (*p == '9' || *p == '.') { + if (*p == '9') *p = '0'; + --p; + } + ++*p; +} + +// Check the previous digit and round up or down to follow the round-to-even +// policy. +void RoundToEven(char *p) { + if (*p == '.') --p; + if (*p % 2 == 1) RoundUp(p); +} + +// Simple integral decimal digit printing for values that fit in 64-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint64_t v, char *p) { + do { + *--p = DivideBy10WithCarry(&v, 0) + '0'; + } while (v != 0); + return p; +} + +// Simple integral decimal digit printing for values that fit in 128-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint128 v, char *p) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + while (high != 0) { + uint64_t carry = DivideBy10WithCarry(&high, 0); + carry = DivideBy10WithCarry(&low, carry); + *--p = carry + '0'; + } + return PrintIntegralDigitsFromRightFast(low, p); +} + +// Simple fractional decimal digit printing for values that fir in 64-bits after +// shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint64_t v, char *start, int exp, + int precision) { + char *p = start; + v <<= (64 - exp); + while (precision > 0) { + if (!v) return p; + *p++ = MultiplyBy10WithCarry(&v, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (v < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (v > 0x8000000000000000) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +// Simple fractional decimal digit printing for values that fir in 128-bits +// after shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint128 v, char *start, int exp, + int precision) { + char *p = start; + v <<= (128 - exp); + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + // While we have digits to print and `low` is not empty, do the long + // multiplication. + while (precision > 0 && low != 0) { + uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{0}); + carry = MultiplyBy10WithCarry(&high, carry); + + *p++ = carry + '0'; + --precision; + } + + // Now `low` is empty, so use a faster approach for the rest of the digits. + // This block is pretty much the same as the main loop for the 64-bit case + // above. + while (precision > 0) { + if (!high) return p; + *p++ = MultiplyBy10WithCarry(&high, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (high < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (high > 0x8000000000000000 || low != 0) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +struct FormatState { + char sign_char; + int precision; + const FormatConversionSpecImpl &conv; + FormatSinkImpl *sink; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + bool ShouldPrintDot() const { return precision != 0 || conv.has_alt_flag(); } +}; + +struct Padding { + int left_spaces; + int zeros; + int right_spaces; +}; + +Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) { + if (state.conv.width() < 0 || state.conv.width() <= total_size) + return {0, 0, 0}; + int missing_chars = state.conv.width() - total_size; + if (state.conv.has_left_flag()) { + return {0, 0, missing_chars}; + } else if (state.conv.has_zero_flag()) { + return {0, missing_chars, 0}; + } else { + return {missing_chars, 0, 0}; + } +} + +void FinalPrint(absl::string_view data, int trailing_zeros, + const FormatState &state) { + if (state.conv.width() < 0) { + // No width specified. Fast-path. + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + return; + } + + auto padding = + ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) + data.size() + + static_cast<size_t>(trailing_zeros), + state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +// Fastpath %f formatter for when the shifted value fits in a simple integral +// type. +// Prints `v*2^exp` with the options from `state`. +template <typename Int> +void FormatFFast(Int v, int exp, const FormatState &state) { + constexpr int input_bits = sizeof(Int) * 8; + + static constexpr size_t integral_size = + /* in case we need to round up an extra digit */ 1 + + /* decimal digits for uint128 */ 40 + 1; + char buffer[integral_size + /* . */ 1 + /* max digits uint128 */ 128]; + buffer[integral_size] = '.'; + char *const integral_digits_end = buffer + integral_size; + char *integral_digits_start; + char *const fractional_digits_start = buffer + integral_size + 1; + char *fractional_digits_end = fractional_digits_start; + + if (exp >= 0) { + const int total_bits = input_bits - LeadingZeros(v) + exp; + integral_digits_start = + total_bits <= 64 + ? PrintIntegralDigitsFromRightFast(static_cast<uint64_t>(v) << exp, + integral_digits_end) + : PrintIntegralDigitsFromRightFast(static_cast<uint128>(v) << exp, + integral_digits_end); + } else { + exp = -exp; + + integral_digits_start = PrintIntegralDigitsFromRightFast( + exp < input_bits ? v >> exp : 0, integral_digits_end); + // PrintFractionalDigits may pull a carried 1 all the way up through the + // integral portion. + integral_digits_start[-1] = '0'; + + fractional_digits_end = + exp <= 64 ? PrintFractionalDigitsFast(v, fractional_digits_start, exp, + state.precision) + : PrintFractionalDigitsFast(static_cast<uint128>(v), + fractional_digits_start, exp, + state.precision); + // There was a carry, so include the first digit too. + if (integral_digits_start[-1] != '0') --integral_digits_start; + } + + size_t size = fractional_digits_end - integral_digits_start; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + if (!state.ShouldPrintDot()) --size; + FinalPrint(absl::string_view(integral_digits_start, size), + static_cast<int>(state.precision - (fractional_digits_end - + fractional_digits_start)), + state); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp > 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to not have fractional digits, so we don't have to +// worry about anything after the `.`. +void FormatFPositiveExpSlow(uint128 v, int exp, const FormatState &state) { + BinaryToDecimal::RunConversion(v, exp, [&](BinaryToDecimal btd) { + const size_t total_digits = + btd.TotalDigits() + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + + const auto padding = ExtraWidthToPadding( + total_digits + (state.sign_char != '\0' ? 1 : 0), state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + do { + state.sink->Append(btd.CurrentDigits()); + } while (btd.AdvanceDigits()); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + state.sink->Append(state.precision, '0'); + state.sink->Append(padding.right_spaces, ' '); + }); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp < 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to be < 1.0, so we don't have to worry about integral +// digits. +void FormatFNegativeExpSlow(uint128 v, int exp, const FormatState &state) { + const size_t total_digits = + /* 0 */ 1 + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + auto padding = + ExtraWidthToPadding(total_digits + (state.sign_char ? 1 : 0), state); + padding.zeros += 1; + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + + // Print digits + int digits_to_go = state.precision; + + FractionalDigitGenerator::RunConversion( + v, exp, [&](FractionalDigitGenerator digit_gen) { + // There are no digits to print here. + if (state.precision == 0) return; + + // We go one digit at a time, while keeping track of runs of nines. + // The runs of nines are used to perform rounding when necessary. + + while (digits_to_go > 0 && digit_gen.HasMoreDigits()) { + auto digits = digit_gen.GetDigits(); + + // Now we have a digit and a run of nines. + // See if we can print them all. + if (digits.num_nines + 1 < digits_to_go) { + // We don't have to round yet, so print them. + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits.num_nines, '9'); + digits_to_go -= digits.num_nines + 1; + + } else { + // We can't print all the nines, see where we have to truncate. + + bool round_up = false; + if (digits.num_nines + 1 > digits_to_go) { + // We round up at a nine. No need to print them. + round_up = true; + } else { + // We can fit all the nines, but truncate just after it. + if (digit_gen.IsGreaterThanHalf()) { + round_up = true; + } else if (digit_gen.IsExactlyHalf()) { + // Round to even + round_up = + digits.num_nines != 0 || digits.digit_before_nine % 2 == 1; + } + } + + if (round_up) { + state.sink->Append(1, digits.digit_before_nine + '1'); + --digits_to_go; + // The rest will be zeros. + } else { + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits_to_go - 1, '9'); + digits_to_go = 0; + } + return; + } + } + }); + + state.sink->Append(digits_to_go, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +template <typename Int> +void FormatF(Int mantissa, int exp, const FormatState &state) { + if (exp >= 0) { + const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp; + + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(total_bits > 128)) { + return FormatFPositiveExpSlow(mantissa, exp, state); + } + } else { + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(exp < -128)) { + return FormatFNegativeExpSlow(mantissa, -exp, state); + } + } + return FormatFFast(mantissa, exp, state); +} + +char *CopyStringTo(absl::string_view v, char *out) { + std::memcpy(out, v.data(), v.size()); + return out + v.size(); +} + +template <typename Float> +bool FallbackToSnprintf(const Float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + int w = conv.width() >= 0 ? conv.width() : 0; + int p = conv.precision() >= 0 ? conv.precision() : -1; + char fmt[32]; + { + char *fp = fmt; + *fp++ = '%'; + fp = CopyStringTo(FormatConversionSpecImplFriend::FlagsToString(conv), fp); + fp = CopyStringTo("*.*", fp); + if (std::is_same<long double, Float>()) { + *fp++ = 'L'; + } + *fp++ = FormatConversionCharToChar(conv.conversion_char()); + *fp = 0; + assert(fp < fmt + sizeof(fmt)); + } + std::string space(512, '\0'); + absl::string_view result; + while (true) { + int n = snprintf(&space[0], space.size(), fmt, w, p, v); + if (n < 0) return false; + if (static_cast<size_t>(n) < space.size()) { + result = absl::string_view(space.data(), n); + break; + } + space.resize(n + 1); + } + sink->Append(result); + return true; +} + +// 128-bits in decimal: ceil(128*log(2)/log(10)) +// or std::numeric_limits<__uint128_t>::digits10 +constexpr int kMaxFixedPrecision = 39; + +constexpr int kBufferLength = /*sign*/ 1 + + /*integer*/ kMaxFixedPrecision + + /*point*/ 1 + + /*fraction*/ kMaxFixedPrecision + + /*exponent e+123*/ 5; + +struct Buffer { + void push_front(char c) { + assert(begin > data); + *--begin = c; + } + void push_back(char c) { + assert(end < data + sizeof(data)); + *end++ = c; + } + void pop_back() { + assert(begin < end); + --end; + } + + char &back() { + assert(begin < end); + return end[-1]; + } + + char last_digit() const { return end[-1] == '.' ? end[-2] : end[-1]; } + + int size() const { return static_cast<int>(end - begin); } + + char data[kBufferLength]; + char *begin; + char *end; +}; + +enum class FormatStyle { Fixed, Precision }; + +// If the value is Inf or Nan, print it and return true. +// Otherwise, return false. +template <typename Float> +bool ConvertNonNumericFloats(char sign_char, Float v, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + char text[4], *ptr = text; + if (sign_char != '\0') *ptr++ = sign_char; + if (std::isnan(v)) { + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "NAN" : "nan", 3, + ptr); + } else if (std::isinf(v)) { + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "INF" : "inf", 3, + ptr); + } else { + return false; + } + + return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, + conv.has_left_flag()); +} + +// Round up the last digit of the value. +// It will carry over and potentially overflow. 'exp' will be adjusted in that +// case. +template <FormatStyle mode> +void RoundUp(Buffer *buffer, int *exp) { + char *p = &buffer->back(); + while (p >= buffer->begin && (*p == '9' || *p == '.')) { + if (*p == '9') *p = '0'; + --p; + } + + if (p < buffer->begin) { + *p = '1'; + buffer->begin = p; + if (mode == FormatStyle::Precision) { + std::swap(p[1], p[2]); // move the . + ++*exp; + buffer->pop_back(); + } + } else { + ++*p; + } +} + +void PrintExponent(int exp, char e, Buffer *out) { + out->push_back(e); + if (exp < 0) { + out->push_back('-'); + exp = -exp; + } else { + out->push_back('+'); + } + // Exponent digits. + if (exp > 99) { + out->push_back(exp / 100 + '0'); + out->push_back(exp / 10 % 10 + '0'); + out->push_back(exp % 10 + '0'); + } else { + out->push_back(exp / 10 + '0'); + out->push_back(exp % 10 + '0'); + } +} + +template <typename Float, typename Int> +constexpr bool CanFitMantissa() { + return +#if defined(__clang__) && !defined(__SSE3__) + // Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289 + // Casting from long double to uint64_t is miscompiled and drops bits. + (!std::is_same<Float, long double>::value || + !std::is_same<Int, uint64_t>::value) && +#endif + std::numeric_limits<Float>::digits <= std::numeric_limits<Int>::digits; +} + +template <typename Float> +struct Decomposed { + using MantissaType = + absl::conditional_t<std::is_same<long double, Float>::value, uint128, + uint64_t>; + static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8, + ""); + MantissaType mantissa; + int exponent; +}; + +// Decompose the double into an integer mantissa and an exponent. +template <typename Float> +Decomposed<Float> Decompose(Float v) { + int exp; + Float m = std::frexp(v, &exp); + m = std::ldexp(m, std::numeric_limits<Float>::digits); + exp -= std::numeric_limits<Float>::digits; + + return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp}; +} + +// Print 'digits' as decimal. +// In Fixed mode, we add a '.' at the end. +// In Precision mode, we add a '.' after the first digit. +template <FormatStyle mode, typename Int> +int PrintIntegralDigits(Int digits, Buffer *out) { + int printed = 0; + if (digits) { + for (; digits; digits /= 10) out->push_front(digits % 10 + '0'); + printed = out->size(); + if (mode == FormatStyle::Precision) { + out->push_front(*out->begin); + out->begin[1] = '.'; + } else { + out->push_back('.'); + } + } else if (mode == FormatStyle::Fixed) { + out->push_front('0'); + out->push_back('.'); + printed = 1; + } + return printed; +} + +// Back out 'extra_digits' digits and round up if necessary. +bool RemoveExtraPrecision(int extra_digits, bool has_leftover_value, + Buffer *out, int *exp_out) { + if (extra_digits <= 0) return false; + + // Back out the extra digits + out->end -= extra_digits; + + bool needs_to_round_up = [&] { + // We look at the digit just past the end. + // There must be 'extra_digits' extra valid digits after end. + if (*out->end > '5') return true; + if (*out->end < '5') return false; + if (has_leftover_value || std::any_of(out->end + 1, out->end + extra_digits, + [](char c) { return c != '0'; })) + return true; + + // Ends in ...50*, round to even. + return out->last_digit() % 2 == 1; + }(); + + if (needs_to_round_up) { + RoundUp<FormatStyle::Precision>(out, exp_out); + } + return true; +} + +// Print the value into the buffer. +// This will not include the exponent, which will be returned in 'exp_out' for +// Precision mode. +template <typename Int, typename Float, FormatStyle mode> +bool FloatToBufferImpl(Int int_mantissa, int exp, int precision, Buffer *out, + int *exp_out) { + assert((CanFitMantissa<Float, Int>())); + + const int int_bits = std::numeric_limits<Int>::digits; + + // In precision mode, we start printing one char to the right because it will + // also include the '.' + // In fixed mode we put the dot afterwards on the right. + out->begin = out->end = + out->data + 1 + kMaxFixedPrecision + (mode == FormatStyle::Precision); + + if (exp >= 0) { + if (std::numeric_limits<Float>::digits + exp > int_bits) { + // The value will overflow the Int + return false; + } + int digits_printed = PrintIntegralDigits<mode>(int_mantissa << exp, out); + int digits_to_zero_pad = precision; + if (mode == FormatStyle::Precision) { + *exp_out = digits_printed - 1; + digits_to_zero_pad -= digits_printed - 1; + if (RemoveExtraPrecision(-digits_to_zero_pad, false, out, exp_out)) { + return true; + } + } + for (; digits_to_zero_pad-- > 0;) out->push_back('0'); + return true; + } + + exp = -exp; + // We need at least 4 empty bits for the next decimal digit. + // We will multiply by 10. + if (exp > int_bits - 4) return false; + + const Int mask = (Int{1} << exp) - 1; + + // Print the integral part first. + int digits_printed = PrintIntegralDigits<mode>(int_mantissa >> exp, out); + int_mantissa &= mask; + + int fractional_count = precision; + if (mode == FormatStyle::Precision) { + if (digits_printed == 0) { + // Find the first non-zero digit, when in Precision mode. + *exp_out = 0; + if (int_mantissa) { + while (int_mantissa <= mask) { + int_mantissa *= 10; + --*exp_out; + } + } + out->push_front(static_cast<char>(int_mantissa >> exp) + '0'); + out->push_back('.'); + int_mantissa &= mask; + } else { + // We already have a digit, and a '.' + *exp_out = digits_printed - 1; + fractional_count -= *exp_out; + if (RemoveExtraPrecision(-fractional_count, int_mantissa != 0, out, + exp_out)) { + // If we had enough digits, return right away. + // The code below will try to round again otherwise. + return true; + } + } + } + + auto get_next_digit = [&] { + int_mantissa *= 10; + int digit = static_cast<int>(int_mantissa >> exp); + int_mantissa &= mask; + return digit; + }; + + // Print fractional_count more digits, if available. + for (; fractional_count > 0; --fractional_count) { + out->push_back(get_next_digit() + '0'); + } + + int next_digit = get_next_digit(); + if (next_digit > 5 || + (next_digit == 5 && (int_mantissa || out->last_digit() % 2 == 1))) { + RoundUp<mode>(out, exp_out); + } + + return true; +} + +template <FormatStyle mode, typename Float> +bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, + int *exp) { + if (precision > kMaxFixedPrecision) return false; + + // Try with uint64_t. + if (CanFitMantissa<Float, std::uint64_t>() && + FloatToBufferImpl<std::uint64_t, Float, mode>( + static_cast<std::uint64_t>(decomposed.mantissa), + static_cast<std::uint64_t>(decomposed.exponent), precision, out, exp)) + return true; + +#if defined(ABSL_HAVE_INTRINSIC_INT128) + // If that is not enough, try with __uint128_t. + return CanFitMantissa<Float, __uint128_t>() && + FloatToBufferImpl<__uint128_t, Float, mode>( + static_cast<__uint128_t>(decomposed.mantissa), + static_cast<__uint128_t>(decomposed.exponent), precision, out, + exp); +#endif + return false; +} + +void WriteBufferToSink(char sign_char, absl::string_view str, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + int left_spaces = 0, zeros = 0, right_spaces = 0; + int missing_chars = + conv.width() >= 0 ? std::max(conv.width() - static_cast<int>(str.size()) - + static_cast<int>(sign_char != 0), + 0) + : 0; + if (conv.has_left_flag()) { + right_spaces = missing_chars; + } else if (conv.has_zero_flag()) { + zeros = missing_chars; + } else { + left_spaces = missing_chars; + } + + sink->Append(left_spaces, ' '); + if (sign_char != '\0') sink->Append(1, sign_char); + sink->Append(zeros, '0'); + sink->Append(str); + sink->Append(right_spaces, ' '); +} + +template <typename Float> +bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + // Print the sign or the sign column. + Float abs_v = v; + char sign_char = 0; + if (std::signbit(abs_v)) { + sign_char = '-'; + abs_v = -abs_v; + } else if (conv.has_show_pos_flag()) { + sign_char = '+'; + } else if (conv.has_sign_col_flag()) { + sign_char = ' '; + } + + // Print nan/inf. + if (ConvertNonNumericFloats(sign_char, abs_v, conv, sink)) { + return true; + } + + int precision = conv.precision() < 0 ? 6 : conv.precision(); + + int exp = 0; + + auto decomposed = Decompose(abs_v); + + Buffer buffer; + + FormatConversionChar c = conv.conversion_char(); + + if (c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::F) { + FormatF(decomposed.mantissa, decomposed.exponent, + {sign_char, precision, conv, sink}); + return true; + } else if (c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::E) { + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (!conv.has_alt_flag() && buffer.back() == '.') buffer.pop_back(); + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } else if (c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::G) { + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. + std::rotate(buffer.begin + 1, buffer.begin + 2, buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.has_alt_flag()) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) { + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } + } else if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::A) { + return FallbackToSnprintf(v, conv, sink); + } else { + return false; + } + + WriteBufferToSink(sign_char, + absl::string_view(buffer.begin, buffer.end - buffer.begin), + conv, sink); + + return true; +} + +} // namespace + +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + if (std::numeric_limits<long double>::digits == + 2 * std::numeric_limits<double>::digits) { + // This is the `double-double` representation of `long double`. + // We do not handle it natively. Fallback to snprintf. + return FallbackToSnprintf(v, conv, sink); + } + + return FloatToSink(v, conv, sink); +} + +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.h b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.h new file mode 100644 index 000000000000..e78bc19106ff --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.h @@ -0,0 +1,23 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ + +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/output.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/output.cc new file mode 100644 index 000000000000..c4b24706132c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/output.cc @@ -0,0 +1,72 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/output.h" + +#include <errno.h> +#include <cstring> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { +struct ClearErrnoGuard { + ClearErrnoGuard() : old_value(errno) { errno = 0; } + ~ClearErrnoGuard() { + if (!errno) errno = old_value; + } + int old_value; +}; +} // namespace + +void BufferRawSink::Write(string_view v) { + size_t to_write = std::min(v.size(), size_); + std::memcpy(buffer_, v.data(), to_write); + buffer_ += to_write; + size_ -= to_write; + total_written_ += v.size(); +} + +void FILERawSink::Write(string_view v) { + while (!v.empty() && !error_) { + // Reset errno to zero in case the libc implementation doesn't set errno + // when a failure occurs. + ClearErrnoGuard guard; + + if (size_t result = std::fwrite(v.data(), 1, v.size(), output_)) { + // Some progress was made. + count_ += result; + v.remove_prefix(result); + } else { + if (errno == EINTR) { + continue; + } else if (errno) { + error_ = errno; + } else if (std::ferror(output_)) { + // Non-POSIX compliant libc implementations may not set errno, so we + // have check the streams error indicator. + error_ = EBADF; + } else { + // We're likely on a non-POSIX system that encountered EINTR but had no + // way of reporting it. + continue; + } + } + } +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/output.h b/third_party/abseil_cpp/absl/strings/internal/str_format/output.h new file mode 100644 index 000000000000..8030dae00f4f --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/output.h @@ -0,0 +1,96 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Output extension hooks for the Format library. +// `internal::InvokeFlush` calls the appropriate flush function for the +// specified output argument. +// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF. +// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ + +#include <cstdio> +#include <ostream> +#include <string> + +#include "absl/base/port.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +// RawSink implementation that writes into a char* buffer. +// It will not overflow the buffer, but will keep the total count of chars +// that would have been written. +class BufferRawSink { + public: + BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {} + + size_t total_written() const { return total_written_; } + void Write(string_view v); + + private: + char* buffer_; + size_t size_; + size_t total_written_ = 0; +}; + +// RawSink implementation that writes into a FILE*. +// It keeps track of the total number of bytes written and any error encountered +// during the writes. +class FILERawSink { + public: + explicit FILERawSink(std::FILE* output) : output_(output) {} + + void Write(string_view v); + + size_t count() const { return count_; } + int error() const { return error_; } + + private: + std::FILE* output_; + int error_ = 0; + size_t count_ = 0; +}; + +// Provide RawSink integration with common types from the STL. +inline void AbslFormatFlush(std::string* out, string_view s) { + out->append(s.data(), s.size()); +} +inline void AbslFormatFlush(std::ostream* out, string_view s) { + out->write(s.data(), s.size()); +} + +inline void AbslFormatFlush(FILERawSink* sink, string_view v) { + sink->Write(v); +} + +inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { + sink->Write(v); +} + +// This is a SFINAE to get a better compiler error message when the type +// is not supported. +template <typename T> +auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) { + AbslFormatFlush(out, s); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/output_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/output_test.cc new file mode 100644 index 000000000000..ce2e91a0bbe8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/output_test.cc @@ -0,0 +1,79 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/output.h" + +#include <sstream> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/cord.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +TEST(InvokeFlush, String) { + std::string str = "ABC"; + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + +TEST(InvokeFlush, Stream) { + std::stringstream str; + str << "ABC"; + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str.str(), "ABCDEF"); +} + +TEST(InvokeFlush, Cord) { + absl::Cord str("ABC"); + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + +TEST(BufferRawSink, Limits) { + char buf[16]; + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World237237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World"); + str_format_internal::InvokeFlush(&bufsink, "237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World"); + str_format_internal::InvokeFlush(&bufsink, "237237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x"); + } +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/parser.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.cc new file mode 100644 index 000000000000..cc55dfa9c74e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.cc @@ -0,0 +1,336 @@ +#include "absl/strings/internal/str_format/parser.h" + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <cctype> +#include <cstdint> + +#include <algorithm> +#include <initializer_list> +#include <limits> +#include <ostream> +#include <string> +#include <unordered_set> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +using CC = FormatConversionCharInternal; +using LM = LengthMod; + +ABSL_CONST_INIT const ConvTag kTags[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 + {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f + {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 + {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff +}; + +namespace { + +bool CheckFastPathSetting(const UnboundConversion& conv) { + bool should_be_basic = !conv.flags.left && // + !conv.flags.show_pos && // + !conv.flags.sign_col && // + !conv.flags.alt && // + !conv.flags.zero && // + (conv.width.value() == -1) && + (conv.precision.value() == -1); + if (should_be_basic != conv.flags.basic) { + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags.basic, conv.flags.left, conv.flags.show_pos, + conv.flags.sign_col, conv.flags.alt, conv.flags.zero, + conv.width.value(), conv.precision.value()); + } + return should_be_basic == conv.flags.basic; +} + +template <bool is_positional> +const char *ConsumeConversion(const char *pos, const char *const end, + UnboundConversion *conv, int *next_arg) { + const char* const original_pos = pos; + char c; + // Read the next char into `c` and update `pos`. Returns false if there are + // no more chars to read. +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ + } while (0) + + const auto parse_digits = [&] { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10 + // digits. If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10; + for (;;) { + if (ABSL_PREDICT_FALSE(pos == end)) break; + c = *pos++; + if (!std::isdigit(c)) break; + --num_digits; + if (ABSL_PREDICT_FALSE(!num_digits)) break; + digits = 10 * digits + c - '0'; + } + return digits; + }; + + if (is_positional) { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->arg_position = parse_digits(); + assert(conv->arg_position > 0); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + } + + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + + // We should start with the basic flag on. + assert(conv->flags.basic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. + if (c < 'A') { + conv->flags.basic = false; + + for (; c <= '0';) { + // FIXME: We might be able to speed this up reusing the lookup table from + // above. It might require changing Flags to be a plain integer where we + // can |= a value. + switch (c) { + case '-': + conv->flags.left = true; + break; + case '+': + conv->flags.show_pos = true; + break; + case ' ': + conv->flags.sign_col = true; + break; + case '#': + conv->flags.alt = true; + break; + case '0': + conv->flags.zero = true; + break; + default: + goto flags_done; + } + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } +flags_done: + + if (c <= '9') { + if (c >= '0') { + int maybe_width = parse_digits(); + if (!is_positional && c == '$') { + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; + // Positional conversion. + *next_arg = -1; + conv->flags = Flags(); + conv->flags.basic = true; + return ConsumeConversion<true>(original_pos, end, conv, next_arg); + } + conv->width.set_value(maybe_width); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->width.set_from_arg(parse_digits()); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (std::isdigit(c)) { + conv->precision.set_value(parse_digits()); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->precision.set_from_arg(parse_digits()); + if (c != '$') return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + auto tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; + + // It is a length modifier. + using str_format_internal::LengthMod; + LengthMod length_mod = tag.as_length(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->length_mod = length_mod; + } + tag = GetTagForChar(c); + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = tag.as_conv(); + if (!is_positional) conv->arg_position = ++*next_arg; + return pos; +} + +} // namespace + +std::string LengthModToString(LengthMod v) { + switch (v) { + case LengthMod::h: + return "h"; + case LengthMod::hh: + return "hh"; + case LengthMod::l: + return "l"; + case LengthMod::ll: + return "ll"; + case LengthMod::L: + return "L"; + case LengthMod::j: + return "j"; + case LengthMod::z: + return "z"; + case LengthMod::t: + return "t"; + case LengthMod::q: + return "q"; + case LengthMod::none: + return ""; + } + return ""; +} + +const char *ConsumeUnboundConversion(const char *p, const char *end, + UnboundConversion *conv, int *next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); + return ConsumeConversion<false>(p, end, conv, next_arg); +} + +struct ParsedFormatBase::ParsedFormatConsumer { + explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) + : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} + + bool Append(string_view s) { + if (s.empty()) return true; + + size_t text_end = AppendText(s); + + if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) { + // Let's extend the existing text run. + parsed->items_.back().text_end = text_end; + } else { + // Let's make a new text run. + parsed->items_.push_back({false, text_end, {}}); + } + return true; + } + + bool ConvertOne(const UnboundConversion &conv, string_view s) { + size_t text_end = AppendText(s); + parsed->items_.push_back({true, text_end, conv}); + return true; + } + + size_t AppendText(string_view s) { + memcpy(data_pos, s.data(), s.size()); + data_pos += s.size(); + return static_cast<size_t>(data_pos - parsed->data_.get()); + } + + ParsedFormatBase *parsed; + char* data_pos; +}; + +ParsedFormatBase::ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) + : data_(format.empty() ? nullptr : new char[format.size()]) { + has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || + !MatchesConversions(allow_ignored, convs); +} + +bool ParsedFormatBase::MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const { + std::unordered_set<int> used; + auto add_if_valid_conv = [&](int pos, char c) { + if (static_cast<size_t>(pos) > convs.size() || + !Contains(convs.begin()[pos - 1], c)) + return false; + used.insert(pos); + return true; + }; + for (const ConversionItem &item : items_) { + if (!item.is_conversion) continue; + auto &conv = item.conv; + if (conv.precision.is_from_arg() && + !add_if_valid_conv(conv.precision.get_from_arg(), '*')) + return false; + if (conv.width.is_from_arg() && + !add_if_valid_conv(conv.width.get_from_arg(), '*')) + return false; + if (!add_if_valid_conv(conv.arg_position, + FormatConversionCharToChar(conv.conv))) + return false; + } + return used.size() == convs.size() || allow_ignored; +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/parser.h b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.h new file mode 100644 index 000000000000..fffed04fa072 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.h @@ -0,0 +1,335 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ + +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> + +#include <cassert> +#include <cstdint> +#include <initializer_list> +#include <iosfwd> +#include <iterator> +#include <memory> +#include <string> +#include <vector> + +#include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +std::string LengthModToString(LengthMod v); + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + UnboundConversion() + : flags() /* This is required to zero all the fields of flags. */ { + flags.basic = true; + } + + class InputValue { + public: + void set_value(int value) { + assert(value >= 0); + value_ = value; + } + int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() return true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecfied in this state. + void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + bool is_from_arg() const { return value_ < -1; } + int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags; + LengthMod length_mod = LengthMod::none; + FormatConversionChar conv = FormatConversionCharInternal::kNone; +}; + +// Consume conversion spec prefix (not including '%') of [p, end) if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, int* next_arg); + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod` checking and +// conversions. +class ConvTag { + public: + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<int8_t>(conversion_char)) {} + // We invert the length modifiers to make them negative so that we can easily + // test for them. + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(~static_cast<std::int8_t>(length_mod)) {} + // Everything else is -128, which is negative to make is_conv() simpler. + constexpr ConvTag() : tag_(-128) {} + + bool is_conv() const { return tag_ >= 0; } + bool is_length() const { return tag_ < 0 && tag_ != -128; } + FormatConversionChar as_conv() const { + assert(is_conv()); + return static_cast<FormatConversionChar>(tag_); + } + LengthMod as_length() const { + assert(is_length()); + return static_cast<LengthMod>(~tag_); + } + + private: + std::int8_t tag_; +}; + +extern const ConvTag kTags[256]; +// Keep a single table for all the conversion chars and length modifiers. +inline ConvTag GetTagForChar(char c) { + return kTags[static_cast<unsigned char>(c)]; +} + +// Parse the format string provided in 'src' and pass the identified items into +// 'consumer'. +// Text runs will be passed by calling +// Consumer::Append(string_view); +// ConversionItems will be passed by calling +// Consumer::ConvertOne(UnboundConversion, string_view); +// In the case of ConvertOne, the string_view that is passed is the +// portion of the format string corresponding to the conversion, not including +// the leading %. On success, it returns true. On failure, it stops and returns +// false. +template <typename Consumer> +bool ParseFormatString(string_view src, Consumer consumer) { + int next_arg = 0; + const char* p = src.data(); + const char* const end = p + src.size(); + while (p != end) { + const char* percent = static_cast<const char*>(memchr(p, '%', end - p)); + if (!percent) { + // We found the last substring. + return consumer.Append(string_view(p, end - p)); + } + // We found a percent, so push the text run then process the percent. + if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) { + return false; + } + if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; + + auto tag = GetTagForChar(percent[1]); + if (tag.is_conv()) { + if (ABSL_PREDICT_FALSE(next_arg < 0)) { + // This indicates an error in the format string. + // The only way to get `next_arg < 0` here is to have a positional + // argument first which sets next_arg to -1 and then a non-positional + // argument. + return false; + } + p = percent + 2; + + // Keep this case separate from the one below. + // ConvertOne is more efficient when the compiler can see that the `basic` + // flag is set. + UnboundConversion conv; + conv.conv = tag.as_conv(); + conv.arg_position = ++next_arg; + if (ABSL_PREDICT_FALSE( + !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { + return false; + } + } else if (percent[1] != '%') { + UnboundConversion conv; + p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + if (ABSL_PREDICT_FALSE(p == nullptr)) return false; + if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( + conv, string_view(percent + 1, p - (percent + 1))))) { + return false; + } + } else { + if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false; + p = percent + 2; + continue; + } + } + return true; +} + +// Always returns true, or fails to compile in a constexpr context if s does not +// point to a constexpr char array. +constexpr bool EnsureConstexpr(string_view s) { + return s.empty() || s[0] == s[0]; +} + +class ParsedFormatBase { + public: + explicit ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs); + + ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } + + ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } + + ParsedFormatBase& operator=(const ParsedFormatBase& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + items_ = other.items_; + size_t text_size = items_.empty() ? 0 : items_.back().text_end; + data_.reset(new char[text_size]); + memcpy(data_.get(), other.data_.get(), text_size); + return *this; + } + + ParsedFormatBase& operator=(ParsedFormatBase&& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + data_ = std::move(other.data_); + items_ = std::move(other.items_); + // Reset the vector to make sure the invariants hold. + other.items_.clear(); + return *this; + } + + template <typename Consumer> + bool ProcessFormat(Consumer consumer) const { + const char* const base = data_.get(); + string_view text(base, 0); + for (const auto& item : items_) { + const char* const end = text.data() + text.size(); + text = string_view(end, (base + item.text_end) - end); + if (item.is_conversion) { + if (!consumer.ConvertOne(item.conv, text)) return false; + } else { + if (!consumer.Append(text)) return false; + } + } + return !has_error_; + } + + bool has_error() const { return has_error_; } + + private: + // Returns whether the conversions match and if !allow_ignored it verifies + // that all conversions are used by the format. + bool MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const; + + struct ParsedFormatConsumer; + + struct ConversionItem { + bool is_conversion; + // Points to the past-the-end location of this element in the data_ array. + size_t text_end; + UnboundConversion conv; + }; + + bool has_error_; + std::unique_ptr<char[]> data_; + std::vector<ConversionItem> items_; +}; + + +// A value type representing a preparsed format. These can be created, copied +// around, and reused to speed up formatting loops. +// The user must specify through the template arguments the conversion +// characters used in the format. This will be checked at compile time. +// +// This class uses Conv enum values to specify each argument. +// This allows for more flexibility as you can specify multiple possible +// conversion characters for each argument. +// ParsedFormat<char...> is a simplified alias for when the user only +// needs to specify a single conversion character for each argument. +// +// Example: +// // Extended format supports multiple characters per argument: +// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // 'format' can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +// +// This class also supports runtime format checking with the ::New() and +// ::NewAllowIgnored() factory functions. +// This is the only API that allows the user to pass a runtime specified format +// string. These factory functions will return NULL if the format does not match +// the conversions requested by the user. +template <FormatConversionCharSet... C> +class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { + public: + explicit ExtendedParsedFormat(string_view format) +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(format), + "Format string is not constexpr."), + enable_if(str_format_internal::ValidFormatImpl<C...>(format), + "Format specified does not match the template arguments."))) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + : ExtendedParsedFormat(format, false) { + } + + // ExtendedParsedFormat factory function. + // The user still has to specify the conversion characters, but they will not + // be checked at compile time. Instead, it will be checked at runtime. + // This delays the checking to runtime, but allows the user to pass + // dynamically sourced formats. + // It returns NULL if the format does not match the conversion characters. + // The user is responsible for checking the return value before using it. + // + // The 'New' variant will check that all the specified arguments are being + // consumed by the format and return NULL if any argument is being ignored. + // The 'NewAllowIgnored' variant will not verify this and will allow formats + // that ignore arguments. + static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { + return New(format, false); + } + static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( + string_view format) { + return New(format, true); + } + + private: + static std::unique_ptr<ExtendedParsedFormat> New(string_view format, + bool allow_ignored) { + std::unique_ptr<ExtendedParsedFormat> conv( + new ExtendedParsedFormat(format, allow_ignored)); + if (conv->has_error()) return nullptr; + return conv; + } + + ExtendedParsedFormat(string_view s, bool allow_ignored) + : ParsedFormatBase(s, allow_ignored, {C...}) {} +}; +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/parser_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/parser_test.cc new file mode 100644 index 000000000000..5aced987200d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/parser_test.cc @@ -0,0 +1,413 @@ +#include "absl/strings/internal/str_format/parser.h" + +#include <string.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +using testing::Pair; + +TEST(LengthModTest, Names) { + struct Expectation { + int line; + LengthMod mod; + const char *name; + }; + const Expectation kExpect[] = { + {__LINE__, LengthMod::none, "" }, + {__LINE__, LengthMod::h, "h" }, + {__LINE__, LengthMod::hh, "hh"}, + {__LINE__, LengthMod::l, "l" }, + {__LINE__, LengthMod::ll, "ll"}, + {__LINE__, LengthMod::L, "L" }, + {__LINE__, LengthMod::j, "j" }, + {__LINE__, LengthMod::z, "z" }, + {__LINE__, LengthMod::t, "t" }, + {__LINE__, LengthMod::q, "q" }, + }; + EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), 10); + for (auto e : kExpect) { + SCOPED_TRACE(e.line); + EXPECT_EQ(e.name, LengthModToString(e.mod)); + } +} + +TEST(ConversionCharTest, Names) { + struct Expectation { + FormatConversionChar id; + char name; + }; + // clang-format off + const Expectation kExpect[] = { +#define X(c) {FormatConversionCharInternal::c, #c[0]} + X(c), X(s), // text + X(d), X(i), X(o), X(u), X(x), X(X), // int + X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float + X(n), X(p), // misc +#undef X + {FormatConversionCharInternal::kNone, '\0'}, + }; + // clang-format on + for (auto e : kExpect) { + SCOPED_TRACE(e.name); + FormatConversionChar v = e.id; + EXPECT_EQ(e.name, FormatConversionCharToChar(v)); + } +} + +class ConsumeUnboundConversionTest : public ::testing::Test { + public: + std::pair<string_view, string_view> Consume(string_view src) { + int next = 0; + o = UnboundConversion(); // refresh + const char* p = ConsumeUnboundConversion( + src.data(), src.data() + src.size(), &o, &next); + if (!p) return {{}, src}; + return {string_view(src.data(), p - src.data()), + string_view(p, src.data() + src.size() - p)}; + } + + bool Run(const char *fmt, bool force_positional = false) { + int next = force_positional ? -1 : 0; + o = UnboundConversion(); // refresh + return ConsumeUnboundConversion(fmt, fmt + strlen(fmt), &o, &next) == + fmt + strlen(fmt); + } + UnboundConversion o; +}; + +TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { + struct Expectation { + int line; + string_view src; + string_view out; + string_view src_post; + }; + const Expectation kExpect[] = { + {__LINE__, "", "", "" }, + {__LINE__, "b", "", "b" }, // 'b' is invalid + {__LINE__, "ba", "", "ba"}, // 'b' is invalid + {__LINE__, "l", "", "l" }, // just length mod isn't okay + {__LINE__, "d", "d", "" }, // basic + {__LINE__, "d ", "d", " " }, // leave suffix + {__LINE__, "dd", "d", "d" }, // don't be greedy + {__LINE__, "d9", "d", "9" }, // leave non-space suffix + {__LINE__, "dzz", "d", "zz"}, // length mod as suffix + {__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed. + {__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width + {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.line); + EXPECT_THAT(Consume(e.src), Pair(e.out, e.src_post)); + } +} + +TEST_F(ConsumeUnboundConversionTest, BasicConversion) { + EXPECT_FALSE(Run("")); + EXPECT_FALSE(Run("z")); + + EXPECT_FALSE(Run("dd")); // no excess allowed + + EXPECT_TRUE(Run("d")); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + EXPECT_EQ(1, o.arg_position); +} + +TEST_F(ConsumeUnboundConversionTest, ArgPosition) { + EXPECT_TRUE(Run("d")); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("3$d")); + EXPECT_EQ(3, o.arg_position); + EXPECT_TRUE(Run("1$d")); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("1$d", true)); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("123$d")); + EXPECT_EQ(123, o.arg_position); + EXPECT_TRUE(Run("123$d", true)); + EXPECT_EQ(123, o.arg_position); + EXPECT_TRUE(Run("10$d")); + EXPECT_EQ(10, o.arg_position); + EXPECT_TRUE(Run("10$d", true)); + EXPECT_EQ(10, o.arg_position); + + // Position can't be zero. + EXPECT_FALSE(Run("0$d")); + EXPECT_FALSE(Run("0$d", true)); + EXPECT_FALSE(Run("1$*0$d")); + EXPECT_FALSE(Run("1$.*0$d")); + + // Position can't start with a zero digit at all. That is not a 'decimal'. + EXPECT_FALSE(Run("01$p")); + EXPECT_FALSE(Run("01$p", true)); + EXPECT_FALSE(Run("1$*01$p")); + EXPECT_FALSE(Run("1$.*01$p")); +} + +TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) { + EXPECT_TRUE(Run("14d")); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + + EXPECT_TRUE(Run("14.d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run(".d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run(".5d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(5, o.precision.value()); + + EXPECT_TRUE(Run(".0d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run("14.5d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_EQ(5, o.precision.value()); + + EXPECT_TRUE(Run("*.*d")); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(1, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(2, o.precision.get_from_arg()); + EXPECT_EQ(3, o.arg_position); + + EXPECT_TRUE(Run("*d")); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(1, o.width.get_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + EXPECT_EQ(2, o.arg_position); + + EXPECT_TRUE(Run(".*d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(1, o.precision.get_from_arg()); + EXPECT_EQ(2, o.arg_position); + + // mixed implicit and explicit: didn't specify arg position. + EXPECT_FALSE(Run("*23$.*34$d")); + + EXPECT_TRUE(Run("12$*23$.*34$d")); + EXPECT_EQ(12, o.arg_position); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(23, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(34, o.precision.get_from_arg()); + + EXPECT_TRUE(Run("2$*5$.*9$d")); + EXPECT_EQ(2, o.arg_position); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(5, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(9, o.precision.get_from_arg()); + + EXPECT_FALSE(Run(".*0$d")) << "no arg 0"; + + // Large values + EXPECT_TRUE(Run("999999999.999999999d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_EQ(999999999, o.width.value()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(999999999, o.precision.value()); + + EXPECT_FALSE(Run("1000000000.999999999d")); + EXPECT_FALSE(Run("999999999.1000000000d")); + EXPECT_FALSE(Run("9999999999d")); + EXPECT_FALSE(Run(".9999999999d")); +} + +TEST_F(ConsumeUnboundConversionTest, Flags) { + static const char kAllFlags[] = "-+ #0"; + static const int kNumFlags = ABSL_ARRAYSIZE(kAllFlags) - 1; + for (int rev = 0; rev < 2; ++rev) { + for (int i = 0; i < 1 << kNumFlags; ++i) { + std::string fmt; + for (int k = 0; k < kNumFlags; ++k) + if ((i >> k) & 1) fmt += kAllFlags[k]; + // flag order shouldn't matter + if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); } + fmt += 'd'; + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt.c_str())); + EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left); + EXPECT_EQ(fmt.find('+') == std::string::npos, !o.flags.show_pos); + EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col); + EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt); + EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero); + } + } +} + +TEST_F(ConsumeUnboundConversionTest, BasicFlag) { + // Flag is on + for (const char* fmt : {"d", "llx", "G", "1$X"}) { + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt)); + EXPECT_TRUE(o.flags.basic); + } + + // Flag is off + for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt)); + EXPECT_FALSE(o.flags.basic); + } +} + +TEST_F(ConsumeUnboundConversionTest, LengthMod) { + EXPECT_TRUE(Run("d")); + EXPECT_EQ(LengthMod::none, o.length_mod); + EXPECT_TRUE(Run("hd")); + EXPECT_EQ(LengthMod::h, o.length_mod); + EXPECT_TRUE(Run("hhd")); + EXPECT_EQ(LengthMod::hh, o.length_mod); + EXPECT_TRUE(Run("ld")); + EXPECT_EQ(LengthMod::l, o.length_mod); + EXPECT_TRUE(Run("lld")); + EXPECT_EQ(LengthMod::ll, o.length_mod); + EXPECT_TRUE(Run("Lf")); + EXPECT_EQ(LengthMod::L, o.length_mod); + EXPECT_TRUE(Run("qf")); + EXPECT_EQ(LengthMod::q, o.length_mod); + EXPECT_TRUE(Run("jd")); + EXPECT_EQ(LengthMod::j, o.length_mod); + EXPECT_TRUE(Run("zd")); + EXPECT_EQ(LengthMod::z, o.length_mod); + EXPECT_TRUE(Run("td")); + EXPECT_EQ(LengthMod::t, o.length_mod); +} + +struct SummarizeConsumer { + std::string* out; + explicit SummarizeConsumer(std::string* out) : out(out) {} + + bool Append(string_view s) { + *out += "[" + std::string(s) + "]"; + return true; + } + + bool ConvertOne(const UnboundConversion& conv, string_view s) { + *out += "{"; + *out += std::string(s); + *out += ":"; + *out += std::to_string(conv.arg_position) + "$"; + if (conv.width.is_from_arg()) { + *out += std::to_string(conv.width.get_from_arg()) + "$*"; + } + if (conv.precision.is_from_arg()) { + *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; + } + *out += FormatConversionCharToChar(conv.conv); + *out += "}"; + return true; + } +}; + +std::string SummarizeParsedFormat(const ParsedFormatBase& pc) { + std::string out; + if (!pc.ProcessFormat(SummarizeConsumer(&out))) out += "!"; + return out; +} + +class ParsedFormatTest : public testing::Test {}; + +TEST_F(ParsedFormatTest, ValueSemantics) { + ParsedFormatBase p1({}, true, {}); // empty format + EXPECT_EQ("", SummarizeParsedFormat(p1)); + + ParsedFormatBase p2 = p1; // copy construct (empty) + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); + + p1 = ParsedFormatBase("hello%s", true, + {FormatConversionCharSetInternal::s}); // move assign + EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1)); + + ParsedFormatBase p3 = p1; // copy construct (nonempty) + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p3)); + + using std::swap; + swap(p1, p2); + EXPECT_EQ("", SummarizeParsedFormat(p1)); + EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p2)); + swap(p1, p2); // undo + + p2 = p1; // copy assign + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); +} + +struct ExpectParse { + const char* in; + std::initializer_list<FormatConversionCharSet> conv_set; + const char* out; +}; + +TEST_F(ParsedFormatTest, Parsing) { + // Parse should be equivalent to that obtained by ConversionParseIterator. + // No need to retest the parsing edge cases here. + const ExpectParse kExpect[] = { + {"", {}, ""}, + {"ab", {}, "[ab]"}, + {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"}, + {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"}, + {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"}, + {"a%b %d", {}, "[a]!"}, // stop after error + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.in); + EXPECT_EQ(e.out, + SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set))); + } +} + +TEST_F(ParsedFormatTest, ParsingFlagOrder) { + const ExpectParse kExpect[] = { + {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"}, + {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"}, + {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"}, + {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"}, + {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"}, + {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"}, + {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"}, + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.in); + EXPECT_EQ(e.out, + SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set))); + } +} + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_join_internal.h b/third_party/abseil_cpp/absl/strings/internal/str_join_internal.h new file mode 100644 index 000000000000..31dbf672f0b6 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_join_internal.h @@ -0,0 +1,314 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Join API that are inlined/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in this file are: +// +// - A handful of default Formatters +// - JoinAlgorithm() overloads +// - JoinRange() overloads +// - JoinTuple() +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_join.h +// +// IWYU pragma: private, include "absl/strings/str_join.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ + +#include <cstring> +#include <iterator> +#include <memory> +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/strings/internal/ostringstream.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/str_cat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// +// Formatter objects +// +// The following are implementation classes for standard Formatter objects. The +// factory functions that users will call to create and use these formatters are +// defined and documented in strings/join.h. +// + +// The default formatter. Converts alpha-numeric types to strings. +struct AlphaNumFormatterImpl { + // This template is needed in order to support passing in a dereferenced + // vector<bool>::iterator + template <typename T> + void operator()(std::string* out, const T& t) const { + StrAppend(out, AlphaNum(t)); + } + + void operator()(std::string* out, const AlphaNum& t) const { + StrAppend(out, t); + } +}; + +// A type that's used to overload the JoinAlgorithm() function (defined below) +// for ranges that do not require additional formatting (e.g., a range of +// strings). + +struct NoFormatter : public AlphaNumFormatterImpl {}; + +// Formats types to strings using the << operator. +class StreamFormatterImpl { + public: + // The method isn't const because it mutates state. Making it const will + // render StreamFormatterImpl thread-hostile. + template <typename T> + void operator()(std::string* out, const T& t) { + // The stream is created lazily to avoid paying the relatively high cost + // of its construction when joining an empty range. + if (strm_) { + strm_->clear(); // clear the bad, fail and eof bits in case they were set + strm_->str(out); + } else { + strm_.reset(new strings_internal::OStringStream(out)); + } + *strm_ << t; + } + + private: + std::unique_ptr<strings_internal::OStringStream> strm_; +}; + +// Formats a std::pair<>. The 'first' member is formatted using f1_ and the +// 'second' member is formatted using f2_. sep_ is the separator. +template <typename F1, typename F2> +class PairFormatterImpl { + public: + PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2) + : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {} + + template <typename T> + void operator()(std::string* out, const T& p) { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + template <typename T> + void operator()(std::string* out, const T& p) const { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + private: + F1 f1_; + std::string sep_; + F2 f2_; +}; + +// Wraps another formatter and dereferences the argument to operator() then +// passes the dereferenced argument to the wrapped formatter. This can be +// useful, for example, to join a std::vector<int*>. +template <typename Formatter> +class DereferenceFormatterImpl { + public: + DereferenceFormatterImpl() : f_() {} + explicit DereferenceFormatterImpl(Formatter&& f) + : f_(std::forward<Formatter>(f)) {} + + template <typename T> + void operator()(std::string* out, const T& t) { + f_(out, *t); + } + + template <typename T> + void operator()(std::string* out, const T& t) const { + f_(out, *t); + } + + private: + Formatter f_; +}; + +// DefaultFormatter<T> is a traits class that selects a default Formatter to use +// for the given type T. The ::Type member names the Formatter to use. This is +// used by the strings::Join() functions that do NOT take a Formatter argument, +// in which case a default Formatter must be chosen. +// +// AlphaNumFormatterImpl is the default in the base template, followed by +// specializations for other types. +template <typename ValueType> +struct DefaultFormatter { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<const char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<std::string> { + typedef NoFormatter Type; +}; +template <> +struct DefaultFormatter<absl::string_view> { + typedef NoFormatter Type; +}; +template <typename ValueType> +struct DefaultFormatter<ValueType*> { + typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type> + Type; +}; + +template <typename ValueType> +struct DefaultFormatter<std::unique_ptr<ValueType>> + : public DefaultFormatter<ValueType*> {}; + +// +// JoinAlgorithm() functions +// + +// The main joining algorithm. This simply joins the elements in the given +// iterator range, each separated by the given separator, into an output string, +// and formats each element using the provided Formatter object. +template <typename Iterator, typename Formatter> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + Formatter&& f) { + std::string result; + absl::string_view sep(""); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + f(&result, *it); + sep = s; + } + return result; +} + +// A joining algorithm that's optimized for a forward iterator range of +// string-like objects that do not need any additional formatting. This is to +// optimize the common case of joining, say, a std::vector<string> or a +// std::vector<absl::string_view>. +// +// This is an overload of the previous JoinAlgorithm() function. Here the +// Formatter argument is of type NoFormatter. Since NoFormatter is an internal +// type, this overload is only invoked when strings::Join() is called with a +// range of string-like objects (e.g., std::string, absl::string_view), and an +// explicit Formatter argument was NOT specified. +// +// The optimization is that the needed space will be reserved in the output +// string to avoid the need to resize while appending. To do this, the iterator +// range will be traversed twice: once to calculate the total needed size, and +// then again to copy the elements and delimiters to the output string. +template <typename Iterator, + typename = typename std::enable_if<std::is_convertible< + typename std::iterator_traits<Iterator>::iterator_category, + std::forward_iterator_tag>::value>::type> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + NoFormatter) { + std::string result; + if (start != end) { + // Sums size + size_t result_size = start->size(); + for (Iterator it = start; ++it != end;) { + result_size += s.size(); + result_size += it->size(); + } + + if (result_size > 0) { + STLStringResizeUninitialized(&result, result_size); + + // Joins strings + char* result_buf = &*result.begin(); + memcpy(result_buf, start->data(), start->size()); + result_buf += start->size(); + for (Iterator it = start; ++it != end;) { + memcpy(result_buf, s.data(), s.size()); + result_buf += s.size(); + memcpy(result_buf, it->data(), it->size()); + result_buf += it->size(); + } + } + } + + return result; +} + +// JoinTupleLoop implements a loop over the elements of a std::tuple, which +// are heterogeneous. The primary template matches the tuple interior case. It +// continues the iteration after appending a separator (for nonzero indices) +// and formatting an element of the tuple. The specialization for the I=N case +// matches the end-of-tuple, and terminates the iteration. +template <size_t I, size_t N> +struct JoinTupleLoop { + template <typename Tup, typename Formatter> + void operator()(std::string* out, const Tup& tup, absl::string_view sep, + Formatter&& fmt) { + if (I > 0) out->append(sep.data(), sep.size()); + fmt(out, std::get<I>(tup)); + JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt); + } +}; +template <size_t N> +struct JoinTupleLoop<N, N> { + template <typename Tup, typename Formatter> + void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {} +}; + +template <typename... T, typename Formatter> +std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep, + Formatter&& fmt) { + std::string result; + JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt); + return result; +} + +template <typename Iterator> +std::string JoinRange(Iterator first, Iterator last, + absl::string_view separator) { + // No formatter was explicitly given, so a default must be chosen. + typedef typename std::iterator_traits<Iterator>::value_type ValueType; + typedef typename DefaultFormatter<ValueType>::Type Formatter; + return JoinAlgorithm(first, last, separator, Formatter()); +} + +template <typename Range, typename Formatter> +std::string JoinRange(const Range& range, absl::string_view separator, + Formatter&& fmt) { + using std::begin; + using std::end; + return JoinAlgorithm(begin(range), end(range), separator, fmt); +} + +template <typename Range> +std::string JoinRange(const Range& range, absl::string_view separator) { + using std::begin; + using std::end; + return JoinRange(begin(range), end(range), separator); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_split_internal.h b/third_party/abseil_cpp/absl/strings/internal/str_split_internal.h new file mode 100644 index 000000000000..6f5bc095fbbe --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_split_internal.h @@ -0,0 +1,455 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Split API that are inline/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in here are +// +// - ConvertibleToStringView +// - SplitIterator<> +// - Splitter<> +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_split.h. +// +// IWYU pragma: private, include "absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ + +#include <array> +#include <initializer_list> +#include <iterator> +#include <map> +#include <type_traits> +#include <utility> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +#ifdef _GLIBCXX_DEBUG +#include "absl/strings/internal/stl_type_traits.h" +#endif // _GLIBCXX_DEBUG + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// This class is implicitly constructible from everything that absl::string_view +// is implicitly constructible from. If it's constructed from a temporary +// string, the data is moved into a data member so its lifetime matches that of +// the ConvertibleToStringView instance. +class ConvertibleToStringView { + public: + ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) + ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) + : value_(s) {} + + // Matches rvalue strings and moves their data to a member. + ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) + : copy_(std::move(s)), value_(copy_) {} + + ConvertibleToStringView(const ConvertibleToStringView& other) + : copy_(other.copy_), + value_(other.IsSelfReferential() ? copy_ : other.value_) {} + + ConvertibleToStringView(ConvertibleToStringView&& other) { + StealMembers(std::move(other)); + } + + ConvertibleToStringView& operator=(ConvertibleToStringView other) { + StealMembers(std::move(other)); + return *this; + } + + absl::string_view value() const { return value_; } + + private: + // Returns true if ctsp's value refers to its internal copy_ member. + bool IsSelfReferential() const { return value_.data() == copy_.data(); } + + void StealMembers(ConvertibleToStringView&& other) { + if (other.IsSelfReferential()) { + copy_ = std::move(other.copy_); + value_ = copy_; + other.value_ = other.copy_; + } else { + value_ = other.value_; + } + } + + // Holds the data moved from temporary std::string arguments. Declared first + // so that 'value' can refer to 'copy_'. + std::string copy_; + absl::string_view value_; +}; + +// An iterator that enumerates the parts of a string from a Splitter. The text +// to be split, the Delimiter, and the Predicate are all taken from the given +// Splitter object. Iterators may only be compared if they refer to the same +// Splitter instance. +// +// This class is NOT part of the public splitting API. +template <typename Splitter> +class SplitIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + + enum State { kInitState, kLastState, kEndState }; + SplitIterator(State state, const Splitter* splitter) + : pos_(0), + state_(state), + splitter_(splitter), + delimiter_(splitter->delimiter()), + predicate_(splitter->predicate()) { + // Hack to maintain backward compatibility. This one block makes it so an + // empty absl::string_view whose .data() happens to be nullptr behaves + // *differently* from an otherwise empty absl::string_view whose .data() is + // not nullptr. This is an undesirable difference in general, but this + // behavior is maintained to avoid breaking existing code that happens to + // depend on this old behavior/bug. Perhaps it will be fixed one day. The + // difference in behavior is as follows: + // Split(absl::string_view(""), '-'); // {""} + // Split(absl::string_view(), '-'); // {} + if (splitter_->text().data() == nullptr) { + state_ = kEndState; + pos_ = splitter_->text().size(); + return; + } + + if (state_ == kEndState) { + pos_ = splitter_->text().size(); + } else { + ++(*this); + } + } + + bool at_end() const { return state_ == kEndState; } + + reference operator*() const { return curr_; } + pointer operator->() const { return &curr_; } + + SplitIterator& operator++() { + do { + if (state_ == kLastState) { + state_ = kEndState; + return *this; + } + const absl::string_view text = splitter_->text(); + const absl::string_view d = delimiter_.Find(text, pos_); + if (d.data() == text.data() + text.size()) state_ = kLastState; + curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); + pos_ += curr_.size() + d.size(); + } while (!predicate_(curr_)); + return *this; + } + + SplitIterator operator++(int) { + SplitIterator old(*this); + ++(*this); + return old; + } + + friend bool operator==(const SplitIterator& a, const SplitIterator& b) { + return a.state_ == b.state_ && a.pos_ == b.pos_; + } + + friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { + return !(a == b); + } + + private: + size_t pos_; + State state_; + absl::string_view curr_; + const Splitter* splitter_; + typename Splitter::DelimiterType delimiter_; + typename Splitter::PredicateType predicate_; +}; + +// HasMappedType<T>::value is true iff there exists a type T::mapped_type. +template <typename T, typename = void> +struct HasMappedType : std::false_type {}; +template <typename T> +struct HasMappedType<T, absl::void_t<typename T::mapped_type>> + : std::true_type {}; + +// HasValueType<T>::value is true iff there exists a type T::value_type. +template <typename T, typename = void> +struct HasValueType : std::false_type {}; +template <typename T> +struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { +}; + +// HasConstIterator<T>::value is true iff there exists a type T::const_iterator. +template <typename T, typename = void> +struct HasConstIterator : std::false_type {}; +template <typename T> +struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> + : std::true_type {}; + +// IsInitializerList<T>::value is true iff T is an std::initializer_list. More +// details below in Splitter<> where this is used. +std::false_type IsInitializerListDispatch(...); // default: No +template <typename T> +std::true_type IsInitializerListDispatch(std::initializer_list<T>*); +template <typename T> +struct IsInitializerList + : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; + +// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition +// is true for type 'C'. +// +// Restricts conversion to container-like types (by testing for the presence of +// a const_iterator member type) and also to disable conversion to an +// std::initializer_list (which also has a const_iterator). Otherwise, code +// compiled in C++11 will get an error due to ambiguous conversion paths (in +// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> +// or an std::initializer_list<T>). + +template <typename C, bool has_value_type, bool has_mapped_type> +struct SplitterIsConvertibleToImpl : std::false_type {}; + +template <typename C> +struct SplitterIsConvertibleToImpl<C, true, false> + : std::is_constructible<typename C::value_type, absl::string_view> {}; + +template <typename C> +struct SplitterIsConvertibleToImpl<C, true, true> + : absl::conjunction< + std::is_constructible<typename C::key_type, absl::string_view>, + std::is_constructible<typename C::mapped_type, absl::string_view>> {}; + +template <typename C> +struct SplitterIsConvertibleTo + : SplitterIsConvertibleToImpl< + C, +#ifdef _GLIBCXX_DEBUG + !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && +#endif // _GLIBCXX_DEBUG + !IsInitializerList< + typename std::remove_reference<C>::type>::value && + HasValueType<C>::value && HasConstIterator<C>::value, + HasMappedType<C>::value> { +}; + +// This class implements the range that is returned by absl::StrSplit(). This +// class has templated conversion operators that allow it to be implicitly +// converted to a variety of types that the caller may have specified on the +// left-hand side of an assignment. +// +// The main interface for interacting with this class is through its implicit +// conversion operators. However, this class may also be used like a container +// in that it has .begin() and .end() member functions. It may also be used +// within a range-for loop. +// +// Output containers can be collections of any type that is constructible from +// an absl::string_view. +// +// An Predicate functor may be supplied. This predicate will be used to filter +// the split strings: only strings for which the predicate returns true will be +// kept. A Predicate object is any unary functor that takes an absl::string_view +// and returns bool. +template <typename Delimiter, typename Predicate> +class Splitter { + public: + using DelimiterType = Delimiter; + using PredicateType = Predicate; + using const_iterator = strings_internal::SplitIterator<Splitter>; + using value_type = typename std::iterator_traits<const_iterator>::value_type; + + Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) + : text_(std::move(input_text)), + delimiter_(std::move(d)), + predicate_(std::move(p)) {} + + absl::string_view text() const { return text_.value(); } + const Delimiter& delimiter() const { return delimiter_; } + const Predicate& predicate() const { return predicate_; } + + // Range functions that iterate the split substrings as absl::string_view + // objects. These methods enable a Splitter to be used in a range-based for + // loop. + const_iterator begin() const { return {const_iterator::kInitState, this}; } + const_iterator end() const { return {const_iterator::kEndState, this}; } + + // An implicit conversion operator that is restricted to only those containers + // that the splitter is convertible to. + template <typename Container, + typename = typename std::enable_if< + SplitterIsConvertibleTo<Container>::value>::type> + operator Container() const { // NOLINT(runtime/explicit) + return ConvertToContainer<Container, typename Container::value_type, + HasMappedType<Container>::value>()(*this); + } + + // Returns a pair with its .first and .second members set to the first two + // strings returned by the begin() iterator. Either/both of .first and .second + // will be constructed with empty strings if the iterator doesn't have a + // corresponding value. + template <typename First, typename Second> + operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) + absl::string_view first, second; + auto it = begin(); + if (it != end()) { + first = *it; + if (++it != end()) { + second = *it; + } + } + return {First(first), Second(second)}; + } + + private: + // ConvertToContainer is a functor converting a Splitter to the requested + // Container of ValueType. It is specialized below to optimize splitting to + // certain combinations of Container and ValueType. + // + // This base template handles the generic case of storing the split results in + // the requested non-map-like container and converting the split substrings to + // the requested type. + template <typename Container, typename ValueType, bool is_map = false> + struct ConvertToContainer { + Container operator()(const Splitter& splitter) const { + Container c; + auto it = std::inserter(c, c.end()); + for (const auto sp : splitter) { + *it++ = ValueType(sp); + } + return c; + } + }; + + // Partial specialization for a std::vector<absl::string_view>. + // + // Optimized for the common case of splitting to a + // std::vector<absl::string_view>. In this case we first split the results to + // a small array of absl::string_view on the stack, to reduce reallocations. + template <typename A> + struct ConvertToContainer<std::vector<absl::string_view, A>, + absl::string_view, false> { + std::vector<absl::string_view, A> operator()( + const Splitter& splitter) const { + struct raw_view { + const char* data; + size_t size; + operator absl::string_view() const { // NOLINT(runtime/explicit) + return {data, size}; + } + }; + std::vector<absl::string_view, A> v; + std::array<raw_view, 16> ar; + for (auto it = splitter.begin(); !it.at_end();) { + size_t index = 0; + do { + ar[index].data = it->data(); + ar[index].size = it->size(); + ++it; + } while (++index != ar.size() && !it.at_end()); + v.insert(v.end(), ar.begin(), ar.begin() + index); + } + return v; + } + }; + + // Partial specialization for a std::vector<std::string>. + // + // Optimized for the common case of splitting to a std::vector<std::string>. + // In this case we first split the results to a std::vector<absl::string_view> + // so the returned std::vector<std::string> can have space reserved to avoid + // std::string moves. + template <typename A> + struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { + std::vector<std::string, A> operator()(const Splitter& splitter) const { + const std::vector<absl::string_view> v = splitter; + return std::vector<std::string, A>(v.begin(), v.end()); + } + }; + + // Partial specialization for containers of pairs (e.g., maps). + // + // The algorithm is to insert a new pair into the map for each even-numbered + // item, with the even-numbered item as the key with a default-constructed + // value. Each odd-numbered item will then be assigned to the last pair's + // value. + template <typename Container, typename First, typename Second> + struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + Container operator()(const Splitter& splitter) const { + Container m; + typename Container::iterator it; + bool insert = true; + for (const auto sp : splitter) { + if (insert) { + it = Inserter<Container>::Insert(&m, First(sp), Second()); + } else { + it->second = Second(sp); + } + insert = !insert; + } + return m; + } + + // Inserts the key and value into the given map, returning an iterator to + // the inserted item. Specialized for std::map and std::multimap to use + // emplace() and adapt emplace()'s return value. + template <typename Map> + struct Inserter { + using M = Map; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->insert(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::map<Ts...>> { + using M = std::map<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::multimap<Ts...>> { + using M = std::multimap<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)); + } + }; + }; + + ConvertibleToStringView text_; + Delimiter delimiter_; + Predicate predicate_; +}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/utf8.cc b/third_party/abseil_cpp/absl/strings/internal/utf8.cc new file mode 100644 index 000000000000..8fd8edc1ec6f --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/utf8.cc @@ -0,0 +1,53 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// UTF8 utilities, implemented to reduce dependencies. + +#include "absl/strings/internal/utf8.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { + if (utf8_char <= 0x7F) { + *buffer = static_cast<char>(utf8_char); + return 1; + } else if (utf8_char <= 0x7FF) { + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xC0 | utf8_char; + return 2; + } else if (utf8_char <= 0xFFFF) { + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xE0 | utf8_char; + return 3; + } else { + buffer[3] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xF0 | utf8_char; + return 4; + } +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/utf8.h b/third_party/abseil_cpp/absl/strings/internal/utf8.h new file mode 100644 index 000000000000..32fb1093bea3 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/utf8.h @@ -0,0 +1,50 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// UTF8 utilities, implemented to reduce dependencies. + +#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_ +#define ABSL_STRINGS_INTERNAL_UTF8_H_ + +#include <cstddef> +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes +// out the UTF-8 encoding into buffer, and returns the number of chars +// it wrote. +// +// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings +// are: +// 00 - 7F : 0xxxxxxx +// 80 - 7FF : 110xxxxx 10xxxxxx +// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx +// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +// +// Values greater than 0x10FFFF are not supported and may or may not write +// characters into buffer, however never will more than kMaxEncodedUTF8Size +// bytes be written, regardless of the value of utf8_char. +enum { kMaxEncodedUTF8Size = 4 }; +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_UTF8_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/utf8_test.cc b/third_party/abseil_cpp/absl/strings/internal/utf8_test.cc new file mode 100644 index 000000000000..88dd5036e3da --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/utf8_test.cc @@ -0,0 +1,66 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/utf8.h" + +#include <cstdint> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/base/port.h" + +namespace { + +#if !defined(__cpp_char8_t) +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++2a-compat" +#endif +TEST(EncodeUTF8Char, BasicFunction) { + std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"}, + {0x00A3, u8"\u00A3"}, + {0x00010000, u8"\U00010000"}, + {0x0000FFFF, u8"\U0000FFFF"}, + {0x0010FFFD, u8"\U0010FFFD"}}; + for (auto &test : tests) { + char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}; + char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'}; + char *buf0_written = + &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)]; + char *buf1_written = + &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)]; + int apparent_length = 7; + while (buf0[apparent_length - 1] == '\x00' && + buf1[apparent_length - 1] == '\xFF') { + if (--apparent_length == 0) break; + } + EXPECT_EQ(apparent_length, buf0_written - buf0); + EXPECT_EQ(apparent_length, buf1_written - buf1); + EXPECT_EQ(apparent_length, test.second.length()); + EXPECT_EQ(std::string(buf0, apparent_length), test.second); + EXPECT_EQ(std::string(buf1, apparent_length), test.second); + } + char buf[32] = "Don't Tread On Me"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000), + absl::strings_internal::kMaxEncodedUTF8Size); + char buf2[32] = "Negative is invalid but sane"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1), + absl::strings_internal::kMaxEncodedUTF8Size); +} +#if defined(__clang__) +#pragma clang diagnostic pop +#endif +#endif // !defined(__cpp_char8_t) + +} // namespace |