diff options
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 69 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 54 | ||||
-rw-r--r-- | absl/strings/cord.cc | 2019 | ||||
-rw-r--r-- | absl/strings/cord.h | 1121 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 1526 | ||||
-rw-r--r-- | absl/strings/cord_test_helpers.h | 60 | ||||
-rw-r--r-- | absl/strings/internal/cord_internal.h | 151 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg.cc | 37 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg.h | 7 | ||||
-rw-r--r-- | absl/strings/internal/str_format/arg_test.cc | 2 | ||||
-rw-r--r-- | absl/strings/internal/str_format/extension.cc | 11 | ||||
-rw-r--r-- | absl/strings/internal/str_format/extension.h | 200 | ||||
-rw-r--r-- | absl/strings/internal/str_format/float_conversion.cc | 18 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser.cc | 6 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser.h | 10 | ||||
-rw-r--r-- | absl/strings/internal/str_format/parser_test.cc | 14 | ||||
-rw-r--r-- | absl/strings/str_format_test.cc | 2 |
17 files changed, 5155 insertions, 152 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index d5a362d05777..b950ec769fa4 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -126,6 +126,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":strings", "//absl/base:core_headers", "//absl/container:fixed_array", @@ -250,6 +251,74 @@ cc_test( ], ) +cc_library( + name = "cord_internal", + hdrs = ["internal/cord_internal.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord", + srcs = [ + "cord.cc", + ], + hdrs = [ + "cord.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":internal", + ":str_format", + ":strings", + "//absl/base", + "//absl/base:base_internal", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "//absl/container:inlined_vector", + "//absl/functional:function_ref", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord_test_helpers", + testonly = 1, + hdrs = [ + "cord_test_helpers.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord", + ], +) + +cc_test( + name = "cord_test", + size = "medium", + srcs = ["cord_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_test_helpers", + ":strings", + "//absl/base", + "//absl/base:config", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "substitute_test", size = "small", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 3feb5e94c4e0..cebc59289347 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -526,3 +526,57 @@ absl_cc_test( absl::str_format gmock_main ) + +absl_cc_library( + NAME + cord + HDRS + "cord.h" + SRCS + "cord.cc" + "internal/cord_internal.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::strings_internal + absl::base + absl::base_internal + absl::core_headers + absl::endian + absl::fixed_array + absl::function_ref + absl::inlined_vector + absl::raw_logging_internal + absl::type_traits + PUBLIC +) + +absl_cc_library( + NAME + cord_test_helpers + HDRS + "cord_test_helpers.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + TESTONLY +) + +absl_cc_test( + NAME + cord_test + SRCS + "cord_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::strings + absl::base + absl::config + absl::endian + absl::raw_logging_internal + absl::fixed_array + gmock_main +) diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc new file mode 100644 index 000000000000..cc0cc9d7075e --- /dev/null +++ b/absl/strings/cord.cc @@ -0,0 +1,2019 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord.h" + +#include <algorithm> +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <iomanip> +#include <limits> +#include <ostream> +#include <sstream> +#include <type_traits> +#include <unordered_set> +#include <vector> + +#include "absl/base/casts.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/port.h" +#include "absl/container/fixed_array.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepSubstring; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + EXTERNAL = 1, + SUBSTRING = 2, + + // We have different tags for different sized flat arrays, + // starting with FLAT + FLAT = 3, +}; + +namespace { + +// Type used with std::allocator for allocating and deallocating +// `CordRepExternal`. std::allocator is used because it opaquely handles the +// different new / delete overloads available on a given platform. +using ExternalAllocType = + absl::aligned_storage_t<absl::cord_internal::ExternalRepAlignment(), + absl::cord_internal::ExternalRepAlignment()>; + +// Returns the number of objects to pass in to std::allocator<ExternalAllocType> +// allocate() and deallocate() to create enough room for `CordRepExternal` with +// `releaser_size` bytes on the end. +constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) { + // Be sure to round up since `releaser_size` could be smaller than + // sizeof(ExternalAllocType)`. + return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) - + 1) / + sizeof(ExternalAllocType); +} + +// Allocates enough memory for `CordRepExternal` and a releaser with size +// `releaser_size` bytes. +void* AllocateExternal(size_t releaser_size) { + return std::allocator<ExternalAllocType>().allocate( + GetExternalAllocNumObjects(releaser_size)); +} + +// Deallocates the memory for a `CordRepExternal` assuming it was allocated with +// a releaser of given size and alignment. +void DeallocateExternal(CordRepExternal* p, size_t releaser_size) { + std::allocator<ExternalAllocType>().deallocate( + reinterpret_cast<ExternalAllocType*>(p), + GetExternalAllocNumObjects(releaser_size)); +} + +// Returns a pointer to the type erased releaser for the given CordRepExternal. +void* GetExternalReleaser(CordRepExternal* rep) { + return rep + 1; +} + +} // namespace + +namespace cord_internal { + +inline CordRepConcat* CordRep::concat() { + assert(tag == CONCAT); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(tag == CONCAT); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(tag == SUBSTRING); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(tag == SUBSTRING); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(tag == EXTERNAL); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(tag == EXTERNAL); + return static_cast<const CordRepExternal*>(this); +} + +} // namespace cord_internal + +static const size_t kFlatOverhead = offsetof(CordRep, data); + +static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13"); + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = 32 - kFlatOverhead; + +// Prefer copying blocks of at most this size, otherwise reference count. +static const size_t kMaxBytesToCopy = 511; + +// Helper functions for rounded div, and rounding to exact sizes. +static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +static size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +static uint8_t AllocatedSizeToTag(size_t size) { + const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; + assert(tag <= std::numeric_limits<uint8_t>::max()); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +static constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); +} + +// Converts the provided tag to the corresponding available data length +static constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); + +constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { + return n == 0 ? a : Fibonacci(n - 1, b, a + b); +} + +static_assert(Fibonacci(63) == 6557470319842, + "Fibonacci values computed incorrectly"); + +// Minimum length required for a given depth tree -- a tree is considered +// balanced if +// length(t) >= min_length[depth(t)] +// The root node depth is allowed to become twice as large to reduce rebalancing +// for larger strings (see IsRootBalanced). +static constexpr uint64_t min_length[] = { + Fibonacci(2), + Fibonacci(3), + Fibonacci(4), + Fibonacci(5), + Fibonacci(6), + Fibonacci(7), + Fibonacci(8), + Fibonacci(9), + Fibonacci(10), + Fibonacci(11), + Fibonacci(12), + Fibonacci(13), + Fibonacci(14), + Fibonacci(15), + Fibonacci(16), + Fibonacci(17), + Fibonacci(18), + Fibonacci(19), + Fibonacci(20), + Fibonacci(21), + Fibonacci(22), + Fibonacci(23), + Fibonacci(24), + Fibonacci(25), + Fibonacci(26), + Fibonacci(27), + Fibonacci(28), + Fibonacci(29), + Fibonacci(30), + Fibonacci(31), + Fibonacci(32), + Fibonacci(33), + Fibonacci(34), + Fibonacci(35), + Fibonacci(36), + Fibonacci(37), + Fibonacci(38), + Fibonacci(39), + Fibonacci(40), + Fibonacci(41), + Fibonacci(42), + Fibonacci(43), + Fibonacci(44), + Fibonacci(45), + Fibonacci(46), + Fibonacci(47), + 0xffffffffffffffffull, // Avoid overflow +}; + +static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); + +// The inlined size to use with absl::InlinedVector. +// +// Note: The InlinedVectors in this file (and in cord.h) do not need to use +// the same value for their inlined size. The fact that they do is historical. +// It may be desirable for each to use a different inlined size optimized for +// that InlinedVector's usage. +// +// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for +// the inlined vector size (47 exists for backward compatibility). +static const int kInlinedVectorSize = 47; + +static inline bool IsRootBalanced(CordRep* node) { + if (node->tag != CONCAT) { + return true; + } else if (node->concat()->depth() <= 15) { + return true; + } else if (node->concat()->depth() > kMinLengthSize) { + return false; + } else { + // Allow depth to become twice as large as implied by fibonacci rule to + // reduce rebalancing for larger strings. + return (node->length >= min_length[node->concat()->depth() / 2]); + } +} + +static CordRep* Rebalance(CordRep* node); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os); +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation); + +static inline CordRep* VerifyTree(CordRep* node) { + // Verification is expensive, so only do it in debug mode. + // Even in debug mode we normally do only light validation. + // If you are debugging Cord itself, you should define the + // macro EXTRA_CORD_VALIDATION, e.g. by adding + // --copt=-DEXTRA_CORD_VALIDATION to the blaze line. +#ifdef EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true)); +#else // EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false)); +#endif // EXTRA_CORD_VALIDATION + static_cast<void>(&VerifyNode); + + return node; +} + +// -------------------------------------------------------------------- +// Memory management + +inline CordRep* Ref(CordRep* rep) { + if (rep != nullptr) { + rep->refcount.Increment(); + } + return rep; +} + +// This internal routine is called from the cold path of Unref below. Keeping it +// in a separate routine allows good inlining of Unref into many profitable call +// sites. However, the call to this function can be highly disruptive to the +// register pressure in those callers. To minimize the cost to callers, we use +// a special LLVM calling convention that preserves most registers. This allows +// the call to this routine in cold paths to not disrupt the caller's register +// pressure. This calling convention is not available on all platforms; we +// intentionally allow LLVM to ignore the attribute rather than attempting to +// hardcode the list of supported platforms. +#if defined(__clang__) && !defined(__i386__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wattributes" +__attribute__((preserve_most)) +#pragma clang diagnostic pop +#endif +static void UnrefInternal(CordRep* rep) { + assert(rep != nullptr); + + absl::InlinedVector<CordRep*, kInlinedVectorSize> pending; + while (true) { + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == EXTERNAL) { + CordRepExternal* rep_external = rep->external(); + absl::string_view data(rep_external->base, rep->length); + void* releaser = GetExternalReleaser(rep_external); + size_t releaser_size = rep_external->releaser_invoker(releaser, data); + rep_external->~CordRepExternal(); + DeallocateExternal(rep_external, releaser_size); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + // Flat CordReps are allocated and constructed with raw ::operator new + // and placement new, and must be destructed and deallocated + // accordingly. +#if defined(__cpp_sized_deallocation) + size_t size = TagToAllocatedSize(rep->tag); + rep->~CordRep(); + ::operator delete(rep, size); +#else + rep->~CordRep(); + ::operator delete(rep); +#endif + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +inline void Unref(CordRep* rep) { + // Fast-path for two common, hot cases: a null rep and a shared root. + if (ABSL_PREDICT_TRUE(rep == nullptr || + rep->refcount.DecrementExpectHighRefcount())) { + return; + } + + UnrefInternal(rep); +} + +// Return the depth of a node +static int Depth(const CordRep* rep) { + if (rep->tag == CONCAT) { + return rep->concat()->depth(); + } else { + return 0; + } +} + +static void SetConcatChildren(CordRepConcat* concat, CordRep* left, + CordRep* right) { + concat->left = left; + concat->right = right; + + concat->length = left->length + right->length; + concat->set_depth(1 + std::max(Depth(left), Depth(right))); +} + +// Create a concatenation of the specified nodes. +// Does not change the refcounts of "left" and "right". +// The returned node has a refcount of 1. +static CordRep* RawConcat(CordRep* left, CordRep* right) { + // Avoid making degenerate concat nodes (one child is empty) + if (left == nullptr || left->length == 0) { + Unref(left); + return right; + } + if (right == nullptr || right->length == 0) { + Unref(right); + return left; + } + + CordRepConcat* rep = new CordRepConcat(); + rep->tag = CONCAT; + SetConcatChildren(rep, left, right); + + return rep; +} + +static CordRep* Concat(CordRep* left, CordRep* right) { + CordRep* rep = RawConcat(left, right); + if (rep != nullptr && !IsRootBalanced(rep)) { + rep = Rebalance(rep); + } + return VerifyTree(rep); +} + +// Make a balanced tree out of an array of leaf nodes. +static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { + // Make repeated passes over the array, merging adjacent pairs + // until we are left with just a single node. + while (n > 1) { + size_t dst = 0; + for (size_t src = 0; src < n; src += 2) { + if (src + 1 < n) { + reps[dst] = Concat(reps[src], reps[src + 1]); + } else { + reps[dst] = reps[src]; + } + dst++; + } + n = dst; + } + + return reps[0]; +} + +// Create a new flat node. +static CordRep* NewFlat(size_t length_hint) { + if (length_hint <= kMinFlatLength) { + length_hint = kMinFlatLength; + } else if (length_hint > kMaxFlatLength) { + length_hint = kMaxFlatLength; + } + + // Round size up so it matches a size we can exactly express in a tag. + const size_t size = RoundUpForTag(length_hint + kFlatOverhead); + void* const raw_rep = ::operator new(size); + CordRep* rep = new (raw_rep) CordRep(); + rep->tag = AllocatedSizeToTag(size); + return VerifyTree(rep); +} + +// Create a new tree out of the specified array. +// The returned node has a refcount of 1. +static CordRep* NewTree(const char* data, + size_t length, + size_t alloc_hint) { + if (length == 0) return nullptr; + absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); + size_t n = 0; + do { + const size_t len = std::min(length, kMaxFlatLength); + CordRep* rep = NewFlat(len + alloc_hint); + rep->length = len; + memcpy(rep->data, data, len); + reps[n++] = VerifyTree(rep); + data += len; + length -= len; + } while (length != 0); + return MakeBalancedTree(reps.data(), n); +} + +namespace cord_internal { + +ExternalRepReleaserPair NewExternalWithUninitializedReleaser( + absl::string_view data, ExternalReleaserInvoker invoker, + size_t releaser_size) { + assert(!data.empty()); + + void* raw_rep = AllocateExternal(releaser_size); + auto* rep = new (raw_rep) CordRepExternal(); + rep->length = data.size(); + rep->tag = EXTERNAL; + rep->base = data.data(); + rep->releaser_invoker = invoker; + return {VerifyTree(rep), GetExternalReleaser(rep)}; +} + +} // namespace cord_internal + +static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { + // Never create empty substring nodes + if (length == 0) { + Unref(child); + return nullptr; + } else { + CordRepSubstring* rep = new CordRepSubstring(); + assert((offset + length) <= child->length); + rep->length = length; + rep->tag = SUBSTRING; + rep->start = offset; + rep->child = child; + return VerifyTree(rep); + } +} + +// -------------------------------------------------------------------- +// Cord::InlineRep functions + +// This will trigger LNK2005 in MSVC. +#ifndef COMPILER_MSVC +const unsigned char Cord::InlineRep::kMaxInline; +#endif // COMPILER_MSVC + +inline void Cord::InlineRep::set_data(const char* data, size_t n, + bool nullify_tail) { + static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); + + cord_internal::SmallMemmove(data_, data, n, nullify_tail); + data_[kMaxInline] = static_cast<char>(n); +} + +inline char* Cord::InlineRep::set_data(size_t n) { + assert(n <= kMaxInline); + memset(data_, 0, sizeof(data_)); + data_[kMaxInline] = static_cast<char>(n); + return data_; +} + +inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { + size_t len = data_[kMaxInline]; + CordRep* result; + if (len > kMaxInline) { + memcpy(&result, data_, sizeof(result)); + } else { + result = NewFlat(len + extra_hint); + result->length = len; + memcpy(result->data, data_, len); + set_tree(result); + } + return result; +} + +inline void Cord::InlineRep::reduce_size(size_t n) { + size_t tag = data_[kMaxInline]; + assert(tag <= kMaxInline); + assert(tag >= n); + tag -= n; + memset(data_ + tag, 0, n); + data_[kMaxInline] = static_cast<char>(tag); +} + +inline void Cord::InlineRep::remove_prefix(size_t n) { + cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n); + reduce_size(n); +} + +void Cord::InlineRep::AppendTree(CordRep* tree) { + if (tree == nullptr) return; + size_t len = data_[kMaxInline]; + if (len == 0) { + set_tree(tree); + } else { + set_tree(Concat(force_tree(0), tree)); + } +} + +void Cord::InlineRep::PrependTree(CordRep* tree) { + if (tree == nullptr) return; + size_t len = data_[kMaxInline]; + if (len == 0) { + set_tree(tree); + } else { + set_tree(Concat(tree, force_tree(0))); + } +} + +// Searches for a non-full flat node at the rightmost leaf of the tree. If a +// suitable leaf is found, the function will update the length field for all +// nodes to account for the size increase. The append region address will be +// written to region and the actual size increase will be written to size. +static inline bool PrepareAppendRegion(CordRep* root, char** region, + size_t* size, size_t max_length) { + // Search down the right-hand path for a non-full FLAT node. + CordRep* dst = root; + while (dst->tag == CONCAT && dst->refcount.IsOne()) { + dst = dst->concat()->right; + } + + if (dst->tag < FLAT || !dst->refcount.IsOne()) { + *region = nullptr; + *size = 0; + return false; + } + + const size_t in_use = dst->length; + const size_t capacity = TagToLength(dst->tag); + if (in_use == capacity) { + *region = nullptr; + *size = 0; + return false; + } + + size_t size_increase = std::min(capacity - in_use, max_length); + + // We need to update the length fields for all nodes, including the leaf node. + for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) { + rep->length += size_increase; + } + dst->length += size_increase; + + *region = dst->data + in_use; + *size = size_increase; + return true; +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, + size_t max_length) { + if (max_length == 0) { + *region = nullptr; + *size = 0; + return; + } + + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { + *region = data_ + inline_length; + *size = max_length; + data_[kMaxInline] = static_cast<char>(inline_length + max_length); + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = + NewFlat(std::max(static_cast<size_t>(root->length), max_length)); + new_node->length = + std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { + const size_t max_length = std::numeric_limits<size_t>::max(); + + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline) { + *region = data_ + inline_length; + *size = kMaxInline - inline_length; + data_[kMaxInline] = kMaxInline; + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = NewFlat(root->length); + new_node->length = TagToLength(new_node->tag); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +// If the rep is a leaf, this will increment the value at total_mem_usage and +// will return true. +static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { + if (rep->tag >= FLAT) { + *total_mem_usage += TagToAllocatedSize(rep->tag); + return true; + } + if (rep->tag == EXTERNAL) { + *total_mem_usage += sizeof(CordRepConcat) + rep->length; + return true; + } + return false; +} + +void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { + ClearSlow(); + + memcpy(data_, src.data_, sizeof(data_)); + if (is_tree()) { + Ref(tree()); + } +} + +void Cord::InlineRep::ClearSlow() { + if (is_tree()) { + Unref(tree()); + } + memset(data_, 0, sizeof(data_)); +} + +// -------------------------------------------------------------------- +// Constructors and destructors + +Cord::Cord(const Cord& src) : contents_(src.contents_) { + Ref(contents_.tree()); // Does nothing if contents_ has embedded data +} + +Cord::Cord(absl::string_view src) { + const size_t n = src.size(); + if (n <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), n, false); + } else { + contents_.set_tree(NewTree(src.data(), n, 0)); + } +} + +// The destruction code is separate so that the compiler can determine +// that it does not need to call the destructor on a moved-from Cord. +void Cord::DestroyCordSlow() { + Unref(VerifyTree(contents_.tree())); +} + +// -------------------------------------------------------------------- +// Mutators + +void Cord::Clear() { + Unref(contents_.clear()); +} + +Cord& Cord::operator=(absl::string_view src) { + + const char* data = src.data(); + size_t length = src.size(); + CordRep* tree = contents_.tree(); + if (length <= InlineRep::kMaxInline) { + // Embed into this->contents_ + contents_.set_data(data, length, true); + Unref(tree); + return *this; + } + if (tree != nullptr && tree->tag >= FLAT && + TagToLength(tree->tag) >= length && tree->refcount.IsOne()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->data, data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.set_tree(NewTree(data, length, 0)); + Unref(tree); + return *this; +} + +// TODO(sanjay): Move to Cord::InlineRep section of file. For now, +// we keep it here to make diffs easier. +void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { + if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { + // Append new data to embedded array + data_[kMaxInline] = static_cast<char>(inline_length + src_size); + memcpy(data_ + inline_length, src_data, src_size); + return; + } + + CordRep* root = tree(); + + size_t appended = 0; + if (root) { + char* region; + if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { + memcpy(region, src_data, appended); + } + } else { + // It is possible that src_data == data_, but when we transition from an + // InlineRep to a tree we need to assign data_ = root via set_tree. To + // avoid corrupting the source data before we copy it, delay calling + // set_tree until after we've copied data. + // We are going from an inline size to beyond inline size. Make the new size + // either double the inlined size, or the added size + 10%. + const size_t size1 = inline_length * 2 + src_size; + const size_t size2 = inline_length + src_size / 10; + root = NewFlat(std::max<size_t>(size1, size2)); + appended = std::min(src_size, TagToLength(root->tag) - inline_length); + memcpy(root->data, data_, inline_length); + memcpy(root->data + inline_length, src_data, appended); + root->length = inline_length + appended; + set_tree(root); + } + + src_data += appended; + src_size -= appended; + if (src_size == 0) { + return; + } + + // Use new block(s) for any remaining bytes that were not handled above. + // Alloc extra memory only if the right child of the root of the new tree is + // going to be a FLAT node, which will permit further inplace appends. + size_t length = src_size; + if (src_size < kMaxFlatLength) { + // The new length is either + // - old size + 10% + // - old_size + src_size + // This will cause a reasonable conservative step-up in size that is still + // large enough to avoid excessive amounts of small fragments being added. + length = std::max<size_t>(root->length / 10, src_size); + } + set_tree(Concat(root, NewTree(src_data, src_size, length - src_size))); +} + +inline CordRep* Cord::TakeRep() const& { + return Ref(contents_.tree()); +} + +inline CordRep* Cord::TakeRep() && { + CordRep* rep = contents_.tree(); + contents_.clear(); + return rep; +} + +template <typename C> +inline void Cord::AppendImpl(C&& src) { + if (empty()) { + // In case of an empty destination avoid allocating a new node, do not copy + // data. + *this = std::forward<C>(src); + return; + } + + // For short cords, it is faster to copy data if there is room in dst. + const size_t src_size = src.contents_.size(); + if (src_size <= kMaxBytesToCopy) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree == nullptr) { + // src has embedded data. + contents_.AppendArray(src.contents_.data(), src_size); + return; + } + if (src_tree->tag >= FLAT) { + // src tree just has one flat node. + contents_.AppendArray(src_tree->data, src_size); + return; + } + if (&src == this) { + // ChunkIterator below assumes that src is not modified during traversal. + Append(Cord(src)); + return; + } + // TODO(mec): Should we only do this if "dst" has space? + for (absl::string_view chunk : src.Chunks()) { + Append(chunk); + } + return; + } + + contents_.AppendTree(std::forward<C>(src).TakeRep()); +} + +void Cord::Append(const Cord& src) { AppendImpl(src); } + +void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } + +void Cord::Prepend(const Cord& src) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree != nullptr) { + Ref(src_tree); + contents_.PrependTree(src_tree); + return; + } + + // `src` cord is inlined. + absl::string_view src_contents(src.contents_.data(), src.contents_.size()); + return Prepend(src_contents); +} + +void Cord::Prepend(absl::string_view src) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + size_t cur_size = contents_.size(); + if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + data[InlineRep::kMaxInline] = cur_size + src.size(); // set size + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(reinterpret_cast<void*>(&contents_), data, + InlineRep::kMaxInline + 1); + } else { + contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + } +} + +static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->left->length) { + // Push right to stack, descend left. + rhs_stack.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Drop left, descend right. + n -= node->concat()->left->length; + node = node->concat()->right; + } + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else { + size_t start = n; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + // Consider in-place update of node, similar to in RemoveSuffixFrom(). + start += node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!rhs_stack.empty()) { + node = Concat(node, Ref(rhs_stack.back())); + rhs_stack.pop_back(); + } + return node; +} + +// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the +// exception that removing a suffix has an optimization where a node may be +// edited in place iff that node and all its ancestors have a refcount of 1. +static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; + bool inplace_ok = node->refcount.IsOne(); + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->right->length) { + // Push left to stack, descend right. + lhs_stack.push_back(node->concat()->left); + node = node->concat()->right; + } else { + // Drop right, descend left. + n -= node->concat()->right->length; + node = node->concat()->left; + } + inplace_ok = inplace_ok && node->refcount.IsOne(); + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else if (inplace_ok && node->tag != EXTERNAL) { + // Consider making a new buffer if the current node capacity is much + // larger than the new length. + Ref(node); + node->length -= n; + } else { + size_t start = 0; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + start = node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!lhs_stack.empty()) { + node = Concat(Ref(lhs_stack.back()), node); + lhs_stack.pop_back(); + } + return node; +} + +void Cord::RemovePrefix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested prefix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.remove_prefix(n); + } else { + CordRep* newrep = RemovePrefixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +void Cord::RemoveSuffix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested suffix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.reduce_size(n); + } else { + CordRep* newrep = RemoveSuffixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +// Work item for NewSubRange(). +struct SubRange { + SubRange(CordRep* a_node, size_t a_pos, size_t a_n) + : node(a_node), pos(a_pos), n(a_n) {} + CordRep* node; // nullptr means concat last 2 results. + size_t pos; + size_t n; +}; + +static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { + absl::InlinedVector<CordRep*, kInlinedVectorSize> results; + absl::InlinedVector<SubRange, kInlinedVectorSize> todo; + todo.push_back(SubRange(node, pos, n)); + do { + const SubRange& sr = todo.back(); + node = sr.node; + pos = sr.pos; + n = sr.n; + todo.pop_back(); + + if (node == nullptr) { + assert(results.size() >= 2); + CordRep* right = results.back(); + results.pop_back(); + CordRep* left = results.back(); + results.pop_back(); + results.push_back(Concat(left, right)); + } else if (pos == 0 && n == node->length) { + results.push_back(Ref(node)); + } else if (node->tag != CONCAT) { + if (node->tag == SUBSTRING) { + pos += node->substring()->start; + node = node->substring()->child; + } + results.push_back(NewSubstring(Ref(node), pos, n)); + } else if (pos + n <= node->concat()->left->length) { + todo.push_back(SubRange(node->concat()->left, pos, n)); + } else if (pos >= node->concat()->left->length) { + pos -= node->concat()->left->length; + todo.push_back(SubRange(node->concat()->right, pos, n)); + } else { + size_t left_n = node->concat()->left->length - pos; + todo.push_back(SubRange(nullptr, 0, 0)); // Concat() + todo.push_back(SubRange(node->concat()->right, 0, n - left_n)); + todo.push_back(SubRange(node->concat()->left, pos, left_n)); + } + } while (!todo.empty()); + assert(results.size() == 1); + return results[0]; +} + +Cord Cord::Subcord(size_t pos, size_t new_size) const { + Cord sub_cord; + size_t length = size(); + if (pos > length) pos = length; + if (new_size > length - pos) new_size = length - pos; + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + // sub_cord is newly constructed, no need to re-zero-out the tail of + // contents_ memory. + sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); + } else if (new_size == 0) { + // We want to return empty subcord, so nothing to do. + } else if (new_size <= InlineRep::kMaxInline) { + Cord::ChunkIterator it = chunk_begin(); + it.AdvanceBytes(pos); + char* dest = sub_cord.contents_.data_; + size_t remaining_size = new_size; + while (remaining_size > it->size()) { + cord_internal::SmallMemmove(dest, it->data(), it->size()); + remaining_size -= it->size(); + dest += it->size(); + ++it; + } + cord_internal::SmallMemmove(dest, it->data(), remaining_size); + sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size; + } else { + sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); + } + return sub_cord; +} + +// -------------------------------------------------------------------- +// Balancing + +class CordForest { + public: + explicit CordForest(size_t length) + : root_length_(length), trees_(kMinLengthSize, nullptr) {} + + void Build(CordRep* cord_root) { + std::vector<CordRep*> pending = {cord_root}; + + while (!pending.empty()) { + CordRep* node = pending.back(); + pending.pop_back(); + CheckNode(node); + if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) { + AddNode(node); + continue; + } + + CordRepConcat* concat_node = node->concat(); + if (concat_node->depth() >= kMinLengthSize || + concat_node->length < min_length[concat_node->depth()]) { + pending.push_back(concat_node->right); + pending.push_back(concat_node->left); + + if (concat_node->refcount.IsOne()) { + concat_node->left = concat_freelist_; + concat_freelist_ = concat_node; + } else { + Ref(concat_node->right); + Ref(concat_node->left); + Unref(concat_node); + } + } else { + AddNode(node); + } + } + } + + CordRep* ConcatNodes() { + CordRep* sum = nullptr; + for (auto* node : trees_) { + if (node == nullptr) continue; + + sum = PrependNode(node, sum); + root_length_ -= node->length; + if (root_length_ == 0) break; + } + ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node"); + return VerifyTree(sum); + } + + private: + CordRep* AppendNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(sum, node); + } + + CordRep* PrependNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(node, sum); + } + + void AddNode(CordRep* node) { + CordRep* sum = nullptr; + + // Collect together everything with which we will merge node + int i = 0; + for (; node->length > min_length[i + 1]; ++i) { + auto& tree_at_i = trees_[i]; + + if (tree_at_i == nullptr) continue; + sum = PrependNode(tree_at_i, sum); + tree_at_i = nullptr; + } + + sum = AppendNode(node, sum); + + // Insert sum into appropriate place in the forest + for (; sum->length >= min_length[i]; ++i) { + auto& tree_at_i = trees_[i]; + if (tree_at_i == nullptr) continue; + + sum = MakeConcat(tree_at_i, sum); + tree_at_i = nullptr; + } + + // min_length[0] == 1, which means sum->length >= min_length[0] + assert(i > 0); + trees_[i - 1] = sum; + } + + // Make concat node trying to resue existing CordRepConcat nodes we + // already collected in the concat_freelist_. + CordRep* MakeConcat(CordRep* left, CordRep* right) { + if (concat_freelist_ == nullptr) return RawConcat(left, right); + + CordRepConcat* rep = concat_freelist_; + if (concat_freelist_->left == nullptr) { + concat_freelist_ = nullptr; + } else { + concat_freelist_ = concat_freelist_->left->concat(); + } + SetConcatChildren(rep, left, right); + + return rep; + } + + static void CheckNode(CordRep* node) { + ABSL_INTERNAL_CHECK(node->length != 0u, ""); + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); + ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + + node->concat()->right->length), + ""); + } + } + + size_t root_length_; + + // use an inlined vector instead of a flat array to get bounds checking + absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_; + + // List of concat nodes we can re-use for Cord balancing. + CordRepConcat* concat_freelist_ = nullptr; +}; + +static CordRep* Rebalance(CordRep* node) { + VerifyTree(node); + assert(node->tag == CONCAT); + + if (node->length == 0) { + return nullptr; + } + + CordForest forest(node->length); + forest.Build(node); + return forest.ConcatNodes(); +} + +// -------------------------------------------------------------------- +// Comparators + +namespace { + +int ClampResult(int memcmp_res) { + return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0); +} + +int CompareChunks(absl::string_view* lhs, absl::string_view* rhs, + size_t* size_to_compare) { + size_t compared_size = std::min(lhs->size(), rhs->size()); + assert(*size_to_compare >= compared_size); + *size_to_compare -= compared_size; + + int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size); + if (memcmp_res != 0) return memcmp_res; + + lhs->remove_prefix(compared_size); + rhs->remove_prefix(compared_size); + + return 0; +} + +// This overload set computes comparison results from memcmp result. This +// interface is used inside GenericCompare below. Differet implementations +// are specialized for int and bool. For int we clamp result to {-1, 0, 1} +// set. For bool we just interested in "value == 0". +template <typename ResultType> +ResultType ComputeCompareResult(int memcmp_res) { + return ClampResult(memcmp_res); +} +template <> +bool ComputeCompareResult<bool>(int memcmp_res) { + return memcmp_res == 0; +} + +} // namespace + +// Helper routine. Locates the first flat chunk of the Cord without +// initializing the iterator. +inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { + size_t n = data_[kMaxInline]; + if (n <= kMaxInline) { + return absl::string_view(data_, n); + } + + CordRep* node = tree(); + if (node->tag >= FLAT) { + return absl::string_view(node->data, node->length); + } + + if (node->tag == EXTERNAL) { + return absl::string_view(node->external()->base, node->length); + } + + // Walk down the left branches until we hit a non-CONCAT node. + while (node->tag == CONCAT) { + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + assert(length != 0); + + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + if (node->tag >= FLAT) { + return absl::string_view(node->data + offset, length); + } + + assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); + + return absl::string_view(node->external()->base + offset, length); +} + +inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + + // compared_size is inside first chunk. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs.size()); + lhs_chunk.remove_prefix(compared_size); + rhs.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) { + int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare); + if (comparison_result != 0) return comparison_result; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty()); +} + +inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + Cord::ChunkIterator rhs_it = rhs.chunk_begin(); + + // compared_size is inside both first chunks. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + absl::string_view rhs_chunk = + (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs_chunk.size()); + lhs_chunk.remove_prefix(compared_size); + rhs_chunk.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) { + int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare); + if (memcmp_res != 0) return memcmp_res; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs_chunk.empty()) - + static_cast<int>(lhs_chunk.empty()); +} + +inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + return c.contents_.FindFlatStartPiece(); +} +inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { + return sv; +} + +// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed +// that 'size_to_compare' is greater that size of smallest of first chunks. +template <typename ResultType, typename RHS> +ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare) { + absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs); + absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs); + + size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size()); + assert(size_to_compare >= compared_size); + int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size); + if (compared_size == size_to_compare || memcmp_res != 0) { + return ComputeCompareResult<ResultType>(memcmp_res); + } + + return ComputeCompareResult<ResultType>( + lhs.CompareSlowPath(rhs, compared_size, size_to_compare)); +} + +bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +template <typename RHS> +inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size == rhs_size) { + return GenericCompare<int>(lhs, rhs, lhs_size); + } + if (lhs_size < rhs_size) { + auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size); + return data_comp_res == 0 ? -1 : data_comp_res; + } + + auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size); + return data_comp_res == 0 ? +1 : data_comp_res; +} + +int Cord::Compare(absl::string_view rhs) const { + return SharedCompareImpl(*this, rhs); +} + +int Cord::CompareImpl(const Cord& rhs) const { + return SharedCompareImpl(*this, rhs); +} + +bool Cord::EndsWith(absl::string_view rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +bool Cord::EndsWith(const Cord& rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +// -------------------------------------------------------------------- +// Misc. + +Cord::operator std::string() const { + std::string s; + absl::CopyCordToString(*this, &s); + return s; +} + +void CopyCordToString(const Cord& src, std::string* dst) { + if (!src.contents_.is_tree()) { + src.contents_.CopyTo(dst); + } else { + absl::strings_internal::STLStringResizeUninitialized(dst, src.size()); + src.CopyToArraySlowPath(&(*dst)[0]); + } +} + +void Cord::CopyToArraySlowPath(char* dst) const { + assert(contents_.is_tree()); + absl::string_view fragment; + if (GetFlatAux(contents_.tree(), &fragment)) { + memcpy(dst, fragment.data(), fragment.size()); + return; + } + for (absl::string_view chunk : Chunks()) { + memcpy(dst, chunk.data(), chunk.size()); + dst += chunk.size(); + } +} + +Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + + if (stack_of_right_children_.empty()) { + assert(!current_chunk_.empty()); // Called on invalid iterator. + // We have reached the end of the Cord. + return *this; + } + + // Process the next node on the stack. + CordRep* node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + + // Walk down the left branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length != 0); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset, length); + current_leaf_ = node; + return *this; +} + +Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + Cord subcord; + + if (n <= InlineRep::kMaxInline) { + // Range to read fits in inline data. Flatten it. + char* data = subcord.contents_.set_data(n); + while (n > current_chunk_.size()) { + memcpy(data, current_chunk_.data(), current_chunk_.size()); + data += current_chunk_.size(); + n -= current_chunk_.size(); + ++*this; + } + memcpy(data, current_chunk_.data(), n); + if (n < current_chunk_.size()) { + RemoveChunkPrefix(n); + } else if (n > 0) { + ++*this; + } + return subcord; + } + if (n < current_chunk_.size()) { + // Range to read is a proper subrange of the current chunk. + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, n); + subcord.contents_.set_tree(VerifyTree(subnode)); + RemoveChunkPrefix(n); + return subcord; + } + + // Range to read begins with a proper subrange of the current chunk. + assert(!current_chunk_.empty()); + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + if (current_chunk_.size() < subnode->length) { + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, + current_chunk_.size()); + } + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, reading whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + // TODO(qrczak): This might unnecessarily recreate existing concat nodes. + // Avoiding that would need pretty complicated logic (instead of + // current_leaf_, keep current_subtree_ which points to the highest node + // such that the current leaf can be found on the path of left children + // starting from current_subtree_; delay creating subnode while node is + // below current_subtree_; find the proper node along the path of left + // children starting from current_subtree_ if this loop exits while staying + // below current_subtree_; etc.; alternatively, push parents instead of + // right children on the stack). + subnode = Concat(subnode, Ref(node)); + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Read left, descend right. + subnode = Concat(subnode, Ref(node->concat()->left)); + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + // Range to read ends with a proper (possibly empty) subrange of the current + // chunk. + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n)); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; +} + +void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + assert(n >= current_chunk_.size()); // This should only be called when + // iterating to a new node. + + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, skipping whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Skip left, descend right. + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; +} + +char Cord::operator[](size_t i) const { + assert(i < size()); + size_t offset = i; + const CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return contents_.data()[i]; + } + while (true) { + assert(rep != nullptr); + assert(offset < rep->length); + if (rep->tag >= FLAT) { + // Get the "i"th character directly from the flat array. + return rep->data[offset]; + } else if (rep->tag == EXTERNAL) { + // Get the "i"th character from the external array. + return rep->external()->base[offset]; + } else if (rep->tag == CONCAT) { + // Recursively branch to the side of the concatenation that the "i"th + // character is on. + size_t left_length = rep->concat()->left->length; + if (offset < left_length) { + rep = rep->concat()->left; + } else { + offset -= left_length; + rep = rep->concat()->right; + } + } else { + // This must be a substring a node, so bypass it to get to the child. + assert(rep->tag == SUBSTRING); + offset += rep->substring()->start; + rep = rep->substring()->child; + } + } +} + +absl::string_view Cord::FlattenSlowPath() { + size_t total_size = size(); + CordRep* new_rep; + char* new_buffer; + + // Try to put the contents into a new flat rep. If they won't fit in the + // biggest possible flat node, use an external rep instead. + if (total_size <= kMaxFlatLength) { + new_rep = NewFlat(total_size); + new_rep->length = total_size; + new_buffer = new_rep->data; + CopyToArraySlowPath(new_buffer); + } else { + new_buffer = std::allocator<char>().allocate(total_size); + CopyToArraySlowPath(new_buffer); + new_rep = absl::cord_internal::NewExternalRep( + absl::string_view(new_buffer, total_size), [](absl::string_view s) { + std::allocator<char>().deallocate(const_cast<char*>(s.data()), + s.size()); + }); + } + Unref(contents_.tree()); + contents_.set_tree(new_rep); + return absl::string_view(new_buffer, total_size); +} + +/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { + assert(rep != nullptr); + if (rep->tag >= FLAT) { + *fragment = absl::string_view(rep->data, rep->length); + return true; + } else if (rep->tag == EXTERNAL) { + *fragment = absl::string_view(rep->external()->base, rep->length); + return true; + } else if (rep->tag == SUBSTRING) { + CordRep* child = rep->substring()->child; + if (child->tag >= FLAT) { + *fragment = + absl::string_view(child->data + rep->substring()->start, rep->length); + return true; + } else if (child->tag == EXTERNAL) { + *fragment = absl::string_view( + child->external()->base + rep->substring()->start, rep->length); + return true; + } + } + return false; +} + +/* static */ void Cord::ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback) { + assert(rep != nullptr); + int stack_pos = 0; + constexpr int stack_max = 128; + // Stack of right branches for tree traversal + absl::cord_internal::CordRep* stack[stack_max]; + absl::cord_internal::CordRep* current_node = rep; + while (true) { + if (current_node->tag == CONCAT) { + if (stack_pos == stack_max) { + // There's no more room on our stack array to add another right branch, + // and the idea is to avoid allocations, so call this function + // recursively to navigate this subtree further. (This is not something + // we expect to happen in practice). + ForEachChunkAux(current_node, callback); + + // Pop the next right branch and iterate. + current_node = stack[--stack_pos]; + continue; + } else { + // Save the right branch for later traversal and continue down the left + // branch. + stack[stack_pos++] = current_node->concat()->right; + current_node = current_node->concat()->left; + continue; + } + } + // This is a leaf node, so invoke our callback. + absl::string_view chunk; + bool success = GetFlatAux(current_node, &chunk); + assert(success); + if (success) { + callback(chunk); + } + if (stack_pos == 0) { + // end of traversal + return; + } + current_node = stack[--stack_pos]; + } +} + +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { + const int kIndentStep = 1; + int indent = 0; + absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; + absl::InlinedVector<int, kInlinedVectorSize> indents; + for (;;) { + *os << std::setw(3) << rep->refcount.Get(); + *os << " " << std::setw(7) << rep->length; + *os << " ["; + if (include_data) *os << static_cast<void*>(rep); + *os << "]"; + *os << " " << (IsRootBalanced(rep) ? 'b' : 'u'); + *os << " " << std::setw(indent) << ""; + if (rep->tag == CONCAT) { + *os << "CONCAT depth=" << Depth(rep) << "\n"; + indent += kIndentStep; + indents.push_back(indent); + stack.push_back(rep->concat()->right); + rep = rep->concat()->left; + } else if (rep->tag == SUBSTRING) { + *os << "SUBSTRING @ " << rep->substring()->start << "\n"; + indent += kIndentStep; + rep = rep->substring()->child; + } else { // Leaf + if (rep->tag == EXTERNAL) { + *os << "EXTERNAL ["; + if (include_data) + *os << absl::CEscape(std::string(rep->external()->base, rep->length)); + *os << "]\n"; + } else { + *os << "FLAT cap=" << TagToLength(rep->tag) << " ["; + if (include_data) + *os << absl::CEscape(std::string(rep->data, rep->length)); + *os << "]\n"; + } + if (stack.empty()) break; + rep = stack.back(); + stack.pop_back(); + indent = indents.back(); + indents.pop_back(); + } + } + ABSL_INTERNAL_CHECK(indents.empty(), ""); +} + +static std::string ReportError(CordRep* root, CordRep* node) { + std::ostringstream buf; + buf << "Error at node " << node << " in:"; + DumpNode(root, true, &buf); + return buf.str(); +} + +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation) { + absl::InlinedVector<CordRep*, 2> worklist; + worklist.push_back(start_node); + do { + CordRep* node = worklist.back(); + worklist.pop_back(); + + ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); + if (node != root) { + ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); + } + + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length + + node->concat()->right->length), + ReportError(root, node)); + if (full_validation) { + worklist.push_back(node->concat()->right); + worklist.push_back(node->concat()->left); + } + } else if (node->tag >= FLAT) { + ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag), + ReportError(root, node)); + } else if (node->tag == EXTERNAL) { + ABSL_INTERNAL_CHECK(node->external()->base != nullptr, + ReportError(root, node)); + } else if (node->tag == SUBSTRING) { + ABSL_INTERNAL_CHECK( + node->substring()->start < node->substring()->child->length, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= + node->substring()->child->length, + ReportError(root, node)); + } + } while (!worklist.empty()); + return true; +} + +// Traverses the tree and computes the total memory allocated. +/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) { + size_t total_mem_usage = 0; + + // Allow a quick exit for the common case that the root is a leaf. + if (RepMemoryUsageLeaf(rep, &total_mem_usage)) { + return total_mem_usage; + } + + // Iterate over the tree. cur_node is never a leaf node and leaf nodes will + // never be appended to tree_stack. This reduces overhead from manipulating + // tree_stack. + absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack; + const CordRep* cur_node = rep; + while (true) { + const CordRep* next_node = nullptr; + + if (cur_node->tag == CONCAT) { + total_mem_usage += sizeof(CordRepConcat); + const CordRep* left = cur_node->concat()->left; + if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { + next_node = left; + } + + const CordRep* right = cur_node->concat()->right; + if (!RepMemoryUsageLeaf(right, &total_mem_usage)) { + if (next_node) { + tree_stack.push_back(next_node); + } + next_node = right; + } + } else { + // Since cur_node is not a leaf or a concat node it must be a substring. + assert(cur_node->tag == SUBSTRING); + total_mem_usage += sizeof(CordRepSubstring); + next_node = cur_node->substring()->child; + if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { + next_node = nullptr; + } + } + + if (!next_node) { + if (tree_stack.empty()) { + return total_mem_usage; + } + next_node = tree_stack.back(); + tree_stack.pop_back(); + } + cur_node = next_node; + } +} + +std::ostream& operator<<(std::ostream& out, const Cord& cord) { + for (absl::string_view chunk : cord.Chunks()) { + out.write(chunk.data(), chunk.size()); + } + return out; +} + +namespace strings_internal { +size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; } +size_t CordTestAccess::FlatTagToLength(uint8_t tag) { + return TagToLength(tag); +} +uint8_t CordTestAccess::LengthToTag(size_t s) { + ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); + return AllocatedSizeToTag(s + kFlatOverhead); +} +size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } +size_t CordTestAccess::SizeofCordRepExternal() { + return sizeof(CordRepExternal); +} +size_t CordTestAccess::SizeofCordRepSubstring() { + return sizeof(CordRepSubstring); +} +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord.h b/absl/strings/cord.h new file mode 100644 index 000000000000..40566cbaa011 --- /dev/null +++ b/absl/strings/cord.h @@ -0,0 +1,1121 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A Cord is a sequence of characters with some unusual access propreties. +// A Cord supports efficient insertions and deletions at the start and end of +// the byte sequence, but random access reads are slower, and random access +// modifications are not supported by the API. Cord also provides cheap copies +// (using a copy-on-write strategy) and cheap substring operations. +// +// Thread safety +// ------------- +// Cord has the same thread-safety properties as many other types like +// std::string, std::vector<>, int, etc -- it is thread-compatible. In +// particular, if no thread may call a non-const method, then it is safe to +// concurrently call const methods. Copying a Cord produces a new instance that +// can be used concurrently with the original in arbitrary ways. +// +// Implementation is similar to the "Ropes" described in: +// Ropes: An alternative to strings +// Hans J. Boehm, Russ Atkinson, Michael Plass +// Software Practice and Experience, December 1995 + +#ifndef ABSL_STRINGS_CORD_H_ +#define ABSL_STRINGS_CORD_H_ + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iostream> +#include <iterator> +#include <string> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/invoke.h" +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +class Cord; +class CordTestPeer; +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view, Releaser&&); +void CopyCordToString(const Cord& src, std::string* dst); +namespace hash_internal { +template <typename H> +H HashFragmentedCord(H, const Cord&); +} + +// A Cord is a sequence of characters. +class Cord { + private: + template <typename T> + using EnableIfString = + absl::enable_if_t<std::is_same<T, std::string>::value, int>; + + public: + // -------------------------------------------------------------------- + // Constructors, destructors and helper factories + + // Create an empty cord + constexpr Cord() noexcept; + + // Cord is copyable and efficiently movable. + // The moved-from state is valid but unspecified. + Cord(const Cord& src); + Cord(Cord&& src) noexcept; + Cord& operator=(const Cord& x); + Cord& operator=(Cord&& x) noexcept; + + // Create a cord out of "src". This constructor is explicit on + // purpose so that people do not get automatic type conversions. + explicit Cord(absl::string_view src); + Cord& operator=(absl::string_view src); + + // These are templated to avoid ambiguities for types that are convertible to + // both `absl::string_view` and `std::string`, such as `const char*`. + // + // Note that these functions reserve the right to reuse the `string&&`'s + // memory and that they will do so in the future. + template <typename T, EnableIfString<T> = 0> + explicit Cord(T&& src) : Cord(absl::string_view(src)) {} + template <typename T, EnableIfString<T> = 0> + Cord& operator=(T&& src); + + // Destroy the cord + ~Cord() { + if (contents_.is_tree()) DestroyCordSlow(); + } + + // Creates a Cord that takes ownership of external memory. The contents of + // `data` are not copied. + // + // This function takes a callable that is invoked when all Cords are + // finished with `data`. The data must remain live and unchanging until the + // releaser is called. The requirements for the releaser are that it: + // * is move constructible, + // * supports `void operator()(absl::string_view) const`, + // * does not have alignment requirement greater than what is guaranteed by + // ::operator new. This is dictated by alignof(std::max_align_t) before + // C++17 and __STDCPP_DEFAULT_NEW_ALIGNMENT__ if compiling with C++17 or + // it is supported by the implementation. + // + // Example: + // + // Cord MakeCord(BlockPool* pool) { + // Block* block = pool->NewBlock(); + // FillBlock(block); + // return absl::MakeCordFromExternal( + // block->ToStringView(), + // [pool, block](absl::string_view /*ignored*/) { + // pool->FreeBlock(block); + // }); + // } + // + // WARNING: It's likely a bug if your releaser doesn't do anything. + // For example, consider the following: + // + // void Foo(const char* buffer, int len) { + // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), + // [](absl::string_view) {}); + // + // // BUG: If Bar() copies its cord for any reason, including keeping a + // // substring of it, the lifetime of buffer might be extended beyond + // // when Foo() returns. + // Bar(c); + // } + template <typename Releaser> + friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); + + // -------------------------------------------------------------------- + // Mutations + + void Clear(); + + void Append(const Cord& src); + void Append(Cord&& src); + void Append(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Append(T&& src); + + void Prepend(const Cord& src); + void Prepend(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Prepend(T&& src); + + void RemovePrefix(size_t n); + void RemoveSuffix(size_t n); + + // Returns a new cord representing the subrange [pos, pos + new_size) of + // *this. If pos >= size(), the result is empty(). If + // (pos + new_size) >= size(), the result is the subrange [pos, size()). + Cord Subcord(size_t pos, size_t new_size) const; + + friend void swap(Cord& x, Cord& y) noexcept; + + // -------------------------------------------------------------------- + // Accessors + + size_t size() const; + bool empty() const; + + // Returns the approximate number of bytes pinned by this Cord. Note that + // Cords that share memory could each be "charged" independently for the same + // shared memory. + size_t EstimatedMemoryUsage() const; + + // -------------------------------------------------------------------- + // Comparators + + // Compares 'this' Cord with rhs. This function and its relatives + // treat Cords as sequences of unsigned bytes. The comparison is a + // straightforward lexicographic comparison. Return value: + // -1 'this' Cord is smaller + // 0 two Cords are equal + // 1 'this' Cord is larger + int Compare(absl::string_view rhs) const; + int Compare(const Cord& rhs) const; + + // Does 'this' cord start/end with rhs + bool StartsWith(const Cord& rhs) const; + bool StartsWith(absl::string_view rhs) const; + bool EndsWith(absl::string_view rhs) const; + bool EndsWith(const Cord& rhs) const; + + // -------------------------------------------------------------------- + // Conversion to other types + + explicit operator std::string() const; + + // Copies the contents from `src` to `*dst`. + // + // This function optimizes the case of reusing the destination std::string since it + // can reuse previously allocated capacity. However, this function does not + // guarantee that pointers previously returned by `dst->data()` remain valid + // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new + // object, prefer to simply use the conversion operator to `std::string`. + friend void CopyCordToString(const Cord& src, std::string* dst); + + // -------------------------------------------------------------------- + // Iteration + + class CharIterator; + + // Type for iterating over the chunks of a `Cord`. See comments for + // `Cord::chunk_begin()`, `Cord::chunk_end()` and `Cord::Chunks()` below for + // preferred usage. + // + // Additional notes: + // * The `string_view` returned by dereferencing a valid, non-`end()` + // iterator is guaranteed to be non-empty. + // * A `ChunkIterator` object is invalidated after any non-const + // operation on the `Cord` object over which it iterates. + // * Two `ChunkIterator` objects can be equality compared if and only if + // they remain valid and iterate over the same `Cord`. + // * This is a proxy iterator. This means the `string_view` returned by the + // iterator does not live inside the Cord, and its lifetime is limited to + // the lifetime of the iterator itself. To help prevent issues, + // `ChunkIterator::reference` is not a true reference type and is + // equivalent to `value_type`. + // * The iterator keeps state that can grow for `Cord`s that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + class ChunkIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = value_type; + + ChunkIterator() = default; + + ChunkIterator& operator++(); + ChunkIterator operator++(int); + bool operator==(const ChunkIterator& other) const; + bool operator!=(const ChunkIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend class Cord; + friend class CharIterator; + + private: + // Constructs a `begin()` iterator from `cord`. + explicit ChunkIterator(const Cord* cord); + + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than + // `current_chunk_.size()`. + void RemoveChunkPrefix(size_t n); + Cord AdvanceAndReadBytes(size_t n); + void AdvanceBytes(size_t n); + // Iterates `n` bytes, where `n` is expected to be greater than or equal to + // `current_chunk_.size()`. + void AdvanceBytesSlowPath(size_t n); + + // A view into bytes of the current `CordRep`. It may only be a view to a + // suffix of bytes if this is being used by `CharIterator`. + absl::string_view current_chunk_; + // The current leaf, or `nullptr` if the iterator points to short data. + // If the current chunk is a substring node, current_leaf_ points to the + // underlying flat or external node. + absl::cord_internal::CordRep* current_leaf_ = nullptr; + // The number of bytes left in the `Cord` over which we are iterating. + size_t bytes_remaining_ = 0; + absl::InlinedVector<absl::cord_internal::CordRep*, 4> + stack_of_right_children_; + }; + + // Returns an iterator to the first chunk of the `Cord`. + // + // This is useful for getting a `ChunkIterator` outside the context of a + // range-based for-loop (in which case see `Cord::Chunks()` below). + // + // Example: + // + // absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c, + // absl::string_view s) { + // return std::find(c.chunk_begin(), c.chunk_end(), s); + // } + ChunkIterator chunk_begin() const; + // Returns an iterator one increment past the last chunk of the `Cord`. + ChunkIterator chunk_end() const; + + // Convenience wrapper over `Cord::chunk_begin()` and `Cord::chunk_end()` to + // enable range-based for-loop iteration over `Cord` chunks. + // + // Prefer to use `Cord::Chunks()` below instead of constructing this directly. + class ChunkRange { + public: + explicit ChunkRange(const Cord* cord) : cord_(cord) {} + + ChunkIterator begin() const; + ChunkIterator end() const; + + private: + const Cord* cord_; + }; + + // Returns a range for iterating over the chunks of a `Cord` with a + // range-based for-loop. + // + // Example: + // + // void ProcessChunks(const Cord& cord) { + // for (absl::string_view chunk : cord.Chunks()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (absl::string_view chunk : CordFactory().Chunks()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + ChunkRange Chunks() const; + + // Type for iterating over the characters of a `Cord`. See comments for + // `Cord::char_begin()`, `Cord::char_end()` and `Cord::Chars()` below for + // preferred usage. + // + // Additional notes: + // * A `CharIterator` object is invalidated after any non-const + // operation on the `Cord` object over which it iterates. + // * Two `CharIterator` objects can be equality compared if and only if + // they remain valid and iterate over the same `Cord`. + // * The iterator keeps state that can grow for `Cord`s that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + // * This type cannot be a forward iterator because a `Cord` can reuse + // sections of memory. This violates the requirement that if dereferencing + // two iterators returns the same object, the iterators must compare + // equal. + class CharIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = char; + using difference_type = ptrdiff_t; + using pointer = const char*; + using reference = const char&; + + CharIterator() = default; + + CharIterator& operator++(); + CharIterator operator++(int); + bool operator==(const CharIterator& other) const; + bool operator!=(const CharIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend Cord; + + private: + explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {} + + ChunkIterator chunk_iterator_; + }; + + // Advances `*it` by `n_bytes` and returns the bytes passed as a `Cord`. + // + // `n_bytes` must be less than or equal to the number of bytes remaining for + // iteration. Otherwise the behavior is undefined. It is valid to pass + // `char_end()` and 0. + static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); + + // Advances `*it` by `n_bytes`. + // + // `n_bytes` must be less than or equal to the number of bytes remaining for + // iteration. Otherwise the behavior is undefined. It is valid to pass + // `char_end()` and 0. + static void Advance(CharIterator* it, size_t n_bytes); + + // Returns the longest contiguous view starting at the iterator's position. + // + // `it` must be dereferenceable. + static absl::string_view ChunkRemaining(const CharIterator& it); + + // Returns an iterator to the first character of the `Cord`. + CharIterator char_begin() const; + // Returns an iterator to one past the last character of the `Cord`. + CharIterator char_end() const; + + // Convenience wrapper over `Cord::char_begin()` and `Cord::char_end()` to + // enable range-based for-loop iterator over the characters of a `Cord`. + // + // Prefer to use `Cord::Chars()` below instead of constructing this directly. + class CharRange { + public: + explicit CharRange(const Cord* cord) : cord_(cord) {} + + CharIterator begin() const; + CharIterator end() const; + + private: + const Cord* cord_; + }; + + // Returns a range for iterating over the characters of a `Cord` with a + // range-based for-loop. + // + // Example: + // + // void ProcessCord(const Cord& cord) { + // for (char c : cord.Chars()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (char c : CordFactory().Chars()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + CharRange Chars() const; + + // -------------------------------------------------------------------- + // Miscellaneous + + // Get the "i"th character of 'this' and return it. + // NOTE: This routine is reasonably efficient. It is roughly + // logarithmic in the number of nodes that make up the cord. Still, + // if you need to iterate over the contents of a cord, you should + // use a CharIterator/CordIterator rather than call operator[] or Get() + // repeatedly in a loop. + // + // REQUIRES: 0 <= i < size() + char operator[](size_t i) const; + + // Flattens the cord into a single array and returns a view of the data. + // + // If the cord was already flat, the contents are not modified. + absl::string_view Flatten(); + + private: + friend class CordTestPeer; + template <typename H> + friend H absl::hash_internal::HashFragmentedCord(H, const Cord&); + friend bool operator==(const Cord& lhs, const Cord& rhs); + friend bool operator==(const Cord& lhs, absl::string_view rhs); + + // Call the provided function once for each cord chunk, in order. Unlike + // Chunks(), this API will not allocate memory. + void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; + + // Allocates new contiguous storage for the contents of the cord. This is + // called by Flatten() when the cord was not already flat. + absl::string_view FlattenSlowPath(); + + // Actual cord contents are hidden inside the following simple + // class so that we can isolate the bulk of cord.cc from changes + // to the representation. + // + // InlineRep holds either either a tree pointer, or an array of kMaxInline + // bytes. + class InlineRep { + public: + static const unsigned char kMaxInline = 15; + static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); + // Tag byte & kMaxInline means we are storing a pointer. + static const unsigned char kTreeFlag = 1 << 4; + // Tag byte & kProfiledFlag means we are profiling the Cord. + static const unsigned char kProfiledFlag = 1 << 5; + + constexpr InlineRep() : data_{} {} + InlineRep(const InlineRep& src); + InlineRep(InlineRep&& src); + InlineRep& operator=(const InlineRep& src); + InlineRep& operator=(InlineRep&& src) noexcept; + + void Swap(InlineRep* rhs); + bool empty() const; + size_t size() const; + const char* data() const; // Returns nullptr if holding pointer + void set_data(const char* data, size_t n, + bool nullify_tail); // Discards pointer, if any + char* set_data(size_t n); // Write data to the result + // Returns nullptr if holding bytes + absl::cord_internal::CordRep* tree() const; + // Discards old pointer, if any + void set_tree(absl::cord_internal::CordRep* rep); + // Replaces a tree with a new root. This is faster than set_tree, but it + // should only be used when it's clear that the old rep was a tree. + void replace_tree(absl::cord_internal::CordRep* rep); + // Returns non-null iff was holding a pointer + absl::cord_internal::CordRep* clear(); + // Convert to pointer if necessary + absl::cord_internal::CordRep* force_tree(size_t extra_hint); + void reduce_size(size_t n); // REQUIRES: holding data + void remove_prefix(size_t n); // REQUIRES: holding data + void AppendArray(const char* src_data, size_t src_size); + absl::string_view FindFlatStartPiece() const; + void AppendTree(absl::cord_internal::CordRep* tree); + void PrependTree(absl::cord_internal::CordRep* tree); + void GetAppendRegion(char** region, size_t* size, size_t max_length); + void GetAppendRegion(char** region, size_t* size); + bool IsSame(const InlineRep& other) const { + return memcmp(data_, other.data_, sizeof(data_)) == 0; + } + int BitwiseCompare(const InlineRep& other) const { + uint64_t x, y; + // Use memcpy to avoid anti-aliasing issues. + memcpy(&x, data_, sizeof(x)); + memcpy(&y, other.data_, sizeof(y)); + if (x == y) { + memcpy(&x, data_ + 8, sizeof(x)); + memcpy(&y, other.data_ + 8, sizeof(y)); + if (x == y) return 0; + } + return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y) + ? -1 + : 1; + } + void CopyTo(std::string* dst) const { + // memcpy is much faster when operating on a known size. On most supported + // platforms, the small std::string optimization is large enough that resizing + // to 15 bytes does not cause a memory allocation. + absl::strings_internal::STLStringResizeUninitialized(dst, + sizeof(data_) - 1); + memcpy(&(*dst)[0], data_, sizeof(data_) - 1); + // erase is faster than resize because the logic for memory allocation is + // not needed. + dst->erase(data_[kMaxInline]); + } + + // Copies the inline contents into `dst`. Assumes the cord is not empty. + void CopyToArray(char* dst) const; + + bool is_tree() const { return data_[kMaxInline] > kMaxInline; } + + private: + friend class Cord; + + void AssignSlow(const InlineRep& src); + // Unrefs the tree, stops profiling, and zeroes the contents + void ClearSlow(); + + // If the data has length <= kMaxInline, we store it in data_[0..len-1], + // and store the length in data_[kMaxInline]. Else we store it in a tree + // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1]. + alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1]; + }; + InlineRep contents_; + + // Helper for MemoryUsage() + static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep); + + // Helper for GetFlat() + static bool GetFlatAux(absl::cord_internal::CordRep* rep, + absl::string_view* fragment); + + // Helper for ForEachChunk() + static void ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback); + + // The destructor for non-empty Cords. + void DestroyCordSlow(); + + // Out-of-line implementation of slower parts of logic. + void CopyToArraySlowPath(char* dst) const; + int CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const; + int CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const; + bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const; + bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const; + int CompareImpl(const Cord& rhs) const; + + template <typename ResultType, typename RHS> + friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare); + static absl::string_view GetFirstChunk(const Cord& c); + static absl::string_view GetFirstChunk(absl::string_view sv); + + // Returns a new reference to contents_.tree(), or steals an existing + // reference if called on an rvalue. + absl::cord_internal::CordRep* TakeRep() const&; + absl::cord_internal::CordRep* TakeRep() &&; + + // Helper for Append() + template <typename C> + void AppendImpl(C&& src); +}; + +ABSL_NAMESPACE_END +} // namespace absl + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// allow a Cord to be logged +extern std::ostream& operator<<(std::ostream& out, const Cord& cord); + +// ------------------------------------------------------------------ +// Internal details follow. Clients should ignore. + +namespace cord_internal { + +// Fast implementation of memmove for up to 15 bytes. This implementation is +// safe for overlapping regions. If nullify_tail is true, the destination is +// padded with '\0' up to 16 bytes. +inline void SmallMemmove(char* dst, const char* src, size_t n, + bool nullify_tail = false) { + if (n >= 8) { + assert(n <= 16); + uint64_t buf1; + uint64_t buf2; + memcpy(&buf1, src, 8); + memcpy(&buf2, src + n - 8, 8); + if (nullify_tail) { + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 8); + memcpy(dst + n - 8, &buf2, 8); + } else if (n >= 4) { + uint32_t buf1; + uint32_t buf2; + memcpy(&buf1, src, 4); + memcpy(&buf2, src + n - 4, 4); + if (nullify_tail) { + memset(dst + 4, 0, 4); + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 4); + memcpy(dst + n - 4, &buf2, 4); + } else { + if (n != 0) { + dst[0] = src[0]; + dst[n / 2] = src[n / 2]; + dst[n - 1] = src[n - 1]; + } + if (nullify_tail) { + memset(dst + 8, 0, 8); + memset(dst + n, 0, 8); + } + } +} + +struct ExternalRepReleaserPair { + CordRep* rep; + void* releaser_address; +}; + +// Allocates a new external `CordRep` and returns a pointer to it and a pointer +// to `releaser_size` bytes where the desired releaser can be constructed. +// Expects `data` to be non-empty. +ExternalRepReleaserPair NewExternalWithUninitializedReleaser( + absl::string_view data, ExternalReleaserInvoker invoker, + size_t releaser_size); + +// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer +// to it, or `nullptr` if `data` was empty. +template <typename Releaser> +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) { + static_assert( +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + alignof(Releaser) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, +#else + alignof(Releaser) <= alignof(max_align_t), +#endif + "Releasers with alignment requirement greater than what is returned by " + "default `::operator new()` are not supported."); + + using ReleaserType = absl::decay_t<Releaser>; + if (data.empty()) { + // Never create empty external nodes. + ::absl::base_internal::Invoke( + ReleaserType(std::forward<Releaser>(releaser)), data); + return nullptr; + } + + auto releaser_invoker = [](void* type_erased_releaser, absl::string_view d) { + auto* my_releaser = static_cast<ReleaserType*>(type_erased_releaser); + ::absl::base_internal::Invoke(std::move(*my_releaser), d); + my_releaser->~ReleaserType(); + return sizeof(Releaser); + }; + + ExternalRepReleaserPair external = NewExternalWithUninitializedReleaser( + data, releaser_invoker, sizeof(releaser)); + ::new (external.releaser_address) + ReleaserType(std::forward<Releaser>(releaser)); + return external.rep; +} + +// Overload for function reference types that dispatches using a function +// pointer because there are no `alignof()` or `sizeof()` a function reference. +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +inline CordRep* NewExternalRep(absl::string_view data, + void (&releaser)(absl::string_view)) { + return NewExternalRep(data, &releaser); +} + +} // namespace cord_internal + +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { + Cord cord; + cord.contents_.set_tree(::absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser))); + return cord; +} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { + cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); +} + +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { + memcpy(data_, src.data_, sizeof(data_)); + memset(src.data_, 0, sizeof(data_)); +} + +inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { + if (this == &src) { + return *this; + } + if (!is_tree() && !src.is_tree()) { + cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + return *this; + } + AssignSlow(src); + return *this; +} + +inline Cord::InlineRep& Cord::InlineRep::operator=( + Cord::InlineRep&& src) noexcept { + if (is_tree()) { + ClearSlow(); + } + memcpy(data_, src.data_, sizeof(data_)); + memset(src.data_, 0, sizeof(data_)); + return *this; +} + +inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { + if (rhs == this) { + return; + } + + Cord::InlineRep tmp; + cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_)); + cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_)); + cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_)); +} + +inline const char* Cord::InlineRep::data() const { + return is_tree() ? nullptr : data_; +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { + if (is_tree()) { + absl::cord_internal::CordRep* rep; + memcpy(&rep, data_, sizeof(rep)); + return rep; + } else { + return nullptr; + } +} + +inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; } + +inline size_t Cord::InlineRep::size() const { + const char tag = data_[kMaxInline]; + if (tag <= kMaxInline) return tag; + return static_cast<size_t>(tree()->length); +} + +inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { + if (rep == nullptr) { + memset(data_, 0, sizeof(data_)); + } else { + bool was_tree = is_tree(); + memcpy(data_, &rep, sizeof(rep)); + memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); + if (!was_tree) { + data_[kMaxInline] = kTreeFlag; + } + } +} + +inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { + ABSL_ASSERT(is_tree()); + if (ABSL_PREDICT_FALSE(rep == nullptr)) { + set_tree(rep); + return; + } + memcpy(data_, &rep, sizeof(rep)); + memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { + const char tag = data_[kMaxInline]; + absl::cord_internal::CordRep* result = nullptr; + if (tag > kMaxInline) { + memcpy(&result, data_, sizeof(result)); + } + memset(data_, 0, sizeof(data_)); // Clear the cord + return result; +} + +inline void Cord::InlineRep::CopyToArray(char* dst) const { + assert(!is_tree()); + size_t n = data_[kMaxInline]; + assert(n != 0); + cord_internal::SmallMemmove(dst, data_, n); +} + +constexpr inline Cord::Cord() noexcept {} + +inline Cord& Cord::operator=(const Cord& x) { + contents_ = x.contents_; + return *this; +} + +inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} + +inline Cord& Cord::operator=(Cord&& x) noexcept { + contents_ = std::move(x.contents_); + return *this; +} + +template <typename T, Cord::EnableIfString<T>> +inline Cord& Cord::operator=(T&& src) { + *this = absl::string_view(src); + return *this; +} + +inline size_t Cord::size() const { + // Length is 1st field in str.rep_ + return contents_.size(); +} + +inline bool Cord::empty() const { return contents_.empty(); } + +inline size_t Cord::EstimatedMemoryUsage() const { + size_t result = sizeof(Cord); + if (const absl::cord_internal::CordRep* rep = contents_.tree()) { + result += MemoryUsageAux(rep); + } + return result; +} + +inline absl::string_view Cord::Flatten() { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } else { + absl::string_view already_flat_contents; + if (GetFlatAux(rep, &already_flat_contents)) { + return already_flat_contents; + } + } + return FlattenSlowPath(); +} + +inline void Cord::Append(absl::string_view src) { + contents_.AppendArray(src.data(), src.size()); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Append(T&& src) { + // Note that this function reserves the right to reuse the `string&&`'s + // memory and that it will do so in the future. + Append(absl::string_view(src)); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + // Note that this function reserves the right to reuse the `string&&`'s + // memory and that it will do so in the future. + Prepend(absl::string_view(src)); +} + +inline int Cord::Compare(const Cord& rhs) const { + if (!contents_.is_tree() && !rhs.contents_.is_tree()) { + return contents_.BitwiseCompare(rhs.contents_); + } + + return CompareImpl(rhs); +} + +// Does 'this' cord start/end with rhs +inline bool Cord::StartsWith(const Cord& rhs) const { + if (contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline bool Cord::StartsWith(absl::string_view rhs) const { + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) + : bytes_remaining_(cord->size()) { + if (cord->empty()) return; + if (cord->contents_.is_tree()) { + stack_of_right_children_.push_back(cord->contents_.tree()); + operator++(); + } else { + current_chunk_ = absl::string_view(cord->contents_.data(), cord->size()); + } +} + +inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { + ChunkIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const { + return bytes_remaining_ == other.bytes_remaining_; +} + +inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { + return !(*this == other); +} + +inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { + assert(bytes_remaining_ != 0); + return current_chunk_; +} + +inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { + assert(bytes_remaining_ != 0); + return ¤t_chunk_; +} + +inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { + assert(n < current_chunk_.size()); + current_chunk_.remove_prefix(n); + bytes_remaining_ -= n; +} + +inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { + RemoveChunkPrefix(n); + } else if (n != 0) { + AdvanceBytesSlowPath(n); + } +} + +inline Cord::ChunkIterator Cord::chunk_begin() const { + return ChunkIterator(this); +} + +inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); } + +inline Cord::ChunkIterator Cord::ChunkRange::begin() const { + return cord_->chunk_begin(); +} + +inline Cord::ChunkIterator Cord::ChunkRange::end() const { + return cord_->chunk_end(); +} + +inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); } + +inline Cord::CharIterator& Cord::CharIterator::operator++() { + if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) { + chunk_iterator_.RemoveChunkPrefix(1); + } else { + ++chunk_iterator_; + } + return *this; +} + +inline Cord::CharIterator Cord::CharIterator::operator++(int) { + CharIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::CharIterator::operator==(const CharIterator& other) const { + return chunk_iterator_ == other.chunk_iterator_; +} + +inline bool Cord::CharIterator::operator!=(const CharIterator& other) const { + return !(*this == other); +} + +inline Cord::CharIterator::reference Cord::CharIterator::operator*() const { + return *chunk_iterator_->data(); +} + +inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const { + return chunk_iterator_->data(); +} + +inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes); +} + +inline void Cord::Advance(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + it->chunk_iterator_.AdvanceBytes(n_bytes); +} + +inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) { + return *it.chunk_iterator_; +} + +inline Cord::CharIterator Cord::char_begin() const { + return CharIterator(this); +} + +inline Cord::CharIterator Cord::char_end() const { return CharIterator(); } + +inline Cord::CharIterator Cord::CharRange::begin() const { + return cord_->char_begin(); +} + +inline Cord::CharIterator Cord::CharRange::end() const { + return cord_->char_end(); +} + +inline Cord::CharRange Cord::Chars() const { return CharRange(this); } + +inline void Cord::ForEachChunk( + absl::FunctionRef<void(absl::string_view)> callback) const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + callback(absl::string_view(contents_.data(), contents_.size())); + } else { + return ForEachChunkAux(rep, callback); + } +} + +// Nonmember Cord-to-Cord relational operarators. +inline bool operator==(const Cord& lhs, const Cord& rhs) { + if (lhs.contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (lhs.size() != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, const Cord& y) { + return x.Compare(y) < 0; +} +inline bool operator>(const Cord& x, const Cord& y) { + return x.Compare(y) > 0; +} +inline bool operator<=(const Cord& x, const Cord& y) { + return x.Compare(y) <= 0; +} +inline bool operator>=(const Cord& x, const Cord& y) { + return x.Compare(y) >= 0; +} + +// Nonmember Cord-to-absl::string_view relational operators. +// +// Due to implicit conversions, these also enable comparisons of Cord with +// with std::string, ::string, and const char*. +inline bool operator==(const Cord& lhs, absl::string_view rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator==(absl::string_view x, const Cord& y) { return y == x; } +inline bool operator!=(const Cord& x, absl::string_view y) { return !(x == y); } +inline bool operator!=(absl::string_view x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, absl::string_view y) { + return x.Compare(y) < 0; +} +inline bool operator<(absl::string_view x, const Cord& y) { + return y.Compare(x) > 0; +} +inline bool operator>(const Cord& x, absl::string_view y) { return y < x; } +inline bool operator>(absl::string_view x, const Cord& y) { return y < x; } +inline bool operator<=(const Cord& x, absl::string_view y) { return !(y < x); } +inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } +inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } +inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } + +// Overload of swap for Cord. The use of non-const references is +// required. :( +inline void swap(Cord& x, Cord& y) noexcept { y.contents_.Swap(&x.contents_); } + +// Some internals exposed to test code. +namespace strings_internal { +class CordTestAccess { + public: + static size_t FlatOverhead(); + static size_t MaxFlatLength(); + static size_t SizeofCordRepConcat(); + static size_t SizeofCordRepExternal(); + static size_t SizeofCordRepSubstring(); + static size_t FlatTagToLength(uint8_t tag); + static uint8_t LengthToTag(size_t s); +}; +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_H_ diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc new file mode 100644 index 000000000000..434f3a247e08 --- /dev/null +++ b/absl/strings/cord_test.cc @@ -0,0 +1,1526 @@ +#include "absl/strings/cord.h" + +#include <algorithm> +#include <climits> +#include <cstdio> +#include <iterator> +#include <map> +#include <numeric> +#include <random> +#include <sstream> +#include <type_traits> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/container/fixed_array.h" +#include "absl/strings/cord_test_helpers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +typedef std::mt19937_64 RandomEngine; + +static std::string RandomLowercaseString(RandomEngine* rng); +static std::string RandomLowercaseString(RandomEngine* rng, size_t length); + +static int GetUniformRandomUpTo(RandomEngine* rng, int upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<int> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static size_t GetUniformRandomUpTo(RandomEngine* rng, size_t upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<size_t> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static int32_t GenerateSkewedRandom(RandomEngine* rng, int max_log) { + const uint32_t base = (*rng)() % (max_log + 1); + const uint32_t mask = ((base < 32) ? (1u << base) : 0u) - 1u; + return (*rng)() & mask; +} + +static std::string RandomLowercaseString(RandomEngine* rng) { + int length; + std::bernoulli_distribution one_in_1k(0.001); + std::bernoulli_distribution one_in_10k(0.0001); + // With low probability, make a large fragment + if (one_in_10k(*rng)) { + length = GetUniformRandomUpTo(rng, 1048576); + } else if (one_in_1k(*rng)) { + length = GetUniformRandomUpTo(rng, 10000); + } else { + length = GenerateSkewedRandom(rng, 10); + } + return RandomLowercaseString(rng, length); +} + +static std::string RandomLowercaseString(RandomEngine* rng, size_t length) { + std::string result(length, '\0'); + std::uniform_int_distribution<int> chars('a', 'z'); + std::generate(result.begin(), result.end(), [&]() { + return static_cast<char>(chars(*rng)); + }); + return result; +} + +static void DoNothing(absl::string_view /* data */, void* /* arg */) {} + +static void DeleteExternalString(absl::string_view data, void* arg) { + std::string* s = reinterpret_cast<std::string*>(arg); + EXPECT_EQ(data, *s); + delete s; +} + +// Add "s" to *dst via `MakeCordFromExternal` +static void AddExternalMemory(absl::string_view s, absl::Cord* dst) { + std::string* str = new std::string(s.data(), s.size()); + dst->Append(absl::MakeCordFromExternal(*str, [str](absl::string_view data) { + DeleteExternalString(data, str); + })); +} + +static void DumpGrowth() { + absl::Cord str; + for (int i = 0; i < 1000; i++) { + char c = 'a' + i % 26; + str.Append(absl::string_view(&c, 1)); + } +} + +// Make a Cord with some number of fragments. Return the size (in bytes) +// of the smallest fragment. +static size_t AppendWithFragments(const std::string& s, RandomEngine* rng, + absl::Cord* cord) { + size_t j = 0; + const size_t max_size = s.size() / 5; // Make approx. 10 fragments + size_t min_size = max_size; // size of smallest fragment + while (j < s.size()) { + size_t N = 1 + GetUniformRandomUpTo(rng, max_size); + if (N > (s.size() - j)) { + N = s.size() - j; + } + if (N < min_size) { + min_size = N; + } + + std::bernoulli_distribution coin_flip(0.5); + if (coin_flip(*rng)) { + // Grow by adding an external-memory. + AddExternalMemory(absl::string_view(s.data() + j, N), cord); + } else { + cord->Append(absl::string_view(s.data() + j, N)); + } + j += N; + } + return min_size; +} + +// Add an external memory that contains the specified std::string to cord +static void AddNewStringBlock(const std::string& str, absl::Cord* dst) { + char* data = new char[str.size()]; + memcpy(data, str.data(), str.size()); + dst->Append(absl::MakeCordFromExternal( + absl::string_view(data, str.size()), + [](absl::string_view s) { delete[] s.data(); })); +} + +// Make a Cord out of many different types of nodes. +static absl::Cord MakeComposite() { + absl::Cord cord; + cord.Append("the"); + AddExternalMemory(" quick brown", &cord); + AddExternalMemory(" fox jumped", &cord); + + absl::Cord full(" over"); + AddExternalMemory(" the lazy", &full); + AddNewStringBlock(" dog slept the whole day away", &full); + absl::Cord substring = full.Subcord(0, 18); + + // Make substring long enough to defeat the copying fast path in Append. + substring.Append(std::string(1000, '.')); + cord.Append(substring); + cord = cord.Subcord(0, cord.size() - 998); // Remove most of extra junk + + return cord; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class CordTestPeer { + public: + static void ForEachChunk( + const Cord& c, absl::FunctionRef<void(absl::string_view)> callback) { + c.ForEachChunk(callback); + } +}; + +ABSL_NAMESPACE_END +} // namespace absl + +TEST(Cord, AllFlatSizes) { + using absl::strings_internal::CordTestAccess; + + for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { + // Make a std::string of length s. + std::string src; + while (src.size() < s) { + src.push_back('a' + (src.size() % 26)); + } + + absl::Cord dst(src); + EXPECT_EQ(std::string(dst), src) << s; + } +} + +// We create a Cord at least 128GB in size using the fact that Cords can +// internally reference-count; thus the Cord is enormous without actually +// consuming very much memory. +TEST(GigabyteCord, FromExternal) { + const size_t one_gig = 1024U * 1024U * 1024U; + size_t max_size = 2 * one_gig; + if (sizeof(max_size) > 4) max_size = 128 * one_gig; + + size_t length = 128 * 1024; + char* data = new char[length]; + absl::Cord from = absl::MakeCordFromExternal( + absl::string_view(data, length), + [](absl::string_view sv) { delete[] sv.data(); }); + + // This loop may seem odd due to its combination of exponential doubling of + // size and incremental size increases. We do it incrementally to be sure the + // Cord will need rebalancing and will exercise code that, in the past, has + // caused crashes in production. We grow exponentially so that the code will + // execute in a reasonable amount of time. + absl::Cord c; + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + c.Append(from); + while (c.size() < max_size) { + c.Append(c); + c.Append(from); + c.Append(from); + c.Append(from); + c.Append(from); + } + + for (int i = 0; i < 1024; ++i) { + c.Append(from); + } + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + // Note: on a 32-bit build, this comes out to 2,818,048,000 bytes. + // Note: on a 64-bit build, this comes out to 171,932,385,280 bytes. +} + +static absl::Cord MakeExternalCord(int size) { + char* buffer = new char[size]; + memset(buffer, 'x', size); + absl::Cord cord; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(buffer, size), + [](absl::string_view s) { delete[] s.data(); })); + return cord; +} + +// Extern to fool clang that this is not constant. Needed to suppress +// a warning of unsafe code we want to test. +extern bool my_unique_true_boolean; +bool my_unique_true_boolean = true; + +TEST(Cord, Assignment) { + absl::Cord x(absl::string_view("hi there")); + absl::Cord y(x); + ASSERT_EQ(std::string(x), "hi there"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x == y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y <= x); + + x = absl::string_view("foo"); + ASSERT_EQ(std::string(x), "foo"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x < y); + ASSERT_TRUE(y > x); + ASSERT_TRUE(x != y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y >= x); + + x = "foo"; + ASSERT_EQ(x, "foo"); + + // Test that going from inline rep to tree we don't leak memory. + std::vector<std::pair<absl::string_view, absl::string_view>> + test_string_pairs = {{"hi there", "foo"}, + {"loooooong coooooord", "short cord"}, + {"short cord", "loooooong coooooord"}, + {"loooooong coooooord1", "loooooong coooooord2"}}; + for (std::pair<absl::string_view, absl::string_view> test_strings : + test_string_pairs) { + absl::Cord tmp(test_strings.first); + absl::Cord z(std::move(tmp)); + ASSERT_EQ(std::string(z), test_strings.first); + tmp = test_strings.second; + z = std::move(tmp); + ASSERT_EQ(std::string(z), test_strings.second); + } + { + // Test that self-move assignment doesn't crash/leak. + // Do not write such code! + absl::Cord my_small_cord("foo"); + absl::Cord my_big_cord("loooooong coooooord"); + // Bypass clang's warning on self move-assignment. + absl::Cord* my_small_alias = + my_unique_true_boolean ? &my_small_cord : &my_big_cord; + absl::Cord* my_big_alias = + !my_unique_true_boolean ? &my_small_cord : &my_big_cord; + + *my_small_alias = std::move(my_small_cord); + *my_big_alias = std::move(my_big_cord); + // my_small_cord and my_big_cord are in an unspecified but valid + // state, and will be correctly destroyed here. + } +} + +TEST(Cord, StartsEndsWith) { + absl::Cord x(absl::string_view("abcde")); + absl::Cord empty(""); + + ASSERT_TRUE(x.StartsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.StartsWith(absl::Cord("abc"))); + ASSERT_TRUE(x.StartsWith(absl::Cord(""))); + ASSERT_TRUE(empty.StartsWith(absl::Cord(""))); + ASSERT_TRUE(x.EndsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord("cde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord(""))); + ASSERT_TRUE(empty.EndsWith(absl::Cord(""))); + + ASSERT_TRUE(!x.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!x.EndsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.EndsWith(absl::Cord("xyz"))); + + ASSERT_TRUE(x.StartsWith("abcde")); + ASSERT_TRUE(x.StartsWith("abc")); + ASSERT_TRUE(x.StartsWith("")); + ASSERT_TRUE(empty.StartsWith("")); + ASSERT_TRUE(x.EndsWith("abcde")); + ASSERT_TRUE(x.EndsWith("cde")); + ASSERT_TRUE(x.EndsWith("")); + ASSERT_TRUE(empty.EndsWith("")); + + ASSERT_TRUE(!x.StartsWith("xyz")); + ASSERT_TRUE(!empty.StartsWith("xyz")); + ASSERT_TRUE(!x.EndsWith("xyz")); + ASSERT_TRUE(!empty.EndsWith("xyz")); +} + +TEST(Cord, Subcord) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + const std::string s = RandomLowercaseString(&rng, 1024); + + absl::Cord a; + AppendWithFragments(s, &rng, &a); + ASSERT_EQ(s.size(), a.size()); + + // Check subcords of a, from a variety of interesting points. + std::set<size_t> positions; + for (int i = 0; i <= 32; ++i) { + positions.insert(i); + positions.insert(i * 32 - 1); + positions.insert(i * 32); + positions.insert(i * 32 + 1); + positions.insert(a.size() - i); + } + positions.insert(237); + positions.insert(732); + for (size_t pos : positions) { + if (pos > a.size()) continue; + for (size_t end_pos : positions) { + if (end_pos < pos || end_pos > a.size()) continue; + absl::Cord sa = a.Subcord(pos, end_pos - pos); + EXPECT_EQ(absl::string_view(s).substr(pos, end_pos - pos), + std::string(sa)) + << a; + } + } + + // Do the same thing for an inline cord. + const std::string sh = "short"; + absl::Cord c(sh); + for (size_t pos = 0; pos <= sh.size(); ++pos) { + for (size_t n = 0; n <= sh.size() - pos; ++n) { + absl::Cord sc = c.Subcord(pos, n); + EXPECT_EQ(sh.substr(pos, n), std::string(sc)) << c; + } + } + + // Check subcords of subcords. + absl::Cord sa = a.Subcord(0, a.size()); + std::string ss = s.substr(0, s.size()); + while (sa.size() > 1) { + sa = sa.Subcord(1, sa.size() - 2); + ss = ss.substr(1, ss.size() - 2); + EXPECT_EQ(ss, std::string(sa)) << a; + if (HasFailure()) break; // halt cascade + } + + // It is OK to ask for too much. + sa = a.Subcord(0, a.size() + 1); + EXPECT_EQ(s, std::string(sa)); + + // It is OK to ask for something beyond the end. + sa = a.Subcord(a.size() + 1, 0); + EXPECT_TRUE(sa.empty()); + sa = a.Subcord(a.size() + 1, 1); + EXPECT_TRUE(sa.empty()); +} + +TEST(Cord, Swap) { + absl::string_view a("Dexter"); + absl::string_view b("Mandark"); + absl::Cord x(a); + absl::Cord y(b); + swap(x, y); + ASSERT_EQ(x, absl::Cord(b)); + ASSERT_EQ(y, absl::Cord(a)); +} + +static void VerifyCopyToString(const absl::Cord& cord) { + std::string initially_empty; + absl::CopyCordToString(cord, &initially_empty); + EXPECT_EQ(initially_empty, cord); + + constexpr size_t kInitialLength = 1024; + std::string has_initial_contents(kInitialLength, 'x'); + const char* address_before_copy = has_initial_contents.data(); + absl::CopyCordToString(cord, &has_initial_contents); + EXPECT_EQ(has_initial_contents, cord); + + if (cord.size() <= kInitialLength) { + EXPECT_EQ(has_initial_contents.data(), address_before_copy) + << "CopyCordToString allocated new std::string storage; " + "has_initial_contents = \"" + << has_initial_contents << "\""; + } +} + +TEST(Cord, CopyToString) { + VerifyCopyToString(absl::Cord()); + VerifyCopyToString(absl::Cord("small cord")); + VerifyCopyToString( + absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", + "copying ", "to ", "a ", "string."})); +} + +static bool IsFlat(const absl::Cord& c) { + return c.chunk_begin() == c.chunk_end() || ++c.chunk_begin() == c.chunk_end(); +} + +static void VerifyFlatten(absl::Cord c) { + std::string old_contents(c); + absl::string_view old_flat; + bool already_flat_and_non_empty = IsFlat(c) && !c.empty(); + if (already_flat_and_non_empty) { + old_flat = *c.chunk_begin(); + } + absl::string_view new_flat = c.Flatten(); + + // Verify that the contents of the flattened Cord are correct. + EXPECT_EQ(new_flat, old_contents); + EXPECT_EQ(std::string(c), old_contents); + + // If the Cord contained data and was already flat, verify that the data + // wasn't copied. + if (already_flat_and_non_empty) { + EXPECT_EQ(old_flat.data(), new_flat.data()) + << "Allocated new memory even though the Cord was already flat."; + } + + // Verify that the flattened Cord is in fact flat. + EXPECT_TRUE(IsFlat(c)); +} + +TEST(Cord, Flatten) { + VerifyFlatten(absl::Cord()); + VerifyFlatten(absl::Cord("small cord")); + VerifyFlatten(absl::Cord("larger than small buffer optimization")); + VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); + + // Test with a cord that is longer than the largest flat buffer + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); +} + +// Test data +namespace { +class TestData { + private: + std::vector<std::string> data_; + + // Return a std::string of the specified length. + static std::string MakeString(int length) { + std::string result; + char buf[30]; + snprintf(buf, sizeof(buf), "(%d)", length); + while (result.size() < length) { + result += buf; + } + result.resize(length); + return result; + } + + public: + TestData() { + // short strings increasing in length by one + for (int i = 0; i < 30; i++) { + data_.push_back(MakeString(i)); + } + + // strings around half kMaxFlatLength + static const int kMaxFlatLength = 4096 - 9; + static const int kHalf = kMaxFlatLength / 2; + + for (int i = -10; i <= +10; i++) { + data_.push_back(MakeString(kHalf + i)); + } + + for (int i = -10; i <= +10; i++) { + data_.push_back(MakeString(kMaxFlatLength + i)); + } + } + + size_t size() const { return data_.size(); } + const std::string& data(size_t i) const { return data_[i]; } +}; +} // namespace + +TEST(Cord, MultipleLengths) { + TestData d; + for (size_t i = 0; i < d.size(); i++) { + std::string a = d.data(i); + + { // Construct from Cord + absl::Cord tmp(a); + absl::Cord x(tmp); + EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; + } + + { // Construct from absl::string_view + absl::Cord x(a); + EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; + } + + { // Append cord to self + absl::Cord self(a); + self.Append(self); + EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; + } + + { // Prepend cord to self + absl::Cord self(a); + self.Prepend(self); + EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; + } + + // Try to append/prepend others + for (size_t j = 0; j < d.size(); j++) { + std::string b = d.data(j); + + { // CopyFrom Cord + absl::Cord x(a); + absl::Cord y(b); + x = y; + EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // CopyFrom absl::string_view + absl::Cord x(a); + x = b; + EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Append(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Append(y); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Append(absl::string_view) + absl::Cord x(a); + x.Append(b); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Prepend(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Prepend(y); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + + { // Cord::Prepend(absl::string_view) + absl::Cord x(a); + x.Prepend(b); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + } + } +} + +namespace { + +TEST(Cord, RemoveSuffixWithExternalOrSubstring) { + absl::Cord cord = absl::MakeCordFromExternal( + "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); + + EXPECT_EQ("foo bar baz", std::string(cord)); + + // This RemoveSuffix() will wrap the EXTERNAL node in a SUBSTRING node. + cord.RemoveSuffix(4); + EXPECT_EQ("foo bar", std::string(cord)); + + // This RemoveSuffix() will adjust the SUBSTRING node in-place. + cord.RemoveSuffix(4); + EXPECT_EQ("foo", std::string(cord)); +} + +TEST(Cord, RemoveSuffixMakesZeroLengthNode) { + absl::Cord c; + c.Append(absl::Cord(std::string(100, 'x'))); + absl::Cord other_ref = c; // Prevent inplace appends + c.Append(absl::Cord(std::string(200, 'y'))); + c.RemoveSuffix(200); + EXPECT_EQ(std::string(100, 'x'), std::string(c)); +} + +} // namespace + +// CordSpliceTest contributed by hendrie. +namespace { + +// Create a cord with an external memory block filled with 'z' +absl::Cord CordWithZedBlock(size_t size) { + char* data = new char[size]; + if (size > 0) { + memset(data, 'z', size); + } + absl::Cord cord = absl::MakeCordFromExternal( + absl::string_view(data, size), + [](absl::string_view s) { delete[] s.data(); }); + return cord; +} + +// Establish that ZedBlock does what we think it does. +TEST(CordSpliceTest, ZedBlock) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("zzzzzzzzzz", s); +} + +TEST(CordSpliceTest, ZedBlock0) { + absl::Cord blob = CordWithZedBlock(0); + EXPECT_EQ(0, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("", s); +} + +TEST(CordSpliceTest, ZedBlockSuffix1) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(9); + EXPECT_EQ(1, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("z", s); +} + +// Remove all of a prefix block +TEST(CordSpliceTest, ZedBlockSuffix0) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(10); + EXPECT_EQ(0, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("", s); +} + +absl::Cord BigCord(size_t len, char v) { + std::string s(len, v); + return absl::Cord(s); +} + +// Splice block into cord. +absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, + const absl::Cord& block) { + ABSL_RAW_CHECK(offset >= 0, ""); + ABSL_RAW_CHECK(offset + block.size() <= blob.size(), ""); + absl::Cord result(blob); + result.RemoveSuffix(blob.size() - offset); + result.Append(block); + absl::Cord suffix(blob); + suffix.RemovePrefix(offset + block.size()); + result.Append(suffix); + ABSL_RAW_CHECK(blob.size() == result.size(), ""); + return result; +} + +// Taking an empty suffix of a block breaks appending. +TEST(CordSpliceTest, RemoveEntireBlock1) { + absl::Cord zero = CordWithZedBlock(10); + absl::Cord suffix(zero); + suffix.RemovePrefix(10); + absl::Cord result; + result.Append(suffix); +} + +TEST(CordSpliceTest, RemoveEntireBlock2) { + absl::Cord zero = CordWithZedBlock(10); + absl::Cord prefix(zero); + prefix.RemoveSuffix(10); + absl::Cord suffix(zero); + suffix.RemovePrefix(10); + absl::Cord result(prefix); + result.Append(suffix); +} + +TEST(CordSpliceTest, RemoveEntireBlock3) { + absl::Cord blob = CordWithZedBlock(10); + absl::Cord block = BigCord(10, 'b'); + blob = SpliceCord(blob, 0, block); +} + +struct CordCompareTestCase { + template <typename LHS, typename RHS> + CordCompareTestCase(const LHS& lhs, const RHS& rhs) + : lhs_cord(lhs), rhs_cord(rhs) {} + + absl::Cord lhs_cord; + absl::Cord rhs_cord; +}; + +const auto sign = [](int x) { return x == 0 ? 0 : (x > 0 ? 1 : -1); }; + +void VerifyComparison(const CordCompareTestCase& test_case) { + std::string lhs_string(test_case.lhs_cord); + std::string rhs_string(test_case.rhs_cord); + int expected = sign(lhs_string.compare(rhs_string)); + EXPECT_EQ(expected, test_case.lhs_cord.Compare(test_case.rhs_cord)) + << "LHS=" << lhs_string << "; RHS=" << rhs_string; + EXPECT_EQ(expected, test_case.lhs_cord.Compare(rhs_string)) + << "LHS=" << lhs_string << "; RHS=" << rhs_string; + EXPECT_EQ(-expected, test_case.rhs_cord.Compare(test_case.lhs_cord)) + << "LHS=" << rhs_string << "; RHS=" << lhs_string; + EXPECT_EQ(-expected, test_case.rhs_cord.Compare(lhs_string)) + << "LHS=" << rhs_string << "; RHS=" << lhs_string; +} + +TEST(Cord, Compare) { + absl::Cord subcord("aaaaaBBBBBcccccDDDDD"); + subcord = subcord.Subcord(3, 10); + + absl::Cord tmp("aaaaaaaaaaaaaaaa"); + tmp.Append("BBBBBBBBBBBBBBBB"); + absl::Cord concat = absl::Cord("cccccccccccccccc"); + concat.Append("DDDDDDDDDDDDDDDD"); + concat.Prepend(tmp); + + absl::Cord concat2("aaaaaaaaaaaaa"); + concat2.Append("aaaBBBBBBBBBBBBBBBBccccc"); + concat2.Append("cccccccccccDDDDDDDDDDDDDD"); + concat2.Append("DD"); + + std::vector<CordCompareTestCase> test_cases = {{ + // Inline cords + {"abcdef", "abcdef"}, + {"abcdef", "abcdee"}, + {"abcdef", "abcdeg"}, + {"bbcdef", "abcdef"}, + {"bbcdef", "abcdeg"}, + {"abcdefa", "abcdef"}, + {"abcdef", "abcdefa"}, + + // Small flat cords + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD"}, + {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX"}, + {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa"}, + + // Subcords + {subcord, subcord}, + {subcord, "aaBBBBBccc"}, + {subcord, "aaBBBBBccd"}, + {subcord, "aaBBBBBccb"}, + {subcord, "aaBBBBBxcb"}, + {subcord, "aaBBBBBccca"}, + {subcord, "aaBBBBBcc"}, + + // Concats + {concat, concat}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe"}, + + {concat, concat2}, + }}; + + for (const auto& tc : test_cases) { + VerifyComparison(tc); + } +} + +TEST(Cord, CompareAfterAssign) { + absl::Cord a("aaaaaa1111111"); + absl::Cord b("aaaaaa2222222"); + a = "cccccc"; + b = "cccccc"; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); + + a = "aaaa"; + b = "bbbbb"; + a = ""; + b = ""; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); +} + +// Test CompareTo() and ComparePrefix() against string and substring +// comparison methods from std::basic_string. +static void TestCompare(const absl::Cord& c, const absl::Cord& d, + RandomEngine* rng) { + typedef std::basic_string<uint8_t> ustring; + ustring cs(reinterpret_cast<const uint8_t*>(std::string(c).data()), c.size()); + ustring ds(reinterpret_cast<const uint8_t*>(std::string(d).data()), d.size()); + // ustring comparison is ideal because we expect Cord comparisons to be + // based on unsigned byte comparisons regardless of whether char is signed. + int expected = sign(cs.compare(ds)); + EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; +} + +TEST(Compare, ComparisonIsUnsigned) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); + char x = static_cast<char>(uniform_uint8(rng)); + TestCompare( + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x)), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x ^ 0x80)), &rng); +} + +TEST(Compare, RandomComparisons) { + const int kIters = 5000; + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + int n = GetUniformRandomUpTo(&rng, 5000); + absl::Cord a[] = {MakeExternalCord(n), + absl::Cord("ant"), + absl::Cord("elephant"), + absl::Cord("giraffe"), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), + GetUniformRandomUpTo(&rng, 100))), + absl::Cord(""), + absl::Cord("x"), + absl::Cord("A"), + absl::Cord("B"), + absl::Cord("C")}; + for (int i = 0; i < kIters; i++) { + absl::Cord c, d; + for (int j = 0; j < (i % 7) + 1; j++) { + c.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + d.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + } + std::bernoulli_distribution coin_flip(0.5); + TestCompare(coin_flip(rng) ? c : absl::Cord(std::string(c)), + coin_flip(rng) ? d : absl::Cord(std::string(d)), &rng); + } +} + +template <typename T1, typename T2> +void CompareOperators() { + const T1 a("a"); + const T2 b("b"); + + EXPECT_TRUE(a == a); + // For pointer type (i.e. `const char*`), operator== compares the address + // instead of the std::string, so `a == const char*("a")` isn't necessarily true. + EXPECT_TRUE(std::is_pointer<T1>::value || a == T1("a")); + EXPECT_TRUE(std::is_pointer<T2>::value || a == T2("a")); + EXPECT_FALSE(a == b); + + EXPECT_TRUE(a != b); + EXPECT_FALSE(a != a); + + EXPECT_TRUE(a < b); + EXPECT_FALSE(b < a); + + EXPECT_TRUE(b > a); + EXPECT_FALSE(a > b); + + EXPECT_TRUE(a >= a); + EXPECT_TRUE(b >= a); + EXPECT_FALSE(a >= b); + + EXPECT_TRUE(a <= a); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(b <= a); +} + +TEST(ComparisonOperators, Cord_Cord) { + CompareOperators<absl::Cord, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_StringPiece) { + CompareOperators<absl::Cord, absl::string_view>(); +} + +TEST(ComparisonOperators, StringPiece_Cord) { + CompareOperators<absl::string_view, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_string) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, string_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, stdstring_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_stdstring) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, charstar_Cord) { + CompareOperators<const char*, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_charstar) { + CompareOperators<absl::Cord, const char*>(); +} + +TEST(ConstructFromExternal, ReleaserInvoked) { + // Empty external memory means the releaser should be called immediately. + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal("", releaser); + EXPECT_TRUE(invoked); + } + } + + // If the size of the data is small enough, a future constructor + // implementation may copy the bytes and immediately invoke the releaser + // instead of creating an external node. We make a large dummy std::string to + // make this test independent of such an optimization. + std::string large_dummy(2048, 'c'); + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } + + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + absl::Cord copy; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + copy = c; + EXPECT_FALSE(invoked); + } + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } +} + +TEST(ConstructFromExternal, CompareContents) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + for (int length = 1; length <= 2048; length *= 2) { + std::string data = RandomLowercaseString(&rng, length); + auto* external = new std::string(data); + auto cord = + absl::MakeCordFromExternal(*external, [external](absl::string_view sv) { + EXPECT_EQ(external->data(), sv.data()); + EXPECT_EQ(external->size(), sv.size()); + delete external; + }); + EXPECT_EQ(data, cord); + } +} + +TEST(ConstructFromExternal, LargeReleaser) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + constexpr size_t kLength = 256; + std::string data = RandomLowercaseString(&rng, kLength); + std::array<char, kLength> data_array; + for (size_t i = 0; i < kLength; ++i) data_array[i] = data[i]; + bool invoked = false; + auto releaser = [data_array, &invoked](absl::string_view data) { + EXPECT_EQ(data, absl::string_view(data_array.data(), data_array.size())); + invoked = true; + }; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, FunctionPointerReleaser) { + static absl::string_view data("hello world"); + static bool invoked; + auto* releaser = + static_cast<void (*)(absl::string_view)>([](absl::string_view sv) { + EXPECT_EQ(data, sv); + invoked = true; + }); + invoked = false; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); + + invoked = false; + (void)absl::MakeCordFromExternal(data, *releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, MoveOnlyReleaser) { + struct Releaser { + explicit Releaser(bool* invoked) : invoked(invoked) {} + Releaser(Releaser&& other) noexcept : invoked(other.invoked) {} + void operator()(absl::string_view) const { *invoked = true; } + + bool* invoked; + }; + + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", Releaser(&invoked)); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { + struct Releaser { + explicit Releaser(bool* destroyed) : destroyed(destroyed) {} + ~Releaser() { *destroyed = true; } + void operator()(absl::string_view) const {} + + bool* destroyed; + }; + + bool destroyed = false; + Releaser releaser(&destroyed); + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_TRUE(destroyed); +} + +TEST(ConstructFromExternal, ReferenceQualifierOverloads) { + struct Releaser { + void operator()(absl::string_view) & { *lvalue_invoked = true; } + void operator()(absl::string_view) && { *rvalue_invoked = true; } + + bool* lvalue_invoked; + bool* rvalue_invoked; + }; + + bool lvalue_invoked = false; + bool rvalue_invoked = false; + Releaser releaser = {&lvalue_invoked, &rvalue_invoked}; + (void)absl::MakeCordFromExternal("", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + // NOLINTNEXTLINE: suppress clang-tidy std::move on trivially copyable type. + (void)absl::MakeCordFromExternal("dummy", std::move(releaser)); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); +} + +TEST(ExternalMemory, BasicUsage) { + static const char* strings[] = { "", "hello", "there" }; + for (const char* str : strings) { + absl::Cord dst("(prefix)"); + AddExternalMemory(str, &dst); + dst.Append("(suffix)"); + EXPECT_EQ((std::string("(prefix)") + str + std::string("(suffix)")), + std::string(dst)); + } +} + +TEST(ExternalMemory, RemovePrefixSuffix) { + // Exhaustively try all sub-strings. + absl::Cord cord = MakeComposite(); + std::string s = std::string(cord); + for (int offset = 0; offset <= s.size(); offset++) { + for (int length = 0; length <= s.size() - offset; length++) { + absl::Cord result(cord); + result.RemovePrefix(offset); + result.RemoveSuffix(result.size() - length); + EXPECT_EQ(s.substr(offset, length), std::string(result)) + << offset << " " << length; + } + } +} + +TEST(ExternalMemory, Get) { + absl::Cord cord("hello"); + AddExternalMemory(" world!", &cord); + AddExternalMemory(" how are ", &cord); + cord.Append(" you?"); + std::string s = std::string(cord); + for (int i = 0; i < s.size(); i++) { + EXPECT_EQ(s[i], cord[i]); + } +} + +// CordMemoryUsage tests verify the correctness of the EstimatedMemoryUsage() +// These tests take into account that the reported memory usage is approximate +// and non-deterministic. For all tests, We verify that the reported memory +// usage is larger than `size()`, and less than `size() * 1.5` as a cord should +// never reserve more 'extra' capacity than half of its size as it grows. +// Additionally we have some whiteboxed expectations based on our knowledge of +// the layout and size of empty and inlined cords, and flat nodes. + +TEST(CordMemoryUsage, Empty) { + EXPECT_EQ(sizeof(absl::Cord), absl::Cord().EstimatedMemoryUsage()); +} + +TEST(CordMemoryUsage, Embedded) { + absl::Cord a("hello"); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, EmbeddedAppend) { + absl::Cord a("a"); + absl::Cord b("bcd"); + EXPECT_EQ(b.EstimatedMemoryUsage(), sizeof(absl::Cord)); + a.Append(b); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, ExternalMemory) { + static const int kLength = 1000; + absl::Cord cord; + AddExternalMemory(std::string(kLength, 'x'), &cord); + EXPECT_GT(cord.EstimatedMemoryUsage(), kLength); + EXPECT_LE(cord.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, Flat) { + static const int kLength = 125; + absl::Cord a(std::string(kLength, 'a')); + EXPECT_GT(a.EstimatedMemoryUsage(), kLength); + EXPECT_LE(a.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, AppendFlat) { + using absl::strings_internal::CordTestAccess; + absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); + size_t length = a.EstimatedMemoryUsage(); + a.Append(std::string(CordTestAccess::MaxFlatLength(), 'b')); + size_t delta = a.EstimatedMemoryUsage() - length; + EXPECT_GT(delta, CordTestAccess::MaxFlatLength()); + EXPECT_LE(delta, CordTestAccess::MaxFlatLength() * 1.5); +} + +// Regtest for a change that had to be rolled back because it expanded out +// of the InlineRep too soon, which was observable through MemoryUsage(). +TEST(CordMemoryUsage, InlineRep) { + constexpr size_t kMaxInline = 15; // Cord::InlineRep::N + const std::string small_string(kMaxInline, 'x'); + absl::Cord c1(small_string); + + absl::Cord c2; + c2.Append(small_string); + EXPECT_EQ(c1, c2); + EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage()); +} + +} // namespace + +// Regtest for 7510292 (fix a bug introduced by 7465150) +TEST(Cord, Concat_Append) { + // Create a rep of type CONCAT + absl::Cord s1("foobarbarbarbarbar"); + s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg"); + size_t size = s1.size(); + + // Create a copy of s1 and append to it. + absl::Cord s2 = s1; + s2.Append("x"); + + // 7465150 modifies s1 when it shouldn't. + EXPECT_EQ(s1.size(), size); + EXPECT_EQ(s2.size(), size + 1); +} + +TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { + absl::Cord fragmented = + absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); + + EXPECT_EQ("A fragmented Cord", fragmented); + + auto chunk_it = fragmented.chunk_begin(); + + ASSERT_TRUE(chunk_it != fragmented.chunk_end()); + EXPECT_EQ("A ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("fragmented ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("Cord", *chunk_it); + + ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); +} + +TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { + std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"}; + absl::Cord fragmented = absl::MakeFragmentedCord(chunks); + + EXPECT_EQ("A fragmented Cord", fragmented); + + auto chunk_it = fragmented.chunk_begin(); + + ASSERT_TRUE(chunk_it != fragmented.chunk_end()); + EXPECT_EQ("A ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("fragmented ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("Cord", *chunk_it); + + ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); +} + +TEST(CordChunkIterator, Traits) { + static_assert(std::is_copy_constructible<absl::Cord::ChunkIterator>::value, + ""); + static_assert(std::is_copy_assignable<absl::Cord::ChunkIterator>::value, ""); + + // Move semantics to satisfy swappable via std::swap + static_assert(std::is_move_constructible<absl::Cord::ChunkIterator>::value, + ""); + static_assert(std::is_move_assignable<absl::Cord::ChunkIterator>::value, ""); + + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::ChunkIterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::value_type, + absl::string_view>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::ChunkIterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::pointer, + const absl::string_view*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::reference, + absl::string_view>::value, + ""); +} + +static void VerifyChunkIterator(const absl::Cord& cord, + size_t expected_chunks) { + EXPECT_EQ(cord.chunk_begin() == cord.chunk_end(), cord.empty()) << cord; + EXPECT_EQ(cord.chunk_begin() != cord.chunk_end(), !cord.empty()); + + absl::Cord::ChunkRange range = cord.Chunks(); + EXPECT_EQ(range.begin() == range.end(), cord.empty()); + EXPECT_EQ(range.begin() != range.end(), !cord.empty()); + + std::string content(cord); + size_t pos = 0; + auto pre_iter = cord.chunk_begin(), post_iter = cord.chunk_begin(); + size_t n_chunks = 0; + while (pre_iter != cord.chunk_end() && post_iter != cord.chunk_end()) { + EXPECT_FALSE(pre_iter == cord.chunk_end()); // NOLINT: explicitly test == + EXPECT_FALSE(post_iter == cord.chunk_end()); // NOLINT + + EXPECT_EQ(pre_iter, post_iter); + EXPECT_EQ(*pre_iter, *post_iter); + + EXPECT_EQ(pre_iter->data(), (*pre_iter).data()); + EXPECT_EQ(pre_iter->size(), (*pre_iter).size()); + + absl::string_view chunk = *pre_iter; + EXPECT_FALSE(chunk.empty()); + EXPECT_LE(pos + chunk.size(), content.size()); + EXPECT_EQ(absl::string_view(content.c_str() + pos, chunk.size()), chunk); + + int n_equal_iterators = 0; + for (absl::Cord::ChunkIterator it = range.begin(); it != range.end(); + ++it) { + n_equal_iterators += static_cast<int>(it == pre_iter); + } + EXPECT_EQ(n_equal_iterators, 1); + + ++pre_iter; + EXPECT_EQ(*post_iter++, chunk); + + pos += chunk.size(); + ++n_chunks; + } + EXPECT_EQ(expected_chunks, n_chunks); + EXPECT_EQ(pos, content.size()); + EXPECT_TRUE(pre_iter == cord.chunk_end()); // NOLINT: explicitly test == + EXPECT_TRUE(post_iter == cord.chunk_end()); // NOLINT +} + +TEST(CordChunkIterator, Operations) { + absl::Cord empty_cord; + VerifyChunkIterator(empty_cord, 0); + + absl::Cord small_buffer_cord("small cord"); + VerifyChunkIterator(small_buffer_cord, 1); + + absl::Cord flat_node_cord("larger than small buffer optimization"); + VerifyChunkIterator(flat_node_cord, 1); + + VerifyChunkIterator( + absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "chunk ", "iterations."}), + 8); + + absl::Cord reused_nodes_cord(std::string(40, 'c')); + reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'b'))); + reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'a'))); + size_t expected_chunks = 3; + for (int i = 0; i < 8; ++i) { + reused_nodes_cord.Prepend(reused_nodes_cord); + expected_chunks *= 2; + VerifyChunkIterator(reused_nodes_cord, expected_chunks); + } + + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); + absl::Cord subcords; + for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128)); + VerifyChunkIterator(subcords, 128); +} + +TEST(CordCharIterator, Traits) { + static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value, + ""); + static_assert(std::is_copy_assignable<absl::Cord::CharIterator>::value, ""); + + // Move semantics to satisfy swappable via std::swap + static_assert(std::is_move_constructible<absl::Cord::CharIterator>::value, + ""); + static_assert(std::is_move_assignable<absl::Cord::CharIterator>::value, ""); + + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::CharIterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::value_type, + char>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::CharIterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::pointer, + const char*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::reference, + const char&>::value, + ""); +} + +static void VerifyCharIterator(const absl::Cord& cord) { + EXPECT_EQ(cord.char_begin() == cord.char_end(), cord.empty()); + EXPECT_EQ(cord.char_begin() != cord.char_end(), !cord.empty()); + + absl::Cord::CharRange range = cord.Chars(); + EXPECT_EQ(range.begin() == range.end(), cord.empty()); + EXPECT_EQ(range.begin() != range.end(), !cord.empty()); + + size_t i = 0; + absl::Cord::CharIterator pre_iter = cord.char_begin(); + absl::Cord::CharIterator post_iter = cord.char_begin(); + std::string content(cord); + while (pre_iter != cord.char_end() && post_iter != cord.char_end()) { + EXPECT_FALSE(pre_iter == cord.char_end()); // NOLINT: explicitly test == + EXPECT_FALSE(post_iter == cord.char_end()); // NOLINT + + EXPECT_LT(i, cord.size()); + EXPECT_EQ(content[i], *pre_iter); + + EXPECT_EQ(pre_iter, post_iter); + EXPECT_EQ(*pre_iter, *post_iter); + EXPECT_EQ(&*pre_iter, &*post_iter); + + EXPECT_EQ(&*pre_iter, pre_iter.operator->()); + + const char* character_address = &*pre_iter; + absl::Cord::CharIterator copy = pre_iter; + ++copy; + EXPECT_EQ(character_address, &*pre_iter); + + int n_equal_iterators = 0; + for (absl::Cord::CharIterator it = range.begin(); it != range.end(); ++it) { + n_equal_iterators += static_cast<int>(it == pre_iter); + } + EXPECT_EQ(n_equal_iterators, 1); + + absl::Cord::CharIterator advance_iter = range.begin(); + absl::Cord::Advance(&advance_iter, i); + EXPECT_EQ(pre_iter, advance_iter); + + advance_iter = range.begin(); + EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, i), cord.Subcord(0, i)); + EXPECT_EQ(pre_iter, advance_iter); + + advance_iter = pre_iter; + absl::Cord::Advance(&advance_iter, cord.size() - i); + EXPECT_EQ(range.end(), advance_iter); + + advance_iter = pre_iter; + EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, cord.size() - i), + cord.Subcord(i, cord.size() - i)); + EXPECT_EQ(range.end(), advance_iter); + + ++i; + ++pre_iter; + post_iter++; + } + EXPECT_EQ(i, cord.size()); + EXPECT_TRUE(pre_iter == cord.char_end()); // NOLINT: explicitly test == + EXPECT_TRUE(post_iter == cord.char_end()); // NOLINT + + absl::Cord::CharIterator zero_advanced_end = cord.char_end(); + absl::Cord::Advance(&zero_advanced_end, 0); + EXPECT_EQ(zero_advanced_end, cord.char_end()); + + absl::Cord::CharIterator it = cord.char_begin(); + for (absl::string_view chunk : cord.Chunks()) { + while (!chunk.empty()) { + EXPECT_EQ(absl::Cord::ChunkRemaining(it), chunk); + chunk.remove_prefix(1); + ++it; + } + } +} + +TEST(CordCharIterator, Operations) { + absl::Cord empty_cord; + VerifyCharIterator(empty_cord); + + absl::Cord small_buffer_cord("small cord"); + VerifyCharIterator(small_buffer_cord); + + absl::Cord flat_node_cord("larger than small buffer optimization"); + VerifyCharIterator(flat_node_cord); + + VerifyCharIterator( + absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "character ", "iteration."})); + + absl::Cord reused_nodes_cord("ghi"); + reused_nodes_cord.Prepend(absl::Cord("def")); + reused_nodes_cord.Prepend(absl::Cord("abc")); + for (int i = 0; i < 4; ++i) { + reused_nodes_cord.Prepend(reused_nodes_cord); + VerifyCharIterator(reused_nodes_cord); + } + + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); + absl::Cord subcords; + for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + VerifyCharIterator(subcords); +} + +TEST(Cord, StreamingOutput) { + absl::Cord c = + absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."}); + std::stringstream output; + output << c; + EXPECT_EQ("A small fragmented Cord.", output.str()); +} + +TEST(Cord, ForEachChunk) { + for (int num_elements : {1, 10, 200}) { + SCOPED_TRACE(num_elements); + std::vector<std::string> cord_chunks; + for (int i = 0; i < num_elements; ++i) { + cord_chunks.push_back(absl::StrCat("[", i, "]")); + } + absl::Cord c = absl::MakeFragmentedCord(cord_chunks); + + std::vector<std::string> iterated_chunks; + absl::CordTestPeer::ForEachChunk(c, + [&iterated_chunks](absl::string_view sv) { + iterated_chunks.emplace_back(sv); + }); + EXPECT_EQ(iterated_chunks, cord_chunks); + } +} + +TEST(Cord, SmallBufferAssignFromOwnData) { + constexpr size_t kMaxInline = 15; + std::string contents = "small buff cord"; + EXPECT_EQ(contents.size(), kMaxInline); + for (size_t pos = 0; pos < contents.size(); ++pos) { + for (size_t count = contents.size() - pos; count > 0; --count) { + absl::Cord c(contents); + absl::string_view flat = c.Flatten(); + c = flat.substr(pos, count); + EXPECT_EQ(c, contents.substr(pos, count)) + << "pos = " << pos << "; count = " << count; + } + } +} diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h new file mode 100644 index 000000000000..f1036e3b1388 --- /dev/null +++ b/absl/strings/cord_test_helpers.h @@ -0,0 +1,60 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORD_TEST_HELPERS_H_ + +#include "absl/strings/cord.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Creates a multi-segment Cord from an iterable container of strings. The +// resulting Cord is guaranteed to have one segment for every string in the +// container. This allows code to be unit tested with multi-segment Cord +// inputs. +// +// Example: +// +// absl::Cord c = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); +// EXPECT_FALSE(c.GetFlat(&unused)); +// +// The mechanism by which this Cord is created is an implementation detail. Any +// implementation that produces a multi-segment Cord may produce a flat Cord in +// the future as new optimizations are added to the Cord class. +// MakeFragmentedCord will, however, always be updated to return a multi-segment +// Cord. +template <typename Container> +Cord MakeFragmentedCord(const Container& c) { + Cord result; + for (const auto& s : c) { + auto* external = new std::string(s); + Cord tmp = absl::MakeCordFromExternal( + *external, [external](absl::string_view) { delete external; }); + tmp.Prepend(result); + result = tmp; + } + return result; +} + +inline Cord MakeFragmentedCord(std::initializer_list<absl::string_view> list) { + return MakeFragmentedCord<std::initializer_list<absl::string_view>>(list); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_ diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h new file mode 100644 index 000000000000..5b5d10830891 --- /dev/null +++ b/absl/strings/internal/cord_internal.h @@ -0,0 +1,151 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ + +#include <atomic> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <type_traits> + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Wraps std::atomic for reference counting. +class Refcount { + public: + Refcount() : count_{1} {} + ~Refcount() {} + + // Increments the reference count by 1. Imposes no memory ordering. + inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + + // Asserts that the current refcount is greater than 0. If the refcount is + // greater than 1, decrements the reference count by 1. + // + // Returns false if there are no references outstanding; true otherwise. + // Inserts barriers to ensure that state written before this method returns + // false will be visible to a thread that just observed this method returning + // false. + inline bool Decrement() { + int32_t refcount = count_.load(std::memory_order_acquire); + assert(refcount > 0); + return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + } + + // Same as Decrement but expect that refcount is greater than 1. + inline bool DecrementExpectHighRefcount() { + int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); + assert(refcount > 0); + return refcount != 1; + } + + // Returns the current reference count using acquire semantics. + inline int32_t Get() const { return count_.load(std::memory_order_acquire); } + + // Returns whether the atomic integer is 1. + // If the reference count is used in the conventional way, a + // reference count of 1 implies that the current thread owns the + // reference and no other thread shares it. + // This call performs the test for a reference count of one, and + // performs the memory barrier needed for the owning thread + // to act on the object, knowing that it has exclusive access to the + // object. + inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + + private: + std::atomic<int32_t> count_; +}; + +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. + +struct CordRepConcat; +struct CordRepSubstring; +struct CordRepExternal; + +struct CordRep { + // The following three fields have to be less than 32 bytes since + // that is the smallest supported flat node size. + // We use uint64_t for the length even in 32-bit binaries. + uint64_t length; + Refcount refcount; + // If tag < FLAT, it represents CordRepKind and indicates the type of node. + // Otherwise, the node type is CordRepFlat and the tag is the encoded size. + uint8_t tag; + char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + + inline CordRepConcat* concat(); + inline const CordRepConcat* concat() const; + inline CordRepSubstring* substring(); + inline const CordRepSubstring* substring() const; + inline CordRepExternal* external(); + inline const CordRepExternal* external() const; +}; + +struct CordRepConcat : public CordRep { + CordRep* left; + CordRep* right; + + uint8_t depth() const { return static_cast<uint8_t>(data[0]); } + void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } +}; + +struct CordRepSubstring : public CordRep { + size_t start; // Starting offset of substring in child + CordRep* child; +}; + +// TODO(strel): replace the following logic (and related functions in cord.cc) +// with container_internal::Layout. + +// Alignment requirement for CordRepExternal so that the type erased releaser +// will be stored at a suitably aligned address. +constexpr size_t ExternalRepAlignment() { +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + return __STDCPP_DEFAULT_NEW_ALIGNMENT__; +#else + return alignof(max_align_t); +#endif +} + +// Type for function pointer that will invoke and destroy the type-erased +// releaser function object. Accepts a pointer to the releaser and the +// `string_view` that were passed in to `NewExternalRep` below. The return value +// is the size of the `Releaser` type. +using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); + +// External CordReps are allocated together with a type erased releaser. The +// releaser is stored in the memory directly following the CordRepExternal. +struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { + const char* base; + // Pointer to function that knows how to call and destroy the releaser. + ExternalReleaserInvoker releaser_invoker; +}; + +// TODO(strel): look into removing, it doesn't seem like anything relies on this +static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index d3904124a1a9..4d0604e00c4b 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -88,7 +88,7 @@ class ConvertedIntInfo { template <typename T> void UnsignedToStringRight(T u, ConversionChar conv) { char *p = end(); - switch (conv.radix()) { + switch (FormatConversionCharRadix(conv)) { default: case 10: for (; u; u /= 10) @@ -99,7 +99,7 @@ class ConvertedIntInfo { *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); break; case 16: { - const char *digits = kDigit[conv.upper() ? 1 : 0]; + const char *digits = kDigit[FormatConversionCharIsUpper(conv) ? 1 : 0]; for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; break; } @@ -121,21 +121,20 @@ class ConvertedIntInfo { string_view BaseIndicator(const ConvertedIntInfo &info, const ConversionSpec conv) { bool alt = conv.flags().alt; - int radix = conv.conv().radix(); - if (conv.conv().id() == ConversionChar::p) - alt = true; // always show 0x for %p. + int radix = FormatConversionCharRadix(conv.conv()); + if (conv.conv() == ConversionChar::p) alt = true; // always show 0x for %p. // From the POSIX description of '#' flag: // "For x or X conversion specifiers, a non-zero result shall have // 0x (or 0X) prefixed to it." if (alt && radix == 16 && !info.digits().empty()) { - if (conv.conv().upper()) return "0X"; + if (FormatConversionCharIsUpper(conv.conv())) return "0X"; return "0x"; } return {}; } string_view SignColumn(bool neg, const ConversionSpec conv) { - if (conv.conv().is_signed()) { + if (FormatConversionCharIsSigned(conv.conv())) { if (neg) return "-"; if (conv.flags().show_pos) return "+"; if (conv.flags().sign_col) return " "; @@ -175,7 +174,7 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, if (!precision_specified) precision = 1; - if (conv.flags().alt && conv.conv().id() == ConversionChar::o) { + if (conv.flags().alt && conv.conv() == ConversionChar::o) { // From POSIX description of the '#' (alt) flag: // "For o conversion, it increases the precision (if necessary) to // force the first digit of the result to be zero." @@ -211,7 +210,7 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, template <typename T> bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { ConvertedIntInfo info(v, conv.conv()); - if (conv.flags().basic && conv.conv().id() != ConversionChar::p) { + if (conv.flags().basic && (conv.conv() != ConversionChar::p)) { if (info.is_neg()) sink->Append(1, '-'); if (info.digits().empty()) { sink->Append(1, '0'); @@ -225,14 +224,13 @@ bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { template <typename T> bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().is_float()) { + if (FormatConversionCharIsFloat(conv.conv())) { return FormatConvertImpl(static_cast<double>(v), conv, sink).value; } - if (conv.conv().id() == ConversionChar::c) + if (conv.conv() == ConversionChar::c) return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); - if (!conv.conv().is_integral()) - return false; - if (!conv.conv().is_signed() && IsSigned<T>::value) { + if (!FormatConversionCharIsIntegral(conv.conv())) return false; + if (!FormatConversionCharIsSigned(conv.conv()) && IsSigned<T>::value) { using U = typename MakeUnsigned<T>::type; return FormatConvertImpl(static_cast<U>(v), conv, sink).value; } @@ -241,13 +239,13 @@ bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { template <typename T> bool ConvertFloatArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - return conv.conv().is_float() && ConvertFloatImpl(v, conv, sink); + return FormatConversionCharIsFloat(conv.conv()) && + ConvertFloatImpl(v, conv, sink); } inline bool ConvertStringArg(string_view v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() != ConversionChar::s) - return false; + if (conv.conv() != ConversionChar::s) return false; if (conv.flags().basic) { sink->Append(v); return true; @@ -274,7 +272,7 @@ ConvertResult<Conv::s> FormatConvertImpl(string_view v, ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() == ConversionChar::p) + if (conv.conv() == ConversionChar::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; if (v == nullptr) { @@ -291,8 +289,7 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, // ==================== Raw pointers ==================== ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() != ConversionChar::p) - return {false}; + if (conv.conv() != ConversionChar::p) return {false}; if (!v.value) { sink->Append("(nil)"); return {true}; diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index b672a2294416..7a93756305e4 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -70,7 +70,7 @@ template <class AbslCord, ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, ConversionSpec conv, FormatSinkImpl* sink) { - if (conv.conv().id() != ConversionChar::s) return {false}; + if (conv.conv() != ConversionChar::s) return {false}; bool is_left = conv.flags().left; size_t space_remaining = 0; @@ -185,8 +185,7 @@ struct FormatCountCaptureHelper { FormatSinkImpl* sink) { const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; - if (conv.conv().id() != str_format_internal::ConversionChar::n) - return {false}; + if (conv.conv() != str_format_internal::ConversionChar::n) return {false}; *v2.p_ = static_cast<int>(sink->size()); return {true}; } @@ -378,7 +377,7 @@ class FormatArgImpl { template <typename T> static bool Dispatch(Data arg, ConversionSpec spec, void* out) { // A `none` conv indicates that we want the `int` conversion. - if (ABSL_PREDICT_FALSE(spec.conv().id() == ConversionChar::none)) { + if (ABSL_PREDICT_FALSE(spec.conv() == ConversionChar::none)) { return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), std::is_enum<T>()); } diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc index 96c9cfd335b5..04fa56cd3623 100644 --- a/absl/strings/internal/str_format/arg_test.cc +++ b/absl/strings/internal/str_format/arg_test.cc @@ -96,7 +96,7 @@ TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { std::string s; FormatSinkImpl sink(&s); ConversionSpec conv; - conv.set_conv(ConversionChar::FromChar('s')); + conv.set_conv(ConversionChar::s); conv.set_flags(Flags()); conv.set_width(-1); conv.set_precision(-1); diff --git a/absl/strings/internal/str_format/extension.cc b/absl/strings/internal/str_format/extension.cc index 21688e873f2c..2e5bc2ce0be0 100644 --- a/absl/strings/internal/str_format/extension.cc +++ b/absl/strings/internal/str_format/extension.cc @@ -23,15 +23,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -const ConversionChar::Spec ConversionChar::kSpecs[] = { -#define X_VAL(id) { ConversionChar::id, #id[0] } -#define X_SEP , - ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP), - {ConversionChar::none, '\0'}, -#undef X_VAL -#undef X_SEP -}; - std::string Flags::ToString() const { std::string s; s.append(left ? "-" : ""); @@ -42,8 +33,6 @@ std::string Flags::ToString() const { return s; } -const size_t ConversionChar::kNumValues; - bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { size_t space_remaining = 0; if (w >= 0) space_remaining = w; diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index 4868eac3e8b4..1a863c207bb1 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -148,117 +148,122 @@ struct Flags { X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ /* misc */ \ X_VAL(n) X_SEP X_VAL(p) -// clang-format on -struct ABSL_DLL ConversionChar { - public: - enum Id : uint8_t { +enum class FormatConversionChar : uint8_t { c, C, s, S, // text d, i, o, u, x, X, // int f, F, e, E, g, G, a, A, // float n, p, // misc - none - }; - static const size_t kNumValues = none + 1; - - ConversionChar() : id_(none) {} - - public: - // Index into the opaque array of ConversionChar enums. - // Requires: i < kNumValues - static ConversionChar FromIndex(size_t i) { - return ConversionChar(kSpecs[i].value); - } + kNone, + none = kNone +}; +// clang-format on - static ConversionChar FromChar(char c) { - ConversionChar::Id out_id = ConversionChar::none; - switch (c) { -#define X_VAL(id) \ - case #id[0]: \ - out_id = ConversionChar::id; \ - break; - ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, ) +inline FormatConversionChar FormatConversionCharFromChar(char c) { + switch (c) { +#define X_VAL(id) \ + case #id[0]: \ + return FormatConversionChar::id; + ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, ) #undef X_VAL - default: - break; - } - return ConversionChar(out_id); } + return FormatConversionChar::kNone; +} - static ConversionChar FromId(Id id) { return ConversionChar(id); } - Id id() const { return id_; } - - int radix() const { - switch (id()) { - case x: case X: case a: case A: case p: return 16; - case o: return 8; - default: return 10; - } +inline int FormatConversionCharRadix(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::x: + case FormatConversionChar::X: + case FormatConversionChar::a: + case FormatConversionChar::A: + case FormatConversionChar::p: + return 16; + case FormatConversionChar::o: + return 8; + default: + return 10; } +} - bool upper() const { - switch (id()) { - case X: case F: case E: case G: case A: return true; - default: return false; - } +inline bool FormatConversionCharIsUpper(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::X: + case FormatConversionChar::F: + case FormatConversionChar::E: + case FormatConversionChar::G: + case FormatConversionChar::A: + return true; + default: + return false; } +} - bool is_signed() const { - switch (id()) { - case d: case i: return true; - default: return false; - } +inline bool FormatConversionCharIsSigned(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::d: + case FormatConversionChar::i: + return true; + default: + return false; } +} - bool is_integral() const { - switch (id()) { - case d: case i: case u: case o: case x: case X: - return true; - default: return false; - } +inline bool FormatConversionCharIsIntegral(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::d: + case FormatConversionChar::i: + case FormatConversionChar::u: + case FormatConversionChar::o: + case FormatConversionChar::x: + case FormatConversionChar::X: + return true; + default: + return false; } +} - bool is_float() const { - switch (id()) { - case a: case e: case f: case g: case A: case E: case F: case G: - return true; - default: return false; - } +inline bool FormatConversionCharIsFloat(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::a: + case FormatConversionChar::e: + case FormatConversionChar::f: + case FormatConversionChar::g: + case FormatConversionChar::A: + case FormatConversionChar::E: + case FormatConversionChar::F: + case FormatConversionChar::G: + return true; + default: + return false; } +} - bool IsValid() const { return id() != none; } - - // The associated char. - char Char() const { return kSpecs[id_].name; } - - friend bool operator==(const ConversionChar& a, const ConversionChar& b) { - return a.id() == b.id(); - } - friend bool operator!=(const ConversionChar& a, const ConversionChar& b) { - return !(a == b); - } - friend std::ostream& operator<<(std::ostream& os, const ConversionChar& v) { - char c = v.Char(); - if (!c) c = '?'; - return os << c; +inline char FormatConversionCharToChar(FormatConversionChar c) { + switch (c) { +#define X_VAL(e) \ + case FormatConversionChar::e: \ + return #e[0]; +#define X_SEP + ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) + case FormatConversionChar::kNone: + return '\0'; +#undef X_VAL +#undef X_SEP } + return '\0'; +} - private: - struct Spec { - Id value; - char name; - }; - static const Spec kSpecs[]; - - explicit ConversionChar(Id id) : id_(id) {} - - Id id_; -}; +// The associated char. +inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { + char c = FormatConversionCharToChar(v); + if (!c) c = '?'; + return os << c; +} class ConversionSpec { public: Flags flags() const { return flags_; } - ConversionChar conv() const { + FormatConversionChar conv() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. static_assert(offsetof(ConversionSpec, conv_) == 0, ""); @@ -273,22 +278,24 @@ class ConversionSpec { int precision() const { return precision_; } void set_flags(Flags f) { flags_ = f; } - void set_conv(ConversionChar c) { conv_ = c; } + void set_conv(FormatConversionChar c) { conv_ = c; } void set_width(int w) { width_ = w; } void set_precision(int p) { precision_ = p; } void set_left(bool b) { flags_.left = b; } private: - ConversionChar conv_; + FormatConversionChar conv_ = FormatConversionChar::kNone; Flags flags_; int width_; int precision_; }; -constexpr uint64_t ConversionCharToConvValue(char conv) { +constexpr uint64_t FormatConversionCharToConvValue(char conv) { return -#define CONV_SET_CASE(c) \ - conv == #c[0] ? (uint64_t{1} << (1 + ConversionChar::Id::c)): +#define CONV_SET_CASE(c) \ + conv == #c[0] \ + ? (uint64_t{1} << (1 + static_cast<uint8_t>(FormatConversionChar::c))) \ + : ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE conv == '*' @@ -297,12 +304,12 @@ constexpr uint64_t ConversionCharToConvValue(char conv) { } enum class Conv : uint64_t { -#define CONV_SET_CASE(c) c = ConversionCharToConvValue(#c[0]), +#define CONV_SET_CASE(c) c = FormatConversionCharToConvValue(#c[0]), ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE // Used for width/precision '*' specification. - star = ConversionCharToConvValue('*'), + star = FormatConversionCharToConvValue('*'), // Some predefined values: integral = d | i | u | o | x | X, @@ -323,12 +330,12 @@ constexpr Conv operator|(Conv a, Conv b) { // Get a conversion with a single character in it. constexpr Conv ConversionCharToConv(char c) { - return Conv(ConversionCharToConvValue(c)); + return Conv(FormatConversionCharToConvValue(c)); } // Checks whether `c` exists in `set`. constexpr bool Contains(Conv set, char c) { - return (static_cast<uint64_t>(set) & ConversionCharToConvValue(c)) != 0; + return (static_cast<uint64_t>(set) & FormatConversionCharToConvValue(c)) != 0; } // Checks whether all the characters in `c` are contained in `set` @@ -353,6 +360,9 @@ inline size_t Excess(size_t used, size_t capacity) { return used < capacity ? capacity - used : 0; } +// Type alias for use during migration. +using ConversionChar = FormatConversionChar; + } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index ebe4da5b4233..c98ed4ba5008 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -33,7 +33,7 @@ bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, if (std::is_same<long double, Float>()) { *fp++ = 'L'; } - *fp++ = conv.conv().Char(); + *fp++ = FormatConversionCharToChar(conv.conv()); *fp = 0; assert(fp < fmt + sizeof(fmt)); } @@ -100,9 +100,11 @@ bool ConvertNonNumericFloats(char sign_char, Float v, char text[4], *ptr = text; if (sign_char) *ptr++ = sign_char; if (std::isnan(v)) { - ptr = std::copy_n(conv.conv().upper() ? "NAN" : "nan", 3, ptr); + ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "NAN" : "nan", + 3, ptr); } else if (std::isinf(v)) { - ptr = std::copy_n(conv.conv().upper() ? "INF" : "inf", 3, ptr); + ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "INF" : "inf", + 3, ptr); } else { return false; } @@ -399,7 +401,7 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, Buffer buffer; - switch (conv.conv().id()) { + switch (conv.conv()) { case ConversionChar::f: case ConversionChar::F: if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, @@ -416,7 +418,8 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, return FallbackToSnprintf(v, conv, sink); } if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', + &buffer); break; case ConversionChar::g: @@ -447,7 +450,10 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, while (buffer.back() == '0') buffer.pop_back(); if (buffer.back() == '.') buffer.pop_back(); } - if (exp) PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + if (exp) { + PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', + &buffer); + } break; case ConversionChar::a: diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index c4b527bc48aa..aab68db94bca 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -17,7 +17,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = ConversionChar::Id; +using CC = ConversionChar; using LM = LengthMod; ABSL_CONST_INIT const ConvTag kTags[256] = { @@ -322,7 +322,9 @@ bool ParsedFormatBase::MatchesConversions( if (conv.width.is_from_arg() && !add_if_valid_conv(conv.width.get_from_arg(), '*')) return false; - if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false; + if (!add_if_valid_conv(conv.arg_position, + FormatConversionCharToChar(conv.conv))) + return false; } return used.size() == convs.size() || allow_ignored; } diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index 6cbe257697e4..45c90d1df072 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -67,7 +67,7 @@ struct UnboundConversion { Flags flags; LengthMod length_mod = LengthMod::none; - ConversionChar conv; + ConversionChar conv = FormatConversionChar::kNone; }; // Consume conversion spec prefix (not including '%') of [p, end) if valid. @@ -79,10 +79,12 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, UnboundConversion* conv, int* next_arg); // Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod` checking and conversions. +// It allows fast `char -> ConversionChar/LengthMod` checking and +// conversions. class ConvTag { public: - constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT + constexpr ConvTag(ConversionChar conversion_char) // NOLINT + : tag_(static_cast<int8_t>(conversion_char)) {} // We invert the length modifiers to make them negative so that we can easily // test for them. constexpr ConvTag(LengthMod length_mod) // NOLINT @@ -94,7 +96,7 @@ class ConvTag { bool is_length() const { return tag_ < 0 && tag_ != -128; } ConversionChar as_conv() const { assert(is_conv()); - return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_)); + return static_cast<ConversionChar>(tag_); } LengthMod as_length() const { assert(is_length()); diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index 4a8efd0805aa..1b1ee030f183 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -41,7 +41,7 @@ TEST(LengthModTest, Names) { TEST(ConversionCharTest, Names) { struct Expectation { - ConversionChar::Id id; + ConversionChar id; char name; }; // clang-format off @@ -55,12 +55,10 @@ TEST(ConversionCharTest, Names) { {ConversionChar::none, '\0'}, }; // clang-format on - EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), ConversionChar::kNumValues); for (auto e : kExpect) { SCOPED_TRACE(e.name); - ConversionChar v = ConversionChar::FromId(e.id); - EXPECT_EQ(e.id, v.id()); - EXPECT_EQ(e.name, v.Char()); + ConversionChar v = e.id; + EXPECT_EQ(e.name, FormatConversionCharToChar(v)); } } @@ -119,7 +117,7 @@ TEST_F(ConsumeUnboundConversionTest, BasicConversion) { EXPECT_FALSE(Run("dd")); // no excess allowed EXPECT_TRUE(Run("d")); - EXPECT_EQ('d', o.conv.Char()); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); EXPECT_FALSE(o.width.is_from_arg()); EXPECT_LT(o.width.value(), 0); EXPECT_FALSE(o.precision.is_from_arg()); @@ -160,7 +158,7 @@ TEST_F(ConsumeUnboundConversionTest, ArgPosition) { TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) { EXPECT_TRUE(Run("14d")); - EXPECT_EQ('d', o.conv.Char()); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); EXPECT_FALSE(o.width.is_from_arg()); EXPECT_EQ(14, o.width.value()); EXPECT_FALSE(o.precision.is_from_arg()); @@ -330,7 +328,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += conv.conv.Char(); + *out += FormatConversionCharToChar(conv.conv); *out += "}"; return true; } diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index d33bcaa2e466..acbdbf4a2324 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -450,7 +450,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += conv.conv.Char(); + *out += FormatConversionCharToChar(conv.conv); *out += "}"; return true; } |