diff options
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/cord.cc | 21 | ||||
-rw-r--r-- | absl/strings/cord.h | 38 | ||||
-rw-r--r-- | absl/strings/cord_test.cc | 10 |
3 files changed, 28 insertions, 41 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 9b32b3cc46b6..415b239b0e9c 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -187,19 +187,20 @@ static constexpr size_t TagToLength(uint8_t tag) { // Enforce that kMaxFlatSize maps to a well-known exact tag value. static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); -constexpr uint64_t Fibonacci(uint8_t n, uint64_t a = 0, uint64_t b = 1) { - return n == 0 ? a : n == 1 ? b : Fibonacci(n - 1, b, a + b); +constexpr size_t Fibonacci(uint8_t n, const size_t a = 0, const size_t b = 1) { + return n == 0 + ? a + : n == 1 ? b + : Fibonacci(n - 1, b, + (a > (size_t(-1) - b)) ? size_t(-1) : a + b); } -static_assert(Fibonacci(63) == 6557470319842, - "Fibonacci values computed incorrectly"); - // Minimum length required for a given depth tree -- a tree is considered // balanced if // length(t) >= kMinLength[depth(t)] // The node depth is allowed to become larger to reduce rebalancing // for larger strings (see ShouldRebalance). -constexpr uint64_t kMinLength[] = { +constexpr size_t kMinLength[] = { Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5), Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9), Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13), Fibonacci(14), Fibonacci(15), Fibonacci(16), @@ -218,9 +219,9 @@ constexpr uint64_t kMinLength[] = { Fibonacci(77), Fibonacci(78), Fibonacci(79), Fibonacci(80), Fibonacci(81), Fibonacci(82), Fibonacci(83), Fibonacci(84), Fibonacci(85), Fibonacci(86), Fibonacci(87), Fibonacci(88), Fibonacci(89), Fibonacci(90), Fibonacci(91), - Fibonacci(92), Fibonacci(93)}; + Fibonacci(92), Fibonacci(93), Fibonacci(94), Fibonacci(95)}; -static_assert(sizeof(kMinLength) / sizeof(uint64_t) == +static_assert(sizeof(kMinLength) / sizeof(size_t) >= (cord_internal::MaxCordDepth() + 1), "Not enough elements in kMinLength array to cover all the " "supported Cord depth(s)"); @@ -1169,9 +1170,9 @@ class CordForest { void AddNode(CordRep* node) { CordRep* sum = nullptr; - // Collect together everything with which we will merge node + // Collect together everything with which we will merge with node int i = 0; - for (; node->length > kMinLength[i + 1]; ++i) { + for (; node->length >= kMinLength[i + 1]; ++i) { auto& tree_at_i = trees_[i]; if (tree_at_i == nullptr) continue; diff --git a/absl/strings/cord.h b/absl/strings/cord.h index 3941f19c3451..eb236e50e22e 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -72,39 +72,21 @@ namespace cord_internal { // It's expensive to keep a tree perfectly balanced, so instead we keep trees // approximately balanced. A tree node N of depth D(N) that contains a string // of L(N) characters is considered balanced if L >= Fibonacci(D + 2). -// The "+ 2" is used to ensure that every leaf node contains at least one -// character. Here we presume that +// The "+ 2" is used to ensure that every balanced leaf node contains at least +// one character. Here we presume that // Fibonacci(0) = 0 // Fibonacci(1) = 1 // Fibonacci(2) = 1 // Fibonacci(3) = 2 // ... -// -// Fibonacci numbers are convenient because it means when two balanced trees of -// the same depth are made the children of a new node, the resulting tree is -// guaranteed to also be balanced: -// -// -// L(left) >= Fibonacci(D(left) + 2) -// L(right) >= Fibonacci(D(right) + 2) -// -// L(left) + L(right) >= Fibonacci(D(left) + 2) + Fibonacci(D(right) + 2) -// L(left) + L(right) == L(new_tree) -// -// L(new_tree) >= 2 * Fibonacci(D(child) + 2) -// D(child) == D(new_tree) - 1 -// -// L(new_tree) >= 2 * Fibonacci(D(new_tree) + 1) -// 2 * Fibonacci(N) >= Fibonacci(N + 1) -// -// L(new_tree) >= Fibonacci(D(new_tree) + 2) -// -// -// The 93rd Fibonacci number is the largest Fibonacci number that can be -// represented in 64 bits, so the size of a balanced Cord of depth 92 is too big -// for an unsigned 64 bit integer to hold. Therefore we can safely assume that -// the maximum depth of a Cord is 91. -constexpr size_t MaxCordDepth() { return 91; } +// The algorithm is based on paper by Hans Boehm et al: +// https://www.cs.rit.edu/usr/local/pub/jeh/courses/QUARTERS/FP/Labs/CedarRope/rope-paper.pdf +// In this paper authors shows that rebalancing based on cord forest of already +// balanced subtrees can be proven to never produce tree of depth larger than +// largest Fibonacci number representable in the same integral type as cord size +// For 64 bit integers this is the 93rd Fibonacci number. For 32 bit integrals +// this is 47th Fibonacci number. +constexpr size_t MaxCordDepth() { return sizeof(size_t) == 8 ? 93 : 47; } // This class models fixed max size stack of CordRep pointers. // The elements are being pushed back and popped from the back. diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index d6e091f8b24e..f2d81d4c40d0 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -1403,12 +1403,16 @@ TEST(CordChunkIterator, Operations) { } TEST(CordChunkIterator, MaxLengthFullTree) { + // Start with a 1-byte cord, and then double its length in a loop. We should + // be able to do this until the point where we would overflow size_t. + absl::Cord cord; size_t size = 1; AddExternalMemory("x", &cord); EXPECT_EQ(cord.size(), size); - for (int i = 0; i < 63; ++i) { + const int kCordLengthDoublingLimit = std::numeric_limits<size_t>::digits - 1; + for (int i = 0; i < kCordLengthDoublingLimit; ++i) { cord.Prepend(absl::Cord(cord)); size <<= 1; @@ -1431,7 +1435,7 @@ TEST(CordChunkIterator, MaxDepth) { AddExternalMemory("x", &left_child); absl::Cord root = left_child; - for (int i = 0; i < 91; ++i) { + for (int i = 0; i < absl::cord_internal::MaxCordDepth() - 2; ++i) { size_t new_size = left_child.size() + root.size(); root.Prepend(left_child); EXPECT_EQ(root.size(), new_size); @@ -1442,7 +1446,7 @@ TEST(CordChunkIterator, MaxDepth) { std::swap(left_child, root); } - EXPECT_DEATH_IF_SUPPORTED(root.Prepend(left_child), "Cord depth exceeds max"); + EXPECT_DEATH_IF_SUPPORTED(root.Prepend(left_child), "Cord is too long"); } TEST(CordCharIterator, Traits) { |