about summary refs log tree commit diff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/cord.cc21
-rw-r--r--absl/strings/cord.h38
-rw-r--r--absl/strings/cord_test.cc10
3 files changed, 28 insertions, 41 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc
index 9b32b3cc46b6..415b239b0e9c 100644
--- a/absl/strings/cord.cc
+++ b/absl/strings/cord.cc
@@ -187,19 +187,20 @@ static constexpr size_t TagToLength(uint8_t tag) {
 // Enforce that kMaxFlatSize maps to a well-known exact tag value.
 static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic");
 
-constexpr uint64_t Fibonacci(uint8_t n, uint64_t a = 0, uint64_t b = 1) {
-  return n == 0 ? a : n == 1 ? b : Fibonacci(n - 1, b, a + b);
+constexpr size_t Fibonacci(uint8_t n, const size_t a = 0, const size_t b = 1) {
+  return n == 0
+             ? a
+             : n == 1 ? b
+                      : Fibonacci(n - 1, b,
+                                  (a > (size_t(-1) - b)) ? size_t(-1) : a + b);
 }
 
-static_assert(Fibonacci(63) == 6557470319842,
-              "Fibonacci values computed incorrectly");
-
 // Minimum length required for a given depth tree -- a tree is considered
 // balanced if
 //      length(t) >= kMinLength[depth(t)]
 // The node depth is allowed to become larger to reduce rebalancing
 // for larger strings (see ShouldRebalance).
-constexpr uint64_t kMinLength[] = {
+constexpr size_t kMinLength[] = {
     Fibonacci(2),  Fibonacci(3),  Fibonacci(4),  Fibonacci(5),  Fibonacci(6),
     Fibonacci(7),  Fibonacci(8),  Fibonacci(9),  Fibonacci(10), Fibonacci(11),
     Fibonacci(12), Fibonacci(13), Fibonacci(14), Fibonacci(15), Fibonacci(16),
@@ -218,9 +219,9 @@ constexpr uint64_t kMinLength[] = {
     Fibonacci(77), Fibonacci(78), Fibonacci(79), Fibonacci(80), Fibonacci(81),
     Fibonacci(82), Fibonacci(83), Fibonacci(84), Fibonacci(85), Fibonacci(86),
     Fibonacci(87), Fibonacci(88), Fibonacci(89), Fibonacci(90), Fibonacci(91),
-    Fibonacci(92), Fibonacci(93)};
+    Fibonacci(92), Fibonacci(93), Fibonacci(94), Fibonacci(95)};
 
-static_assert(sizeof(kMinLength) / sizeof(uint64_t) ==
+static_assert(sizeof(kMinLength) / sizeof(size_t) >=
                   (cord_internal::MaxCordDepth() + 1),
               "Not enough elements in kMinLength array to cover all the "
               "supported Cord depth(s)");
@@ -1169,9 +1170,9 @@ class CordForest {
   void AddNode(CordRep* node) {
     CordRep* sum = nullptr;
 
-    // Collect together everything with which we will merge node
+    // Collect together everything with which we will merge with node
     int i = 0;
-    for (; node->length > kMinLength[i + 1]; ++i) {
+    for (; node->length >= kMinLength[i + 1]; ++i) {
       auto& tree_at_i = trees_[i];
 
       if (tree_at_i == nullptr) continue;
diff --git a/absl/strings/cord.h b/absl/strings/cord.h
index 3941f19c3451..eb236e50e22e 100644
--- a/absl/strings/cord.h
+++ b/absl/strings/cord.h
@@ -72,39 +72,21 @@ namespace cord_internal {
 // It's expensive to keep a tree perfectly balanced, so instead we keep trees
 // approximately balanced.  A tree node N of depth D(N) that contains a string
 // of L(N) characters is considered balanced if L >= Fibonacci(D + 2).
-// The "+ 2" is used to ensure that every leaf node contains at least one
-// character. Here we presume that
+// The "+ 2" is used to ensure that every balanced leaf node contains at least
+//  one character. Here we presume that
 //   Fibonacci(0) = 0
 //   Fibonacci(1) = 1
 //   Fibonacci(2) = 1
 //   Fibonacci(3) = 2
 //   ...
-//
-// Fibonacci numbers are convenient because it means when two balanced trees of
-// the same depth are made the children of a new node, the resulting tree is
-// guaranteed to also be balanced:
-//
-//
-//   L(left)  >= Fibonacci(D(left) + 2)
-//   L(right) >= Fibonacci(D(right) + 2)
-//
-//   L(left) + L(right) >= Fibonacci(D(left) + 2) + Fibonacci(D(right) + 2)
-//   L(left) + L(right) == L(new_tree)
-//
-//   L(new_tree) >= 2 * Fibonacci(D(child) + 2)
-//   D(child) == D(new_tree) - 1
-//
-//   L(new_tree) >= 2 * Fibonacci(D(new_tree) + 1)
-//   2 * Fibonacci(N) >= Fibonacci(N + 1)
-//
-//   L(new_tree) >= Fibonacci(D(new_tree) + 2)
-//
-//
-// The 93rd Fibonacci number is the largest Fibonacci number that can be
-// represented in 64 bits, so the size of a balanced Cord of depth 92 is too big
-// for an unsigned 64 bit integer to hold.  Therefore we can safely assume that
-// the maximum depth of a Cord is 91.
-constexpr size_t MaxCordDepth() { return 91; }
+// The algorithm is based on paper by Hans Boehm et al:
+// https://www.cs.rit.edu/usr/local/pub/jeh/courses/QUARTERS/FP/Labs/CedarRope/rope-paper.pdf
+// In this paper authors shows that rebalancing based on cord forest of already
+// balanced subtrees can be proven to never produce tree of depth larger than
+// largest Fibonacci number representable in the same integral type as cord size
+// For 64 bit integers this is the 93rd Fibonacci number. For 32 bit integrals
+// this is 47th Fibonacci number.
+constexpr size_t MaxCordDepth() { return sizeof(size_t) == 8 ? 93 : 47; }
 
 // This class models fixed max size stack of CordRep pointers.
 // The elements are being pushed back and popped from the back.
diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc
index d6e091f8b24e..f2d81d4c40d0 100644
--- a/absl/strings/cord_test.cc
+++ b/absl/strings/cord_test.cc
@@ -1403,12 +1403,16 @@ TEST(CordChunkIterator, Operations) {
 }
 
 TEST(CordChunkIterator, MaxLengthFullTree) {
+  // Start with a 1-byte cord, and then double its length in a loop.  We should
+  // be able to do this until the point where we would overflow size_t.
+
   absl::Cord cord;
   size_t size = 1;
   AddExternalMemory("x", &cord);
   EXPECT_EQ(cord.size(), size);
 
-  for (int i = 0; i < 63; ++i) {
+  const int kCordLengthDoublingLimit = std::numeric_limits<size_t>::digits - 1;
+  for (int i = 0; i < kCordLengthDoublingLimit; ++i) {
     cord.Prepend(absl::Cord(cord));
     size <<= 1;
 
@@ -1431,7 +1435,7 @@ TEST(CordChunkIterator, MaxDepth) {
   AddExternalMemory("x", &left_child);
   absl::Cord root = left_child;
 
-  for (int i = 0; i < 91; ++i) {
+  for (int i = 0; i < absl::cord_internal::MaxCordDepth() - 2; ++i) {
     size_t new_size = left_child.size() + root.size();
     root.Prepend(left_child);
     EXPECT_EQ(root.size(), new_size);
@@ -1442,7 +1446,7 @@ TEST(CordChunkIterator, MaxDepth) {
     std::swap(left_child, root);
   }
 
-  EXPECT_DEATH_IF_SUPPORTED(root.Prepend(left_child), "Cord depth exceeds max");
+  EXPECT_DEATH_IF_SUPPORTED(root.Prepend(left_child), "Cord is too long");
 }
 
 TEST(CordCharIterator, Traits) {