Diffstat (limited to 'absl/container/internal')
-rw-r--r--  absl/container/internal/raw_hash_set.h        46
-rw-r--r--  absl/container/internal/raw_hash_set_test.cc  125
2 files changed, 88 insertions(+), 83 deletions(-)
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index c4f198bb4247..85e33344c7d5 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -446,9 +446,7 @@ using Group = GroupPortableImpl;
 template <class Policy, class Hash, class Eq, class Alloc>
 class raw_hash_set;
 
-inline bool IsValidCapacity(size_t n) {
-  return ((n + 1) & n) == 0 && n >= Group::kWidth - 1;
-}
+inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; }
 
 // PRECONDITION:
 //   IsValidCapacity(capacity)
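
For context, the new check accepts exactly the all-ones capacities 2^k - 1
(1, 3, 7, 15, ...), with the minimum relaxed from Group::kWidth - 1 down to 1.
A minimal standalone sketch of the predicate (the sketch name is ours, not
part of the patch):

#include <cstddef>
#include <cstdio>

// Same predicate as the new IsValidCapacity: n + 1 must be a power of two
// (so (n + 1) & n == 0) and n must be nonzero.
inline bool IsValidCapacitySketch(size_t n) {
  return ((n + 1) & n) == 0 && n > 0;
}

int main() {
  // Prints: 1 3 7 15 31 63 127
  for (size_t n = 0; n <= 127; ++n) {
    if (IsValidCapacitySketch(n)) std::printf("%zu ", n);
  }
  std::printf("\n");
}
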
@@ -470,13 +468,9 @@ inline void ConvertDeletedToEmptyAndFullToDeleted(
   ctrl[capacity] = kSentinel;
 }
 
-// Rounds up the capacity to the next power of 2 minus 1 and ensures it is
-// greater or equal to Group::kWidth - 1.
+// Rounds up the capacity to the next power of 2 minus 1, with a minimum of 1.
 inline size_t NormalizeCapacity(size_t n) {
-  constexpr size_t kMinCapacity = Group::kWidth - 1;
-  return n <= kMinCapacity
-             ? kMinCapacity
-             : (std::numeric_limits<size_t>::max)() >> LeadingZeros(n);
+  return n ? ~size_t{} >> LeadingZeros(n) : 1;
 }
 
 // We use 7/8th as maximum load factor.
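
The new NormalizeCapacity one-liner works because shifting an all-ones word
right by LeadingZeros(n) keeps exactly the bits up to and including n's
highest set bit, i.e. the smallest 2^k - 1 that is >= n. A minimal sketch,
substituting C++20's std::countl_zero for absl's internal LeadingZeros:

#include <bit>      // std::countl_zero, standing in for LeadingZeros
#include <cstddef>
#include <cstdio>

size_t NormalizeCapacitySketch(size_t n) {
  // n == 0 must be special-cased: shifting by the full word width is UB.
  return n ? ~size_t{} >> std::countl_zero(n) : 1;
}

int main() {
  // 0 -> 1, 1 -> 1, 2 -> 3, 4 -> 7, 8 -> 15, 9 -> 15, 16 -> 31
  for (size_t n : {0, 1, 2, 4, 8, 9, 16})
    std::printf("%zu -> %zu\n", n, NormalizeCapacitySketch(n));
}
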
@@ -1507,6 +1501,7 @@ class raw_hash_set {
 
   void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE {
     assert(IsValidCapacity(capacity_));
+    assert(!is_small());
     // Algorithm:
     // - mark all DELETED slots as EMPTY
     // - mark all FULL slots as DELETED
@@ -1572,7 +1567,7 @@ class raw_hash_set {
 
   void rehash_and_grow_if_necessary() {
     if (capacity_ == 0) {
-      resize(Group::kWidth - 1);
+      resize(1);
     } else if (size() <= CapacityToGrowth(capacity()) / 2) {
       // Squash DELETED without growing if there is enough capacity.
       drop_deletes_without_resize();
@@ -1619,17 +1614,15 @@ class raw_hash_set {
       auto mask = g.MatchEmptyOrDeleted();
       if (mask) {
 #if !defined(NDEBUG)
-        // We want to force small tables to have random entries too, so
-        // in debug build we will randomly insert in either the front or back of
+        // We want to add entropy even when ASLR is not enabled.
+        // In debug builds we will randomly insert at either the front or back of
         // the group.
         // TODO(kfm,sbenza): revisit after we do unconditional mixing
-        if (ShouldInsertBackwards(hash, ctrl_))
+        if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) {
           return {seq.offset(mask.HighestBitSet()), seq.index()};
-        else
-          return {seq.offset(mask.LowestBitSet()), seq.index()};
-#else
-        return {seq.offset(mask.LowestBitSet()), seq.index()};
+        }
 #endif
+        return {seq.offset(mask.LowestBitSet()), seq.index()};
       }
       assert(seq.index() < capacity_ && "full table!");
       seq.next();
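
A toy model of the debug-only branch above (names are ours): a hash-derived
coin flip picks the back of the group instead of the front, but only for
large tables, because in small mode the high offsets of a group map to dummy
control bytes rather than real slots.

#include <bit>
#include <cstdint>
#include <cstdio>

// mask has one bit per slot of a 16-wide group and is assumed nonzero,
// mirroring the `if (mask)` guard in find_first_non_full().
int PickSlot(uint16_t mask, bool small, bool backwards) {
  if (!small && backwards) {
    return 15 - std::countl_zero(mask);  // highest candidate offset
  }
  return std::countr_zero(mask);  // lowest candidate offset
}

int main() {
  // Mask 0b0110: candidate offsets 1 and 2.
  std::printf("%d\n", PickSlot(0b0110, /*small=*/false, /*backwards=*/true));  // 2
  std::printf("%d\n", PickSlot(0b0110, /*small=*/true, /*backwards=*/true));   // 1
}
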
@@ -1732,11 +1725,28 @@ class raw_hash_set {
     }
 
     ctrl_[i] = h;
-    ctrl_[((i - Group::kWidth) & capacity_) + Group::kWidth] = h;
+    ctrl_[((i - Group::kWidth) & capacity_) + 1 +
+          ((Group::kWidth - 1) & capacity_)] = h;
   }
 
   size_t& growth_left() { return settings_.template get<0>(); }
 
+  // The representation of the object has two modes:
+  //  - small: For capacities < kWidth-1
+  //  - large: For the rest.
+  //
+  // Differences:
+  //  - In small mode we are able to use the whole capacity. The extra control
+  //  bytes give us at least one "empty" control byte to stop the iteration.
+  //  This is important to make 1 a valid capacity.
+  //
+  //  - In small mode only the first `capacity()` control bytes after the
+  //  sentinel are valid. The rest contain dummy kEmpty values that do not
+  //  represent a real slot. This is important to take into account in
+  //  find_first_non_full(), where we never try ShouldInsertBackwards() for
+  //  small tables.
+  bool is_small() const { return capacity_ < Group::kWidth - 1; }
+
   hasher& hash_ref() { return settings_.template get<1>(); }
   const hasher& hash_ref() const { return settings_.template get<1>(); }
   key_equal& eq_ref() { return settings_.template get<2>(); }
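
Two details in this last hunk reward a closer look. For a large table
(capacity >= kWidth - 1, so capacity & (kWidth - 1) == kWidth - 1), the new
clone-index expression in set_ctrl() reduces to the old
((i - kWidth) & capacity) + kWidth; for a small table it mirrors slot i to
just past the sentinel at ctrl[capacity]. A hedged sketch of that arithmetic,
with kWidth fixed at 16 (the SSE2 group width) and a made-up function name:

#include <cstddef>
#include <cstdio>

constexpr size_t kWidth = 16;  // Group width of the SSE2 implementation.

// Index of the cloned control byte for slot i, as in the new set_ctrl().
size_t CloneIndex(size_t i, size_t capacity) {
  return ((i - kWidth) & capacity) + 1 + ((kWidth - 1) & capacity);
}

int main() {
  // Large table (capacity 31): same as the old formula; slot 0 -> 32.
  std::printf("capacity 31, slot 0 -> %zu\n", CloneIndex(0, 31));
  // Small table (capacity 1): slot 0 is mirrored to ctrl[2], right after
  // the sentinel at ctrl[1]. This is what makes capacity 1 workable.
  std::printf("capacity 1,  slot 0 -> %zu\n", CloneIndex(0, 1));
  // Small table (capacity 3): slots 0..2, sentinel at 3, clones at 4..6.
  std::printf("capacity 3,  slot 2 -> %zu\n", CloneIndex(2, 3));
}
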
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index b684571feb0c..db0bb823b1a1 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -55,13 +55,16 @@ using ::testing::Pair;
 using ::testing::UnorderedElementsAre;
 
 TEST(Util, NormalizeCapacity) {
-  constexpr size_t kMinCapacity = Group::kWidth - 1;
-  EXPECT_EQ(kMinCapacity, NormalizeCapacity(0));
-  EXPECT_EQ(kMinCapacity, NormalizeCapacity(1));
-  EXPECT_EQ(kMinCapacity, NormalizeCapacity(2));
-  EXPECT_EQ(kMinCapacity, NormalizeCapacity(kMinCapacity));
-  EXPECT_EQ(kMinCapacity * 2 + 1, NormalizeCapacity(kMinCapacity + 1));
-  EXPECT_EQ(kMinCapacity * 2 + 1, NormalizeCapacity(kMinCapacity + 2));
+  EXPECT_EQ(1, NormalizeCapacity(0));
+  EXPECT_EQ(1, NormalizeCapacity(1));
+  EXPECT_EQ(3, NormalizeCapacity(2));
+  EXPECT_EQ(3, NormalizeCapacity(3));
+  EXPECT_EQ(7, NormalizeCapacity(4));
+  EXPECT_EQ(7, NormalizeCapacity(7));
+  EXPECT_EQ(15, NormalizeCapacity(8));
+  EXPECT_EQ(15, NormalizeCapacity(15));
+  EXPECT_EQ(15 * 2 + 1, NormalizeCapacity(15 + 1));
+  EXPECT_EQ(15 * 2 + 1, NormalizeCapacity(15 + 2));
 }
 
 TEST(Util, GrowthAndCapacity) {
@@ -72,10 +75,7 @@ TEST(Util, GrowthAndCapacity) {
     size_t capacity = NormalizeCapacity(GrowthToLowerboundCapacity(growth));
     // The capacity is large enough for `growth`
     EXPECT_THAT(CapacityToGrowth(capacity), Ge(growth));
-    if (growth < Group::kWidth - 1) {
-      // Fits in one group, that is the minimum capacity.
-      EXPECT_EQ(capacity, Group::kWidth - 1);
-    } else {
+    if (growth != 0 && capacity > 1) {
       // There is no smaller capacity that works.
       EXPECT_THAT(CapacityToGrowth(capacity / 2), Lt(growth));
     }
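
As a rough check of the 7/8 maximum load factor mentioned in the header: this
sketch assumes CapacityToGrowth is essentially capacity - capacity / 8 (our
reading, not something shown in this diff; the real function also
special-cases very small tables), so it is illustrative rather than
authoritative.

#include <cstddef>
#include <cstdio>

size_t CapacityToGrowthSketch(size_t capacity) {
  return capacity - capacity / 8;  // floor(capacity * 7/8)
}

int main() {
  // capacity -> elements allowed before growth:
  // 1 -> 1, 3 -> 3, 7 -> 7, 15 -> 14, 31 -> 28
  for (size_t c : {1, 3, 7, 15, 31})
    std::printf("%zu -> %zu\n", c, CapacityToGrowthSketch(c));
}
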
@@ -814,7 +814,7 @@ TEST(Table, EnsureNonQuadraticAsInRust) {
 TEST(Table, ClearBug) {
   IntTable t;
   constexpr size_t capacity = container_internal::Group::kWidth - 1;
-  constexpr size_t max_size = capacity / 2;
+  constexpr size_t max_size = capacity / 2 + 1;
   for (size_t i = 0; i < max_size; ++i) {
     t.insert(i);
   }
@@ -1741,80 +1741,74 @@ TEST(Nodes, ExtractInsert) {
   EXPECT_FALSE(node);
 }
 
-StringTable MakeSimpleTable(size_t size) {
-  StringTable t;
-  for (size_t i = 0; i < size; ++i) t.emplace(std::string(1, 'A' + i), "");
+IntTable MakeSimpleTable(size_t size) {
+  IntTable t;
+  while (t.size() < size) t.insert(t.size());
   return t;
 }
 
-std::string OrderOfIteration(const StringTable& t) {
-  std::string order;
-  for (auto& p : t) order += p.first;
-  return order;
+std::vector<int> OrderOfIteration(const IntTable& t) {
+  return {t.begin(), t.end()};
 }
 
+// These IterationOrderChanges tests depend on non-deterministic behavior.
+// We inject non-determinism from the address of the table, but in a way
+// where only the containing memory page matters. We have to retry enough
+// times to be sure we touch different memory pages and so change the order.
+// We also need to keep the old tables alive; otherwise the allocator would
+// hand us the same memory blocks over and over.
 TEST(Table, IterationOrderChangesByInstance) {
-  // Needs to be more than kWidth elements to be able to affect order.
-  const StringTable reference = MakeSimpleTable(20);
-
-  // Since order is non-deterministic we can't just try once and verify.
-  // We'll try until we find that order changed. It should not take many tries
-  // for that.
-  // Important: we have to keep the old tables around. Otherwise tcmalloc will
-  // just give us the same blocks and we would be doing the same order again.
-  std::vector<StringTable> garbage;
-  for (int i = 0; i < 10; ++i) {
-    auto trial = MakeSimpleTable(20);
-    if (OrderOfIteration(trial) != OrderOfIteration(reference)) {
-      // We are done.
-      return;
+  for (size_t size : {2, 6, 12, 20}) {
+    const auto reference_table = MakeSimpleTable(size);
+    const auto reference = OrderOfIteration(reference_table);
+
+    std::vector<IntTable> tables;
+    bool found_difference = false;
+    for (int i = 0; !found_difference && i < 500; ++i) {
+      tables.push_back(MakeSimpleTable(size));
+      found_difference = OrderOfIteration(tables.back()) != reference;
+    }
+    if (!found_difference) {
+      FAIL()
+          << "Iteration order remained the same across many attempts with size "
+          << size;
     }
-    garbage.push_back(std::move(trial));
   }
-  FAIL();
 }
 
 TEST(Table, IterationOrderChangesOnRehash) {
-  // Since order is non-deterministic we can't just try once and verify.
-  // We'll try until we find that order changed. It should not take many tries
-  // for that.
-  // Important: we have to keep the old tables around. Otherwise tcmalloc will
-  // just give us the same blocks and we would be doing the same order again.
-  std::vector<StringTable> garbage;
-  for (int i = 0; i < 10; ++i) {
-    // Needs to be more than kWidth elements to be able to affect order.
-    StringTable t = MakeSimpleTable(20);
-    const std::string reference = OrderOfIteration(t);
+  std::vector<IntTable> garbage;
+  for (int i = 0; i < 500; ++i) {
+    auto t = MakeSimpleTable(20);
+    const auto reference = OrderOfIteration(t);
     // Force rehash to the same size.
     t.rehash(0);
-    std::string trial = OrderOfIteration(t);
+    auto trial = OrderOfIteration(t);
     if (trial != reference) {
       // We are done.
       return;
     }
     garbage.push_back(std::move(t));
   }
-  FAIL();
+  FAIL() << "Iteration order remained the same across many attempts.";
 }
 
-TEST(Table, IterationOrderChangesForSmallTables) {
-  // Since order is non-deterministic we can't just try once and verify.
-  // We'll try until we find that order changed.
-  // Important: we have to keep the old tables around. Otherwise tcmalloc will
-  // just give us the same blocks and we would be doing the same order again.
-  StringTable reference_table = MakeSimpleTable(5);
-  const std::string reference = OrderOfIteration(reference_table);
-  std::vector<StringTable> garbage;
-  for (int i = 0; i < 50; ++i) {
-    StringTable t = MakeSimpleTable(5);
-    std::string trial = OrderOfIteration(t);
-    if (trial != reference) {
-      // We are done.
-      return;
-    }
-    garbage.push_back(std::move(t));
-  }
-  FAIL() << "Iteration order remained the same across many attempts.";
+// Verify that pointers are invalidated as soon as a second element is inserted.
+// This prevents code from depending on pointer stability in small tables.
+TEST(Table, UnstablePointers) {
+  IntTable table;
+
+  const auto addr = [&](int i) {
+    return reinterpret_cast<uintptr_t>(&*table.find(i));
+  };
+
+  table.insert(0);
+  const uintptr_t old_ptr = addr(0);
+
+  // This causes a rehash.
+  table.insert(1);
+
+  EXPECT_NE(old_ptr, addr(0));
 }
 
 // Confirm that we assert if we try to erase() end().
@@ -1857,6 +1851,7 @@ TEST(RawHashSamplerTest, Sample) {
 #ifdef ADDRESS_SANITIZER
 TEST(Sanitizer, PoisoningUnused) {
   IntTable t;
+  t.reserve(5);
   // Insert something to force an allocation.
   int64_t& v1 = *t.insert(0).first;