diff options
author | Abseil Team <absl-team@google.com> | 2019-03-12T18·39-0700 |
---|---|---|
committer | vslashg <gfalcon@google.com> | 2019-03-12T19·07-0400 |
commit | 256be563447a315f2a7993ec669460ba475fa86a (patch) | |
tree | 80d363bf7055929227415cd3f46f1b1ac62b6944 /absl/container/internal | |
parent | 88a152ae747c3c42dc9167d46c590929b048d436 (diff) |
Export of internal Abseil changes.
-- 00d42e3d5433aaf29c2ed293520b2ba178ae8bdb by Greg Falcon <gfalcon@google.com>: Import of CCTZ from GitHub. PiperOrigin-RevId: 238061818 -- 867a7ca318fac2991ea9a4107dbae3cc9fbf974a by Abseil Team <absl-team@google.com>: Added a IWYU export pragma when including a standard header for the purpose of aliasing its symbols. PiperOrigin-RevId: 238022277 -- 17047745058f2f151cd986ea9f649512542d3876 by Matt Armstrong <marmstrong@google.com>: Clarify the comment discouraging WrapUnique<T>(x) calls. PiperOrigin-RevId: 237873803 -- 3dcb2e4968243d33ca0ce53280c445df50f4a7ec by Samuel Benzaquen <sbenza@google.com>: Workaround clang bug https://bugs.llvm.org/show_bug.cgi?id=38289 PiperOrigin-RevId: 237873551 -- f348d2dc7087a990cbdfb95aa51fd7ff478ae40e by Samuel Benzaquen <sbenza@google.com>: Reduce minimum capacity to 1. This reduces memory usage for small tables. A flat_hash_set<int> of 1 element goes from 92 bytes to 24. A flat_hash_set<string> of 1 element goes from 512 bytes to 56. PiperOrigin-RevId: 237859811 -- 9c8125be5e4e5d22a7bb62bdec8c323338385c1b by Jon Cohen <cohenjon@google.com>: Bump to CMake 3.5. This is the oldest modern cmake being included by default in most popular OS distributions according to https://repology.org/project/cmake/versions. Specifically, Ubuntu LTS 16.04 uses cmake 3.5 (https://packages.ubuntu.com/xenial/cmake) PiperOrigin-RevId: 237859345 -- 07638d672e0a4dced986a62750cfd8318ed36ffa by Derek Mauro <dmauro@google.com>: Import of CCTZ from GitHub. PiperOrigin-RevId: 237714597 GitOrigin-RevId: 00d42e3d5433aaf29c2ed293520b2ba178ae8bdb Change-Id: I5faecc45add4a5a774d4f9baf06e5519091f2ccc
Diffstat (limited to 'absl/container/internal')
-rw-r--r-- | absl/container/internal/raw_hash_set.h | 46 | ||||
-rw-r--r-- | absl/container/internal/raw_hash_set_test.cc | 125 |
2 files changed, 88 insertions, 83 deletions
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h index c4f198bb4247..85e33344c7d5 100644 --- a/absl/container/internal/raw_hash_set.h +++ b/absl/container/internal/raw_hash_set.h @@ -446,9 +446,7 @@ using Group = GroupPortableImpl; template <class Policy, class Hash, class Eq, class Alloc> class raw_hash_set; -inline bool IsValidCapacity(size_t n) { - return ((n + 1) & n) == 0 && n >= Group::kWidth - 1; -} +inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } // PRECONDITION: // IsValidCapacity(capacity) @@ -470,13 +468,9 @@ inline void ConvertDeletedToEmptyAndFullToDeleted( ctrl[capacity] = kSentinel; } -// Rounds up the capacity to the next power of 2 minus 1 and ensures it is -// greater or equal to Group::kWidth - 1. +// Rounds up the capacity to the next power of 2 minus 1, with a minimum of 1. inline size_t NormalizeCapacity(size_t n) { - constexpr size_t kMinCapacity = Group::kWidth - 1; - return n <= kMinCapacity - ? kMinCapacity - : (std::numeric_limits<size_t>::max)() >> LeadingZeros(n); + return n ? ~size_t{} >> LeadingZeros(n) : 1; } // We use 7/8th as maximum load factor. @@ -1507,6 +1501,7 @@ class raw_hash_set { void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE { assert(IsValidCapacity(capacity_)); + assert(!is_small()); // Algorithm: // - mark all DELETED slots as EMPTY // - mark all FULL slots as DELETED @@ -1572,7 +1567,7 @@ class raw_hash_set { void rehash_and_grow_if_necessary() { if (capacity_ == 0) { - resize(Group::kWidth - 1); + resize(1); } else if (size() <= CapacityToGrowth(capacity()) / 2) { // Squash DELETED without growing if there is enough capacity. drop_deletes_without_resize(); @@ -1619,17 +1614,15 @@ class raw_hash_set { auto mask = g.MatchEmptyOrDeleted(); if (mask) { #if !defined(NDEBUG) - // We want to force small tables to have random entries too, so - // in debug build we will randomly insert in either the front or back of + // We want to add entropy even when ASLR is not enabled. + // In debug build we will randomly insert in either the front or back of // the group. // TODO(kfm,sbenza): revisit after we do unconditional mixing - if (ShouldInsertBackwards(hash, ctrl_)) + if (!is_small() && ShouldInsertBackwards(hash, ctrl_)) { return {seq.offset(mask.HighestBitSet()), seq.index()}; - else - return {seq.offset(mask.LowestBitSet()), seq.index()}; -#else - return {seq.offset(mask.LowestBitSet()), seq.index()}; + } #endif + return {seq.offset(mask.LowestBitSet()), seq.index()}; } assert(seq.index() < capacity_ && "full table!"); seq.next(); @@ -1732,11 +1725,28 @@ class raw_hash_set { } ctrl_[i] = h; - ctrl_[((i - Group::kWidth) & capacity_) + Group::kWidth] = h; + ctrl_[((i - Group::kWidth) & capacity_) + 1 + + ((Group::kWidth - 1) & capacity_)] = h; } size_t& growth_left() { return settings_.template get<0>(); } + // The representation of the object has two modes: + // - small: For capacities < kWidth-1 + // - large: For the rest. + // + // Differences: + // - In small mode we are able to use the whole capacity. The extra control + // bytes give us at least one "empty" control byte to stop the iteration. + // This is important to make 1 a valid capacity. + // + // - In small mode only the first `capacity()` control bytes after the + // sentinel are valid. The rest contain dummy kEmpty values that do not + // represent a real slot. This is important to take into account on + // find_first_non_full(), where we never try ShouldInsertBackwards() for + // small tables. + bool is_small() const { return capacity_ < Group::kWidth - 1; } + hasher& hash_ref() { return settings_.template get<1>(); } const hasher& hash_ref() const { return settings_.template get<1>(); } key_equal& eq_ref() { return settings_.template get<2>(); } diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc index b684571feb0c..db0bb823b1a1 100644 --- a/absl/container/internal/raw_hash_set_test.cc +++ b/absl/container/internal/raw_hash_set_test.cc @@ -55,13 +55,16 @@ using ::testing::Pair; using ::testing::UnorderedElementsAre; TEST(Util, NormalizeCapacity) { - constexpr size_t kMinCapacity = Group::kWidth - 1; - EXPECT_EQ(kMinCapacity, NormalizeCapacity(0)); - EXPECT_EQ(kMinCapacity, NormalizeCapacity(1)); - EXPECT_EQ(kMinCapacity, NormalizeCapacity(2)); - EXPECT_EQ(kMinCapacity, NormalizeCapacity(kMinCapacity)); - EXPECT_EQ(kMinCapacity * 2 + 1, NormalizeCapacity(kMinCapacity + 1)); - EXPECT_EQ(kMinCapacity * 2 + 1, NormalizeCapacity(kMinCapacity + 2)); + EXPECT_EQ(1, NormalizeCapacity(0)); + EXPECT_EQ(1, NormalizeCapacity(1)); + EXPECT_EQ(3, NormalizeCapacity(2)); + EXPECT_EQ(3, NormalizeCapacity(3)); + EXPECT_EQ(7, NormalizeCapacity(4)); + EXPECT_EQ(7, NormalizeCapacity(7)); + EXPECT_EQ(15, NormalizeCapacity(8)); + EXPECT_EQ(15, NormalizeCapacity(15)); + EXPECT_EQ(15 * 2 + 1, NormalizeCapacity(15 + 1)); + EXPECT_EQ(15 * 2 + 1, NormalizeCapacity(15 + 2)); } TEST(Util, GrowthAndCapacity) { @@ -72,10 +75,7 @@ TEST(Util, GrowthAndCapacity) { size_t capacity = NormalizeCapacity(GrowthToLowerboundCapacity(growth)); // The capacity is large enough for `growth` EXPECT_THAT(CapacityToGrowth(capacity), Ge(growth)); - if (growth < Group::kWidth - 1) { - // Fits in one group, that is the minimum capacity. - EXPECT_EQ(capacity, Group::kWidth - 1); - } else { + if (growth != 0 && capacity > 1) { // There is no smaller capacity that works. EXPECT_THAT(CapacityToGrowth(capacity / 2), Lt(growth)); } @@ -814,7 +814,7 @@ TEST(Table, EnsureNonQuadraticAsInRust) { TEST(Table, ClearBug) { IntTable t; constexpr size_t capacity = container_internal::Group::kWidth - 1; - constexpr size_t max_size = capacity / 2; + constexpr size_t max_size = capacity / 2 + 1; for (size_t i = 0; i < max_size; ++i) { t.insert(i); } @@ -1741,80 +1741,74 @@ TEST(Nodes, ExtractInsert) { EXPECT_FALSE(node); } -StringTable MakeSimpleTable(size_t size) { - StringTable t; - for (size_t i = 0; i < size; ++i) t.emplace(std::string(1, 'A' + i), ""); +IntTable MakeSimpleTable(size_t size) { + IntTable t; + while (t.size() < size) t.insert(t.size()); return t; } -std::string OrderOfIteration(const StringTable& t) { - std::string order; - for (auto& p : t) order += p.first; - return order; +std::vector<int> OrderOfIteration(const IntTable& t) { + return {t.begin(), t.end()}; } +// These IterationOrderChanges tests depend on non-deterministic behavior. +// We are injecting non-determinism from the pointer of the table, but do so in +// a way that only the page matters. We have to retry enough times to make sure +// we are touching different memory pages to cause the ordering to change. +// We also need to keep the old tables around to avoid getting the same memory +// blocks over and over. TEST(Table, IterationOrderChangesByInstance) { - // Needs to be more than kWidth elements to be able to affect order. - const StringTable reference = MakeSimpleTable(20); - - // Since order is non-deterministic we can't just try once and verify. - // We'll try until we find that order changed. It should not take many tries - // for that. - // Important: we have to keep the old tables around. Otherwise tcmalloc will - // just give us the same blocks and we would be doing the same order again. - std::vector<StringTable> garbage; - for (int i = 0; i < 10; ++i) { - auto trial = MakeSimpleTable(20); - if (OrderOfIteration(trial) != OrderOfIteration(reference)) { - // We are done. - return; + for (size_t size : {2, 6, 12, 20}) { + const auto reference_table = MakeSimpleTable(size); + const auto reference = OrderOfIteration(reference_table); + + std::vector<IntTable> tables; + bool found_difference = false; + for (int i = 0; !found_difference && i < 500; ++i) { + tables.push_back(MakeSimpleTable(size)); + found_difference = OrderOfIteration(tables.back()) != reference; + } + if (!found_difference) { + FAIL() + << "Iteration order remained the same across many attempts with size " + << size; } - garbage.push_back(std::move(trial)); } - FAIL(); } TEST(Table, IterationOrderChangesOnRehash) { - // Since order is non-deterministic we can't just try once and verify. - // We'll try until we find that order changed. It should not take many tries - // for that. - // Important: we have to keep the old tables around. Otherwise tcmalloc will - // just give us the same blocks and we would be doing the same order again. - std::vector<StringTable> garbage; - for (int i = 0; i < 10; ++i) { - // Needs to be more than kWidth elements to be able to affect order. - StringTable t = MakeSimpleTable(20); - const std::string reference = OrderOfIteration(t); + std::vector<IntTable> garbage; + for (int i = 0; i < 500; ++i) { + auto t = MakeSimpleTable(20); + const auto reference = OrderOfIteration(t); // Force rehash to the same size. t.rehash(0); - std::string trial = OrderOfIteration(t); + auto trial = OrderOfIteration(t); if (trial != reference) { // We are done. return; } garbage.push_back(std::move(t)); } - FAIL(); + FAIL() << "Iteration order remained the same across many attempts."; } -TEST(Table, IterationOrderChangesForSmallTables) { - // Since order is non-deterministic we can't just try once and verify. - // We'll try until we find that order changed. - // Important: we have to keep the old tables around. Otherwise tcmalloc will - // just give us the same blocks and we would be doing the same order again. - StringTable reference_table = MakeSimpleTable(5); - const std::string reference = OrderOfIteration(reference_table); - std::vector<StringTable> garbage; - for (int i = 0; i < 50; ++i) { - StringTable t = MakeSimpleTable(5); - std::string trial = OrderOfIteration(t); - if (trial != reference) { - // We are done. - return; - } - garbage.push_back(std::move(t)); - } - FAIL() << "Iteration order remained the same across many attempts."; +// Verify that pointers are invalidated as soon as a second element is inserted. +// This prevents dependency on pointer stability on small tables. +TEST(Table, UnstablePointers) { + IntTable table; + + const auto addr = [&](int i) { + return reinterpret_cast<uintptr_t>(&*table.find(i)); + }; + + table.insert(0); + const uintptr_t old_ptr = addr(0); + + // This causes a rehash. + table.insert(1); + + EXPECT_NE(old_ptr, addr(0)); } // Confirm that we assert if we try to erase() end(). @@ -1857,6 +1851,7 @@ TEST(RawHashSamplerTest, Sample) { #ifdef ADDRESS_SANITIZER TEST(Sanitizer, PoisoningUnused) { IntTable t; + t.reserve(5); // Insert something to force an allocation. int64_t& v1 = *t.insert(0).first; |