about summary refs log tree commit diff
path: root/absl/container
diff options
context:
space:
mode:
Diffstat (limited to 'absl/container')
-rw-r--r--absl/container/BUILD.bazel31
-rw-r--r--absl/container/CMakeLists.txt27
-rw-r--r--absl/container/internal/hashtablez_sampler.cc289
-rw-r--r--absl/container/internal/hashtablez_sampler.h236
-rw-r--r--absl/container/internal/hashtablez_sampler_test.cc307
-rw-r--r--absl/container/internal/raw_hash_set.h53
-rw-r--r--absl/container/internal/raw_hash_set_test.cc22
7 files changed, 949 insertions, 16 deletions
diff --git a/absl/container/BUILD.bazel b/absl/container/BUILD.bazel
index 55aea3979f44..e1880a8b546f 100644
--- a/absl/container/BUILD.bazel
+++ b/absl/container/BUILD.bazel
@@ -437,6 +437,35 @@ cc_library(
 )
 
 cc_library(
+    name = "hashtablez_sampler",
+    srcs = ["internal/hashtablez_sampler.cc"],
+    hdrs = ["internal/hashtablez_sampler.h"],
+    copts = ABSL_DEFAULT_COPTS,
+    deps = [
+        ":have_sse",
+        "//absl/base:core_headers",
+        "//absl/debugging:stacktrace",
+        "//absl/memory",
+        "//absl/synchronization",
+        "//absl/utility",
+    ],
+)
+
+cc_test(
+    name = "hashtablez_sampler_test",
+    srcs = ["internal/hashtablez_sampler_test.cc"],
+    deps = [
+        ":hashtablez_sampler",
+        ":have_sse",
+        "//absl/base:core_headers",
+        "//absl/synchronization",
+        "//absl/synchronization:thread_pool",
+        "//absl/time",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_library(
     name = "node_hash_policy",
     hdrs = ["internal/node_hash_policy.h"],
     copts = ABSL_DEFAULT_COPTS,
@@ -467,6 +496,7 @@ cc_library(
     name = "have_sse",
     hdrs = ["internal/have_sse.h"],
     copts = ABSL_DEFAULT_COPTS,
+    visibility = ["//visibility:private"],
 )
 
 cc_library(
@@ -479,6 +509,7 @@ cc_library(
         ":container_memory",
         ":hash_policy_traits",
         ":hashtable_debug_hooks",
+        ":hashtablez_sampler",
         ":have_sse",
         ":layout",
         "//absl/base:bits",
diff --git a/absl/container/CMakeLists.txt b/absl/container/CMakeLists.txt
index 9f78100468a5..eb1b9ed72bfc 100644
--- a/absl/container/CMakeLists.txt
+++ b/absl/container/CMakeLists.txt
@@ -433,6 +433,31 @@ absl_cc_test(
 
 absl_cc_library(
   NAME
+    hashtablez_sampler
+  HDRS
+    "internal/hashtablez_sampler.h"
+  SRCS
+    "internal/hashtablez_sampler.cc"
+  COPTS
+    ${ABSL_DEFAULT_COPTS}
+  DEPS
+    absl::have_sse
+    absl::synchronization
+)
+
+absl_cc_test(
+  NAME
+    hashtablez_sampler_test
+  SRCS
+    "internal/hashtablez_sampler_test.cc"
+  DEPS
+    absl::hashtablez_sampler
+    absl::have_sse
+    gmock_main
+)
+
+absl_cc_library(
+  NAME
     hashtable_debug
   HDRS
     "internal/hashtable_debug.h"
@@ -459,7 +484,6 @@ absl_cc_library(
     "internal/have_sse.h"
   COPTS
     ${ABSL_DEFAULT_COPTS}
-  PUBLIC
 )
 
 absl_cc_library(
@@ -520,6 +544,7 @@ absl_cc_library(
     absl::meta
     absl::optional
     absl::utility
+    absl::hashtablez_sampler
   PUBLIC
 )
 
diff --git a/absl/container/internal/hashtablez_sampler.cc b/absl/container/internal/hashtablez_sampler.cc
new file mode 100644
index 000000000000..6cc10c201c34
--- /dev/null
+++ b/absl/container/internal/hashtablez_sampler.cc
@@ -0,0 +1,289 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/container/internal/hashtablez_sampler.h"
+
+#include <atomic>
+#include <cassert>
+#include <functional>
+#include <limits>
+
+#include "absl/base/attributes.h"
+#include "absl/container/internal/have_sse.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/memory/memory.h"
+#include "absl/synchronization/mutex.h"
+
+namespace absl {
+namespace container_internal {
+constexpr int HashtablezInfo::kMaxStackDepth;
+
+namespace {
+ABSL_CONST_INIT std::atomic<bool> g_hashtablez_enabled{
+   false
+};
+ABSL_CONST_INIT std::atomic<int32_t> g_hashtablez_sample_parameter{1 << 10};
+ABSL_CONST_INIT std::atomic<int32_t> g_hashtablez_max_samples{1 << 20};
+
+// Returns the next pseudo-random value.
+// pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
+// This is the lrand64 generator.
+uint64_t NextRandom(uint64_t rnd) {
+  const uint64_t prng_mult = uint64_t{0x5DEECE66D};
+  const uint64_t prng_add = 0xB;
+  const uint64_t prng_mod_power = 48;
+  const uint64_t prng_mod_mask = ~(~uint64_t{0} << prng_mod_power);
+  return (prng_mult * rnd + prng_add) & prng_mod_mask;
+}
+
+// Generates a geometric variable with the specified mean.
+// This is done by generating a random number between 0 and 1 and applying
+// the inverse cumulative distribution function for an exponential.
+// Specifically: Let m be the inverse of the sample period, then
+// the probability distribution function is m*exp(-mx) so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is actually in the range 1 to 2**26, hence the -26 below
+//
+int64_t GetGeometricVariable(int64_t mean) {
+#if ABSL_HAVE_THREAD_LOCAL
+  thread_local
+#else   // ABSL_HAVE_THREAD_LOCAL
+  // SampleSlow and hence GetGeometricVariable is guarded by a single mutex when
+  // there are not thread locals.  Thus, a single global rng is acceptable for
+  // that case.
+  static
+#endif  // ABSL_HAVE_THREAD_LOCAL
+      uint64_t rng = []() {
+        // We don't get well distributed numbers from this so we call
+        // NextRandom() a bunch to mush the bits around.  We use a global_rand
+        // to handle the case where the same thread (by memory address) gets
+        // created and destroyed repeatedly.
+        ABSL_CONST_INIT static std::atomic<uint32_t> global_rand(0);
+        uint64_t r = reinterpret_cast<uint64_t>(&rng) +
+                   global_rand.fetch_add(1, std::memory_order_relaxed);
+        for (int i = 0; i < 20; ++i) {
+          r = NextRandom(r);
+        }
+        return r;
+      }();
+
+  rng = NextRandom(rng);
+
+  // Take the top 26 bits as the random number
+  // (This plus the 1<<58 sampling bound give a max possible step of
+  // 5194297183973780480 bytes.)
+  const uint64_t prng_mod_power = 48;  // Number of bits in prng
+  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+  // under piii debug for some binaries.
+  double q = static_cast<uint32_t>(rng >> (prng_mod_power - 26)) + 1.0;
+  // Put the computed p-value through the CDF of a geometric.
+  double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean);
+
+  // Very large values of interval overflow int64_t. If we happen to
+  // hit such improbable condition, we simply cheat and clamp interval
+  // to largest supported value.
+  if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) {
+    return std::numeric_limits<int64_t>::max() / 2;
+  }
+
+  // Small values of interval are equivalent to just sampling next time.
+  if (interval < 1) {
+    return 1;
+  }
+  return static_cast<int64_t>(interval);
+}
+
+}  // namespace
+
+HashtablezSampler& HashtablezSampler::Global() {
+  static auto* sampler = new HashtablezSampler();
+  return *sampler;
+}
+
+HashtablezInfo::HashtablezInfo() { PrepareForSampling(); }
+HashtablezInfo::~HashtablezInfo() = default;
+
+void HashtablezInfo::PrepareForSampling() {
+  capacity.store(0, std::memory_order_relaxed);
+  size.store(0, std::memory_order_relaxed);
+  num_erases.store(0, std::memory_order_relaxed);
+  max_probe_length.store(0, std::memory_order_relaxed);
+  total_probe_length.store(0, std::memory_order_relaxed);
+  hashes_bitwise_or.store(0, std::memory_order_relaxed);
+  hashes_bitwise_and.store(~size_t{}, std::memory_order_relaxed);
+
+  create_time = absl::Now();
+  // The inliner makes hardcoded skip_count difficult (especially when combined
+  // with LTO).  We use the ability to exclude stacks by regex when encoding
+  // instead.
+  depth = absl::GetStackTrace(stack, HashtablezInfo::kMaxStackDepth,
+                              /* skip_count= */ 0);
+  dead = nullptr;
+}
+
+HashtablezSampler::HashtablezSampler()
+    : dropped_samples_(0), size_estimate_(0), all_(nullptr) {
+  absl::MutexLock l(&graveyard_.init_mu);
+  graveyard_.dead = &graveyard_;
+}
+
+HashtablezSampler::~HashtablezSampler() {
+  HashtablezInfo* s = all_.load(std::memory_order_acquire);
+  while (s != nullptr) {
+    HashtablezInfo* next = s->next;
+    delete s;
+    s = next;
+  }
+}
+
+void HashtablezSampler::PushNew(HashtablezInfo* sample) {
+  sample->next = all_.load(std::memory_order_relaxed);
+  while (!all_.compare_exchange_weak(sample->next, sample,
+                                     std::memory_order_release,
+                                     std::memory_order_relaxed)) {
+  }
+}
+
+void HashtablezSampler::PushDead(HashtablezInfo* sample) {
+  absl::MutexLock graveyard_lock(&graveyard_.init_mu);
+  absl::MutexLock sample_lock(&sample->init_mu);
+  sample->dead = graveyard_.dead;
+  graveyard_.dead = sample;
+}
+
+HashtablezInfo* HashtablezSampler::PopDead() {
+  absl::MutexLock graveyard_lock(&graveyard_.init_mu);
+
+  // The list is circular, so eventually it collapses down to
+  //   graveyard_.dead == &graveyard_
+  // when it is empty.
+  HashtablezInfo* sample = graveyard_.dead;
+  if (sample == &graveyard_) return nullptr;
+
+  absl::MutexLock sample_lock(&sample->init_mu);
+  graveyard_.dead = sample->dead;
+  sample->PrepareForSampling();
+  return sample;
+}
+
+HashtablezInfo* HashtablezSampler::Register() {
+  int64_t size = size_estimate_.fetch_add(1, std::memory_order_relaxed);
+  if (size > g_hashtablez_max_samples.load(std::memory_order_relaxed)) {
+    size_estimate_.fetch_sub(1, std::memory_order_relaxed);
+    dropped_samples_.fetch_add(1, std::memory_order_relaxed);
+    return nullptr;
+  }
+
+  HashtablezInfo* sample = PopDead();
+  if (sample == nullptr) {
+    // Resurrection failed.  Hire a new warlock.
+    sample = new HashtablezInfo();
+    PushNew(sample);
+  }
+
+  return sample;
+}
+
+void HashtablezSampler::Unregister(HashtablezInfo* sample) {
+  PushDead(sample);
+  size_estimate_.fetch_sub(1, std::memory_order_relaxed);
+}
+
+int64_t HashtablezSampler::Iterate(
+    const std::function<void(const HashtablezInfo& stack)>& f) {
+  HashtablezInfo* s = all_.load(std::memory_order_acquire);
+  while (s != nullptr) {
+    absl::MutexLock l(&s->init_mu);
+    if (s->dead == nullptr) {
+      f(*s);
+    }
+    s = s->next;
+  }
+
+  return dropped_samples_.load(std::memory_order_relaxed);
+}
+
+HashtablezInfo* SampleSlow(int64_t* next_sample) {
+  bool first = *next_sample < 0;
+  *next_sample = GetGeometricVariable(
+      g_hashtablez_sample_parameter.load(std::memory_order_relaxed));
+
+  // g_hashtablez_enabled can be dynamically flipped, we need to set a threshold
+  // low enough that we will start sampling in a reasonable time, so we just use
+  // the default sampling rate.
+  if (!g_hashtablez_enabled.load(std::memory_order_relaxed)) return nullptr;
+
+  // We will only be negative on our first count, so we should just retry in
+  // that case.
+  if (first) {
+    if (ABSL_PREDICT_TRUE(--*next_sample > 0)) return nullptr;
+    return SampleSlow(next_sample);
+  }
+
+  return HashtablezSampler::Global().Register();
+}
+
+void UnsampleSlow(HashtablezInfo* info) {
+  HashtablezSampler::Global().Unregister(info);
+}
+
+void RecordInsertSlow(HashtablezInfo* info, size_t hash,
+                      size_t distance_from_desired) {
+  // SwissTables probe in groups of 16, so scale this to count items probes and
+  // not offset from desired.
+  size_t probe_length = distance_from_desired;
+#if SWISSTABLE_HAVE_SSE2
+  probe_length /= 16;
+#else
+  probe_length /= 8;
+#endif
+
+  info->hashes_bitwise_and.fetch_and(hash, std::memory_order_relaxed);
+  info->hashes_bitwise_or.fetch_or(hash, std::memory_order_relaxed);
+  info->max_probe_length.store(
+      std::max(info->max_probe_length.load(std::memory_order_relaxed),
+               probe_length),
+      std::memory_order_relaxed);
+  info->total_probe_length.fetch_add(probe_length, std::memory_order_relaxed);
+  info->size.fetch_add(1, std::memory_order_relaxed);
+}
+
+void SetHashtablezEnabled(bool enabled) {
+  g_hashtablez_enabled.store(enabled, std::memory_order_release);
+}
+
+void SetHashtablezSampleParameter(int32_t rate) {
+  if (rate > 0) {
+    g_hashtablez_sample_parameter.store(rate, std::memory_order_release);
+  } else {
+    ABSL_RAW_LOG(ERROR, "Invalid hashtablez sample rate: %lld",
+                 static_cast<long long>(rate));  // NOLINT(runtime/int)
+  }
+}
+
+void SetHashtablezMaxSamples(int32_t max) {
+  if (max > 0) {
+    g_hashtablez_max_samples.store(max, std::memory_order_release);
+  } else {
+    ABSL_RAW_LOG(ERROR, "Invalid hashtablez max samples: %lld",
+                 static_cast<long long>(max));  // NOLINT(runtime/int)
+  }
+}
+
+}  // namespace container_internal
+}  // namespace absl
diff --git a/absl/container/internal/hashtablez_sampler.h b/absl/container/internal/hashtablez_sampler.h
new file mode 100644
index 000000000000..4aea3ffa67de
--- /dev/null
+++ b/absl/container/internal/hashtablez_sampler.h
@@ -0,0 +1,236 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This is a low level library to sample hashtables and collect runtime
+// statistics about them.
+//
+// `HashtablezSampler` controls the lifecycle of `HashtablezInfo` objects which
+// store information about a single sample.
+//
+// `Record*` methods store information into samples.
+// `Sample()` and `Unsample()` make use of a single global sampler with
+// properties controlled by the flags hashtablez_enabled,
+// hashtablez_sample_rate, and hashtablez_max_samples.
+
+#ifndef ABSL_CONTAINER_INTERNAL_HASHTABLEZ_SAMPLER_H_
+#define ABSL_CONTAINER_INTERNAL_HASHTABLEZ_SAMPLER_H_
+
+#include <atomic>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "absl/base/optimization.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/utility/utility.h"
+
+namespace absl {
+namespace container_internal {
+
+// Stores information about a sampled hashtable.  All mutations to this *must*
+// be made through `Record*` functions below.  All reads from this *must* only
+// occur in the callback to `HashtablezSampler::Iterate`.
+struct HashtablezInfo {
+  // Constructs the object but does not fill in any fields.
+  HashtablezInfo();
+  ~HashtablezInfo();
+  HashtablezInfo(const HashtablezInfo&) = delete;
+  HashtablezInfo& operator=(const HashtablezInfo&) = delete;
+
+  // Puts the object into a clean state, fills in the logically `const` members,
+  // blocking for any readers that are currently sampling the object.
+  void PrepareForSampling() EXCLUSIVE_LOCKS_REQUIRED(init_mu);
+
+  // These fields are mutated by the various Record* APIs and need to be
+  // thread-safe.
+  std::atomic<size_t> capacity;
+  std::atomic<size_t> size;
+  std::atomic<size_t> num_erases;
+  std::atomic<size_t> max_probe_length;
+  std::atomic<size_t> total_probe_length;
+  std::atomic<size_t> hashes_bitwise_or;
+  std::atomic<size_t> hashes_bitwise_and;
+
+  // `HashtablezSampler` maintains intrusive linked lists for all samples.  See
+  // comments on `HashtablezSampler::all_` for details on these.  `init_mu`
+  // guards the ability to restore the sample to a pristine state.  This
+  // prevents races with sampling and resurrecting an object.
+  absl::Mutex init_mu;
+  HashtablezInfo* next;
+  HashtablezInfo* dead GUARDED_BY(init_mu);
+
+  // All of the fields below are set by `PrepareForSampling`, they must not be
+  // mutated in `Record*` functions.  They are logically `const` in that sense.
+  // These are guarded by init_mu, but that is not externalized to clients, who
+  // can only read them during `HashtablezSampler::Iterate` which will hold the
+  // lock.
+  static constexpr int kMaxStackDepth = 64;
+  absl::Time create_time;
+  int32_t depth;
+  void* stack[kMaxStackDepth];
+};
+
+inline void RecordStorageChangedSlow(HashtablezInfo* info, size_t size,
+                                     size_t capacity) {
+  info->size.store(size, std::memory_order_relaxed);
+  info->capacity.store(capacity, std::memory_order_relaxed);
+}
+
+void RecordInsertSlow(HashtablezInfo* info, size_t hash,
+                      size_t distance_from_desired);
+
+inline void RecordEraseSlow(HashtablezInfo* info) {
+  info->size.fetch_sub(1, std::memory_order_relaxed);
+  info->num_erases.fetch_add(1, std::memory_order_relaxed);
+}
+
+HashtablezInfo* SampleSlow(int64_t* next_sample);
+void UnsampleSlow(HashtablezInfo* info);
+
+class HashtablezInfoHandle {
+ public:
+  explicit HashtablezInfoHandle() : info_(nullptr) {}
+  explicit HashtablezInfoHandle(HashtablezInfo* info) : info_(info) {}
+  ~HashtablezInfoHandle() {
+    if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
+    UnsampleSlow(info_);
+  }
+
+  HashtablezInfoHandle(const HashtablezInfoHandle&) = delete;
+  HashtablezInfoHandle& operator=(const HashtablezInfoHandle&) = delete;
+
+  HashtablezInfoHandle(HashtablezInfoHandle&& o) noexcept
+      : info_(absl::exchange(o.info_, nullptr)) {}
+  HashtablezInfoHandle& operator=(HashtablezInfoHandle&& o) noexcept {
+    if (ABSL_PREDICT_FALSE(info_ != nullptr)) {
+      UnsampleSlow(info_);
+    }
+    info_ = absl::exchange(o.info_, nullptr);
+    return *this;
+  }
+
+  inline void RecordStorageChanged(size_t size, size_t capacity) {
+    if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
+    RecordStorageChangedSlow(info_, size, capacity);
+  }
+
+  inline void RecordInsert(size_t hash, size_t distance_from_desired) {
+    if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
+    RecordInsertSlow(info_, hash, distance_from_desired);
+  }
+
+  inline void RecordErase() {
+    if (ABSL_PREDICT_TRUE(info_ == nullptr)) return;
+    RecordEraseSlow(info_);
+  }
+
+  friend inline void swap(HashtablezInfoHandle& lhs,
+                          HashtablezInfoHandle& rhs) {
+    std::swap(lhs.info_, rhs.info_);
+  }
+
+ private:
+  friend class HashtablezInfoHandlePeer;
+  HashtablezInfo* info_;
+};
+
+// Returns an RAII sampling handle that manages registration and unregistation
+// with the global sampler.
+inline HashtablezInfoHandle Sample() {
+#if ABSL_HAVE_THREAD_LOCAL
+  thread_local int64_t next_sample = 0;
+#else   // ABSL_HAVE_THREAD_LOCAL
+  static auto* mu = new absl::Mutex;
+  static int64_t next_sample = 0;
+  absl::MutexLock l(mu);
+#endif  // ABSL_HAVE_THREAD_LOCAL
+
+  if (ABSL_PREDICT_TRUE(--next_sample > 0)) {
+    return HashtablezInfoHandle(nullptr);
+  }
+  return HashtablezInfoHandle(SampleSlow(&next_sample));
+}
+
+// Holds samples and their associated stack traces with a soft limit of
+// `SetHashtablezMaxSamples()`.
+//
+// Thread safe.
+class HashtablezSampler {
+ public:
+  // Returns a global Sampler.
+  static HashtablezSampler& Global();
+
+  HashtablezSampler();
+  ~HashtablezSampler();
+
+  // Registers for sampling.  Returns an opaque registration info.
+  HashtablezInfo* Register();
+
+  // Unregisters the sample.
+  void Unregister(HashtablezInfo* sample);
+
+  // Iterates over all the registered `StackInfo`s.  Returning the number of
+  // samples that have been dropped.
+  int64_t Iterate(const std::function<void(const HashtablezInfo& stack)>& f);
+
+ private:
+  void PushNew(HashtablezInfo* sample);
+  void PushDead(HashtablezInfo* sample);
+  HashtablezInfo* PopDead();
+
+  std::atomic<size_t> dropped_samples_;
+  std::atomic<size_t> size_estimate_;
+
+  // Intrusive lock free linked lists for tracking samples.
+  //
+  // `all_` records all samples (they are never removed from this list) and is
+  // terminated with a `nullptr`.
+  //
+  // `graveyard_.dead` is a circular linked list.  When it is empty,
+  // `graveyard_.dead == &graveyard`.  The list is circular so that
+  // every item on it (even the last) has a non-null dead pointer.  This allows
+  // `Iterate` to determine if a given sample is live or dead using only
+  // information on the sample itself.
+  //
+  // For example, nodes [A, B, C, D, E] with [A, C, E] alive and [B, D] dead
+  // looks like this (G is the Graveyard):
+  //
+  //           +---+    +---+    +---+    +---+    +---+
+  //    all -->| A |--->| B |--->| C |--->| D |--->| E |
+  //           |   |    |   |    |   |    |   |    |   |
+  //   +---+   |   | +->|   |-+  |   | +->|   |-+  |   |
+  //   | G |   +---+ |  +---+ |  +---+ |  +---+ |  +---+
+  //   |   |         |        |        |        |
+  //   |   | --------+        +--------+        |
+  //   +---+                                    |
+  //     ^                                      |
+  //     +--------------------------------------+
+  //
+  std::atomic<HashtablezInfo*> all_;
+  HashtablezInfo graveyard_;
+};
+
+// Enables or disables sampling for Swiss tables.
+void SetHashtablezEnabled(bool enabled);
+
+// Sets the rate at which Swiss tables will be sampled.
+void SetHashtablezSampleParameter(int32_t rate);
+
+// Sets a soft max for the number of samples that will be kept.
+void SetHashtablezMaxSamples(int32_t max);
+
+}  // namespace container_internal
+}  // namespace absl
+
+#endif  // ABSL_CONTAINER_INTERNAL_HASHTABLEZ_SAMPLER_H_
diff --git a/absl/container/internal/hashtablez_sampler_test.cc b/absl/container/internal/hashtablez_sampler_test.cc
new file mode 100644
index 000000000000..31e7641a1222
--- /dev/null
+++ b/absl/container/internal/hashtablez_sampler_test.cc
@@ -0,0 +1,307 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/container/internal/hashtablez_sampler.h"
+
+#include <atomic>
+#include <limits>
+#include <random>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/attributes.h"
+#include "absl/container/internal/have_sse.h"
+#include "absl/synchronization/blocking_counter.h"
+#include "absl/synchronization/internal/thread_pool.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/synchronization/notification.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+
+#if SWISSTABLE_HAVE_SSE2
+constexpr int kProbeLength = 16;
+#else
+constexpr int kProbeLength = 8;
+#endif
+
+namespace absl {
+namespace container_internal {
+class HashtablezInfoHandlePeer {
+ public:
+  static bool IsSampled(const HashtablezInfoHandle& h) {
+    return h.info_ != nullptr;
+  }
+
+  static HashtablezInfo* GetInfo(HashtablezInfoHandle* h) { return h->info_; }
+};
+
+namespace {
+using ::absl::synchronization_internal::ThreadPool;
+using ::testing::IsEmpty;
+using ::testing::UnorderedElementsAre;
+
+std::vector<size_t> GetSizes(HashtablezSampler* s) {
+  std::vector<size_t> res;
+  s->Iterate([&](const HashtablezInfo& info) {
+    res.push_back(info.size.load(std::memory_order_acquire));
+  });
+  return res;
+}
+
+HashtablezInfo* Register(HashtablezSampler* s, size_t size) {
+  auto* info = s->Register();
+  assert(info != nullptr);
+  info->size.store(size);
+  return info;
+}
+
+TEST(HashtablezInfoTest, PrepareForSampling) {
+  absl::Time test_start = absl::Now();
+  HashtablezInfo info;
+  absl::MutexLock l(&info.init_mu);
+  info.PrepareForSampling();
+
+  EXPECT_EQ(info.capacity.load(), 0);
+  EXPECT_EQ(info.size.load(), 0);
+  EXPECT_EQ(info.num_erases.load(), 0);
+  EXPECT_EQ(info.max_probe_length.load(), 0);
+  EXPECT_EQ(info.total_probe_length.load(), 0);
+  EXPECT_EQ(info.hashes_bitwise_or.load(), 0);
+  EXPECT_EQ(info.hashes_bitwise_and.load(), ~size_t{});
+  EXPECT_GE(info.create_time, test_start);
+
+  info.capacity.store(1, std::memory_order_relaxed);
+  info.size.store(1, std::memory_order_relaxed);
+  info.num_erases.store(1, std::memory_order_relaxed);
+  info.max_probe_length.store(1, std::memory_order_relaxed);
+  info.total_probe_length.store(1, std::memory_order_relaxed);
+  info.hashes_bitwise_or.store(1, std::memory_order_relaxed);
+  info.hashes_bitwise_and.store(1, std::memory_order_relaxed);
+  info.create_time = test_start - absl::Hours(20);
+
+  info.PrepareForSampling();
+  EXPECT_EQ(info.capacity.load(), 0);
+  EXPECT_EQ(info.size.load(), 0);
+  EXPECT_EQ(info.num_erases.load(), 0);
+  EXPECT_EQ(info.max_probe_length.load(), 0);
+  EXPECT_EQ(info.total_probe_length.load(), 0);
+  EXPECT_EQ(info.hashes_bitwise_or.load(), 0);
+  EXPECT_EQ(info.hashes_bitwise_and.load(), ~size_t{});
+  EXPECT_GE(info.create_time, test_start);
+}
+
+TEST(HashtablezInfoTest, RecordStorageChanged) {
+  HashtablezInfo info;
+  absl::MutexLock l(&info.init_mu);
+  info.PrepareForSampling();
+  RecordStorageChangedSlow(&info, 17, 47);
+  EXPECT_EQ(info.size.load(), 17);
+  EXPECT_EQ(info.capacity.load(), 47);
+  RecordStorageChangedSlow(&info, 20, 20);
+  EXPECT_EQ(info.size.load(), 20);
+  EXPECT_EQ(info.capacity.load(), 20);
+}
+
+TEST(HashtablezInfoTest, RecordInsert) {
+  HashtablezInfo info;
+  absl::MutexLock l(&info.init_mu);
+  info.PrepareForSampling();
+  EXPECT_EQ(info.max_probe_length.load(), 0);
+  RecordInsertSlow(&info, 0x0000FF00, 6 * kProbeLength);
+  EXPECT_EQ(info.max_probe_length.load(), 6);
+  EXPECT_EQ(info.hashes_bitwise_and.load(), 0x0000FF00);
+  EXPECT_EQ(info.hashes_bitwise_or.load(), 0x0000FF00);
+  RecordInsertSlow(&info, 0x000FF000, 4 * kProbeLength);
+  EXPECT_EQ(info.max_probe_length.load(), 6);
+  EXPECT_EQ(info.hashes_bitwise_and.load(), 0x0000F000);
+  EXPECT_EQ(info.hashes_bitwise_or.load(), 0x000FFF00);
+  RecordInsertSlow(&info, 0x00FF0000, 12 * kProbeLength);
+  EXPECT_EQ(info.max_probe_length.load(), 12);
+  EXPECT_EQ(info.hashes_bitwise_and.load(), 0x00000000);
+  EXPECT_EQ(info.hashes_bitwise_or.load(), 0x00FFFF00);
+}
+
+TEST(HashtablezInfoTest, RecordErase) {
+  HashtablezInfo info;
+  absl::MutexLock l(&info.init_mu);
+  info.PrepareForSampling();
+  EXPECT_EQ(info.num_erases.load(), 0);
+  EXPECT_EQ(info.size.load(), 0);
+  RecordInsertSlow(&info, 0x0000FF00, 6 * kProbeLength);
+  EXPECT_EQ(info.size.load(), 1);
+  RecordEraseSlow(&info);
+  EXPECT_EQ(info.size.load(), 0);
+  EXPECT_EQ(info.num_erases.load(), 1);
+}
+
+TEST(HashtablezSamplerTest, SmallSampleParameter) {
+  SetHashtablezEnabled(true);
+  SetHashtablezSampleParameter(100);
+
+  for (int i = 0; i < 1000; ++i) {
+    int64_t next_sample = 0;
+    HashtablezInfo* sample = SampleSlow(&next_sample);
+    EXPECT_GT(next_sample, 0);
+    EXPECT_NE(sample, nullptr);
+    UnsampleSlow(sample);
+  }
+}
+
+TEST(HashtablezSamplerTest, LargeSampleParameter) {
+  SetHashtablezEnabled(true);
+  SetHashtablezSampleParameter(std::numeric_limits<int32_t>::max());
+
+  for (int i = 0; i < 1000; ++i) {
+    int64_t next_sample = 0;
+    HashtablezInfo* sample = SampleSlow(&next_sample);
+    EXPECT_GT(next_sample, 0);
+    EXPECT_NE(sample, nullptr);
+    UnsampleSlow(sample);
+  }
+}
+
+TEST(HashtablezSamplerTest, Sample) {
+  SetHashtablezEnabled(true);
+  SetHashtablezSampleParameter(100);
+  int64_t num_sampled = 0;
+  int64_t total = 0;
+  double sample_rate;
+  for (int i = 0; i < 1000000; ++i) {
+    HashtablezInfoHandle h = Sample();
+    ++total;
+    if (HashtablezInfoHandlePeer::IsSampled(h)) {
+      ++num_sampled;
+    }
+    sample_rate = static_cast<double>(num_sampled) / total;
+    if (0.005 < sample_rate && sample_rate < 0.015) break;
+  }
+  EXPECT_NEAR(sample_rate, 0.01, 0.005);
+}
+
+TEST(HashtablezSamplerTest, Handle) {
+  auto& sampler = HashtablezSampler::Global();
+  HashtablezInfoHandle h(sampler.Register());
+  auto* info = HashtablezInfoHandlePeer::GetInfo(&h);
+  info->hashes_bitwise_and.store(0x12345678, std::memory_order_relaxed);
+
+  bool found = false;
+  sampler.Iterate([&](const HashtablezInfo& h) {
+    if (&h == info) {
+      EXPECT_EQ(h.hashes_bitwise_and.load(), 0x12345678);
+      found = true;
+    }
+  });
+  EXPECT_TRUE(found);
+
+  h = HashtablezInfoHandle();
+  found = false;
+  sampler.Iterate([&](const HashtablezInfo& h) {
+    if (&h == info) {
+      // this will only happen if some other thread has resurrected the info
+      // the old handle was using.
+      if (h.hashes_bitwise_and.load() == 0x12345678) {
+        found = true;
+      }
+    }
+  });
+  EXPECT_FALSE(found);
+}
+
+TEST(HashtablezSamplerTest, Registration) {
+  HashtablezSampler sampler;
+  auto* info1 = Register(&sampler, 1);
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(1));
+
+  auto* info2 = Register(&sampler, 2);
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(1, 2));
+  info1->size.store(3);
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(3, 2));
+
+  sampler.Unregister(info1);
+  sampler.Unregister(info2);
+}
+
+TEST(HashtablezSamplerTest, Unregistration) {
+  HashtablezSampler sampler;
+  std::vector<HashtablezInfo*> infos;
+  for (size_t i = 0; i < 3; ++i) {
+    infos.push_back(Register(&sampler, i));
+  }
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(0, 1, 2));
+
+  sampler.Unregister(infos[1]);
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(0, 2));
+
+  infos.push_back(Register(&sampler, 3));
+  infos.push_back(Register(&sampler, 4));
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(0, 2, 3, 4));
+  sampler.Unregister(infos[3]);
+  EXPECT_THAT(GetSizes(&sampler), UnorderedElementsAre(0, 2, 4));
+
+  sampler.Unregister(infos[0]);
+  sampler.Unregister(infos[2]);
+  sampler.Unregister(infos[4]);
+  EXPECT_THAT(GetSizes(&sampler), IsEmpty());
+}
+
+TEST(HashtablezSamplerTest, MultiThreaded) {
+  HashtablezSampler sampler;
+  Notification stop;
+  ThreadPool pool(10);
+
+  for (int i = 0; i < 10; ++i) {
+    pool.Schedule([&sampler, &stop]() {
+      std::random_device rd;
+      std::mt19937 gen(rd());
+
+      std::vector<HashtablezInfo*> infoz;
+      while (!stop.HasBeenNotified()) {
+        if (infoz.empty()) {
+          infoz.push_back(sampler.Register());
+        }
+        switch (std::uniform_int_distribution<>(0, 2)(gen)) {
+          case 0: {
+            infoz.push_back(sampler.Register());
+            break;
+          }
+          case 1: {
+            size_t p =
+                std::uniform_int_distribution<>(0, infoz.size() - 1)(gen);
+            HashtablezInfo* info = infoz[p];
+            infoz[p] = infoz.back();
+            infoz.pop_back();
+            sampler.Unregister(info);
+            break;
+          }
+          case 2: {
+            absl::Duration oldest = absl::ZeroDuration();
+            sampler.Iterate([&](const HashtablezInfo& info) {
+              oldest = std::max(oldest, absl::Now() - info.create_time);
+            });
+            ASSERT_GE(oldest, absl::ZeroDuration());
+            break;
+          }
+        }
+      }
+    });
+  }
+  // The threads will hammer away.  Give it a little bit of time for tsan to
+  // spot errors.
+  absl::SleepFor(absl::Seconds(3));
+  stop.Notify();
+}
+
+}  // namespace
+}  // namespace container_internal
+}  // namespace absl
diff --git a/absl/container/internal/raw_hash_set.h b/absl/container/internal/raw_hash_set.h
index b7b5ef8c7b44..34d69d7af2fc 100644
--- a/absl/container/internal/raw_hash_set.h
+++ b/absl/container/internal/raw_hash_set.h
@@ -109,6 +109,7 @@
 #include "absl/container/internal/container_memory.h"
 #include "absl/container/internal/hash_policy_traits.h"
 #include "absl/container/internal/hashtable_debug_hooks.h"
+#include "absl/container/internal/hashtablez_sampler.h"
 #include "absl/container/internal/have_sse.h"
 #include "absl/container/internal/layout.h"
 #include "absl/memory/memory.h"
@@ -943,9 +944,10 @@ class raw_hash_set {
     // than a full `insert`.
     for (const auto& v : that) {
       const size_t hash = PolicyTraits::apply(HashElement{hash_ref()}, v);
-      const size_t i = find_first_non_full(hash);
-      set_ctrl(i, H2(hash));
-      emplace_at(i, v);
+      auto target = find_first_non_full(hash);
+      set_ctrl(target.offset, H2(hash));
+      emplace_at(target.offset, v);
+      infoz_.RecordInsert(hash, target.probe_length);
     }
     size_ = that.size();
     growth_left() -= that.size();
@@ -959,6 +961,7 @@ class raw_hash_set {
         slots_(absl::exchange(that.slots_, nullptr)),
         size_(absl::exchange(that.size_, 0)),
         capacity_(absl::exchange(that.capacity_, 0)),
+        infoz_(absl::exchange(that.infoz_, HashtablezInfoHandle())),
         // Hash, equality and allocator are copied instead of moved because
         // `that` must be left valid. If Hash is std::function<Key>, moving it
         // would create a nullptr functor that cannot be called.
@@ -979,6 +982,7 @@ class raw_hash_set {
       std::swap(size_, that.size_);
       std::swap(capacity_, that.capacity_);
       std::swap(growth_left(), that.growth_left());
+      std::swap(infoz_, that.infoz_);
     } else {
       reserve(that.size());
       // Note: this will copy elements of dense_set and unordered_set instead of
@@ -1049,6 +1053,7 @@ class raw_hash_set {
       growth_left() = static_cast<size_t>(capacity_ * kMaxLoadFactor);
     }
     assert(empty());
+    infoz_.RecordStorageChanged(size_, capacity_);
   }
 
   // This overload kicks in when the argument is an rvalue of insertable and
@@ -1323,6 +1328,7 @@ class raw_hash_set {
     swap(growth_left(), that.growth_left());
     swap(hash_ref(), that.hash_ref());
     swap(eq_ref(), that.eq_ref());
+    swap(infoz_, that.infoz_);
     if (AllocTraits::propagate_on_container_swap::value) {
       swap(alloc_ref(), that.alloc_ref());
     } else {
@@ -1333,7 +1339,11 @@ class raw_hash_set {
 
   void rehash(size_t n) {
     if (n == 0 && capacity_ == 0) return;
-    if (n == 0 && size_ == 0) return destroy_slots();
+    if (n == 0 && size_ == 0) {
+      destroy_slots();
+      infoz_.RecordStorageChanged(size_, capacity_);
+      return;
+    }
     auto m = NormalizeCapacity((std::max)(n, NumSlotsFast(size())));
     // n == 0 unconditionally rehashes as per the standard.
     if (n == 0 || m > capacity_) {
@@ -1550,10 +1560,15 @@ class raw_hash_set {
 
     set_ctrl(index, was_never_full ? kEmpty : kDeleted);
     growth_left() += was_never_full;
+    infoz_.RecordErase();
   }
 
   void initialize_slots() {
     assert(capacity_);
+    if (slots_ == nullptr) {
+      infoz_ = Sample();
+    }
+
     auto layout = MakeLayout(capacity_);
     char* mem = static_cast<char*>(
         Allocate<Layout::Alignment()>(&alloc_ref(), layout.AllocSize()));
@@ -1561,6 +1576,7 @@ class raw_hash_set {
     slots_ = layout.template Pointer<1>(mem);
     reset_ctrl();
     growth_left() = static_cast<size_t>(capacity_ * kMaxLoadFactor) - size_;
+    infoz_.RecordStorageChanged(size_, capacity_);
   }
 
   void destroy_slots() {
@@ -1593,7 +1609,7 @@ class raw_hash_set {
       if (IsFull(old_ctrl[i])) {
         size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                           PolicyTraits::element(old_slots + i));
-        size_t new_i = find_first_non_full(hash);
+        size_t new_i = find_first_non_full(hash).offset;
         set_ctrl(new_i, H2(hash));
         PolicyTraits::transfer(&alloc_ref(), slots_ + new_i, old_slots + i);
       }
@@ -1633,7 +1649,7 @@ class raw_hash_set {
       if (!IsDeleted(ctrl_[i])) continue;
       size_t hash = PolicyTraits::apply(HashElement{hash_ref()},
                                         PolicyTraits::element(slots_ + i));
-      size_t new_i = find_first_non_full(hash);
+      size_t new_i = find_first_non_full(hash).offset;
 
       // Verify if the old and new i fall within the same group wrt the hash.
       // If they do, we don't need to move the object as it falls already in the
@@ -1706,7 +1722,11 @@ class raw_hash_set {
   // - the input is already a set
   // - there are enough slots
   // - the element with the hash is not in the table
-  size_t find_first_non_full(size_t hash) {
+  struct FindInfo {
+    size_t offset;
+    size_t probe_length;
+  };
+  FindInfo find_first_non_full(size_t hash) {
     auto seq = probe(hash);
     while (true) {
       Group g{ctrl_ + seq.offset()};
@@ -1718,11 +1738,11 @@ class raw_hash_set {
         // the group.
         // TODO(kfm,sbenza): revisit after we do unconditional mixing
         if (ShouldInsertBackwards(hash, ctrl_))
-          return seq.offset(mask.HighestBitSet());
+          return {seq.offset(mask.HighestBitSet()), seq.index()};
         else
-          return seq.offset(mask.LowestBitSet());
+          return {seq.offset(mask.LowestBitSet()), seq.index()};
 #else
-        return seq.offset(mask.LowestBitSet());
+        return {seq.offset(mask.LowestBitSet()), seq.index()};
 #endif
       }
       assert(seq.index() < capacity_ && "full table!");
@@ -1762,15 +1782,17 @@ class raw_hash_set {
   }
 
   size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE {
-    size_t target = find_first_non_full(hash);
-    if (ABSL_PREDICT_FALSE(growth_left() == 0 && !IsDeleted(ctrl_[target]))) {
+    auto target = find_first_non_full(hash);
+    if (ABSL_PREDICT_FALSE(growth_left() == 0 &&
+                           !IsDeleted(ctrl_[target.offset]))) {
       rehash_and_grow_if_necessary();
       target = find_first_non_full(hash);
     }
     ++size_;
-    growth_left() -= IsEmpty(ctrl_[target]);
-    set_ctrl(target, H2(hash));
-    return target;
+    growth_left() -= IsEmpty(ctrl_[target.offset]);
+    set_ctrl(target.offset, H2(hash));
+    infoz_.RecordInsert(hash, target.probe_length);
+    return target.offset;
   }
 
   // Constructs the value in the space pointed by the iterator. This only works
@@ -1847,6 +1869,7 @@ class raw_hash_set {
   slot_type* slots_ = nullptr;     // [capacity * slot_type]
   size_t size_ = 0;                // number of full slots
   size_t capacity_ = 0;            // total number of slots
+  HashtablezInfoHandle infoz_;
   absl::container_internal::CompressedTuple<size_t /* growth_left */, hasher,
                                             key_equal, allocator_type>
       settings_{0, hasher{}, key_equal{}, allocator_type{}};
diff --git a/absl/container/internal/raw_hash_set_test.cc b/absl/container/internal/raw_hash_set_test.cc
index 5ad4904f9713..78b627556d74 100644
--- a/absl/container/internal/raw_hash_set_test.cc
+++ b/absl/container/internal/raw_hash_set_test.cc
@@ -342,6 +342,7 @@ TEST(Table, EmptyFunctorOptimization) {
     size_t size;
     size_t capacity;
     size_t growth_left;
+    void* infoz;
   };
   struct StatelessHash {
     size_t operator()(absl::string_view) const { return 0; }
@@ -1798,6 +1799,27 @@ TEST(TableDeathTest, EraseOfEndAsserts) {
   EXPECT_DEATH_IF_SUPPORTED(t.erase(t.end()), kDeathMsg);
 }
 
+TEST(RawHashSamplerTest, Sample) {
+  // Enable the feature even if the prod default is off.
+  SetHashtablezEnabled(true);
+  SetHashtablezSampleParameter(100);
+
+  auto& sampler = HashtablezSampler::Global();
+  size_t start_size = 0;
+  start_size += sampler.Iterate([&](const HashtablezInfo&) { ++start_size; });
+
+  std::vector<IntTable> tables;
+  for (int i = 0; i < 1000000; ++i) {
+    tables.emplace_back();
+    tables.back().insert(1);
+  }
+  size_t end_size = 0;
+  end_size += sampler.Iterate([&](const HashtablezInfo&) { ++end_size; });
+
+  EXPECT_NEAR((end_size - start_size) / static_cast<double>(tables.size()),
+              0.01, 0.005);
+}
+
 #ifdef ADDRESS_SANITIZER
 TEST(Sanitizer, PoisoningUnused) {
   IntTable t;