diff options
Diffstat (limited to 'absl')
-rw-r--r-- | absl/base/internal/exponential_biased.cc | 45 | ||||
-rw-r--r-- | absl/base/internal/exponential_biased.h | 86 | ||||
-rw-r--r-- | absl/base/internal/exponential_biased_test.cc | 35 | ||||
-rw-r--r-- | absl/base/internal/periodic_sampler.cc | 7 | ||||
-rw-r--r-- | absl/base/internal/periodic_sampler_benchmark.cc | 7 | ||||
-rw-r--r-- | absl/base/internal/periodic_sampler_test.cc | 12 |
6 files changed, 162 insertions, 30 deletions
diff --git a/absl/base/internal/exponential_biased.cc b/absl/base/internal/exponential_biased.cc index d7ffd184e968..3007f9b46b86 100644 --- a/absl/base/internal/exponential_biased.cc +++ b/absl/base/internal/exponential_biased.cc @@ -16,6 +16,7 @@ #include <stdint.h> +#include <algorithm> #include <atomic> #include <cmath> #include <limits> @@ -26,6 +27,42 @@ namespace absl { namespace base_internal { + +int64_t ExponentialBiased::GetSkipCount(int64_t mean) { + if (ABSL_PREDICT_FALSE(!initialized_)) { + Initialize(); + } + + uint64_t rng = NextRandom(rng_); + rng_ = rng; + + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + double interval = bias_ + (std::log2(q) - 26) * (-std::log(2.0) * mean); + // Very large values of interval overflow int64_t. To avoid that, we will + // cheat and clamp any huge values to (int64_t max)/2. This is a potential + // source of bias, but the mean would need to be such a large value that it's + // not likely to come up. For example, with a mean of 1e18, the probability of + // hitting this condition is about 1/1000. For a mean of 1e17, standard + // calculators claim that this event won't happen. + if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) { + // Assume huge values are bias neutral, retain bias for next call. + return std::numeric_limits<int64_t>::max() / 2; + } + double value = std::round(interval); + bias_ = interval - value; + return value; +} + +int64_t ExponentialBiased::GetStride(int64_t mean) { + return GetSkipCount(mean - 1) + 1; +} + // The algorithm generates a random number between 0 and 1 and applies the // inverse cumulative distribution function for an exponential. Specifically: // Let m be the inverse of the sample period, then the probability @@ -51,7 +88,7 @@ int64_t ExponentialBiased::Get(int64_t mean) { // under piii debug for some binaries. double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0; // Put the computed p-value through the CDF of a geometric. - double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean); + double interval = bias_ + (std::log2(q) - 26) * (-std::log(2.0) * mean); // Very large values of interval overflow int64_t. To avoid that, we will cheat // and clamp any huge values to (int64_t max)/2. This is a potential source of // bias, but the mean would need to be such a large value that it's not likely @@ -59,10 +96,12 @@ int64_t ExponentialBiased::Get(int64_t mean) { // this condition is about 1/1000. For a mean of 1e17, standard calculators // claim that this event won't happen. if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) { + // Assume huge values are bias neutral, retain bias for next call. return std::numeric_limits<int64_t>::max() / 2; } - - return static_cast<int64_t>(interval); + int64_t value = std::max<int64_t>(1, std::round(interval)); + bias_ = interval - value; + return value; } void ExponentialBiased::Initialize() { diff --git a/absl/base/internal/exponential_biased.h b/absl/base/internal/exponential_biased.h index cac2d8ad84ff..571505d32677 100644 --- a/absl/base/internal/exponential_biased.h +++ b/absl/base/internal/exponential_biased.h @@ -17,24 +17,56 @@ #include <stdint.h> +#include "absl/base/macros.h" + namespace absl { namespace base_internal { // ExponentialBiased provides a small and fast random number generator for a -// rounded exponential distribution. This generator doesn't requires very little -// state doesn't impose synchronization overhead, which makes it useful in some -// specialized scenarios. +// rounded exponential distribution. This generator manages very little state, +// and imposes no synchronization overhead. This makes it useful in specialized +// scenarios requiring minimum overhead, such as stride based periodic sampling. +// +// ExponentialBiased provides two closely related functions, GetSkipCount() and +// GetStride(), both returning a rounded integer defining a number of events +// required before some event with a given mean probability occurs. +// +// The distribution is useful to generate a random wait time or some periodic +// event with a given mean probability. For example, if an action is supposed to +// happen on average once every 'N' events, then we can get a random 'stride' +// counting down how long before the event to happen. For example, if we'd want +// to sample one in every 1000 'Frobber' calls, our code could look like this: +// +// Frobber::Frobber() { +// stride_ = exponential_biased_.GetStride(1000); +// } +// +// void Frobber::Frob(int arg) { +// if (--stride == 0) { +// SampleFrob(arg); +// stride_ = exponential_biased_.GetStride(1000); +// } +// ... +// } +// +// The rounding of the return value creates a bias, especially for smaller means +// where the distribution of the fraction is not evenly distributed. We correct +// this bias by tracking the fraction we rounded up or down on each iteration, +// effectively tracking the distance between the cumulative value, and the +// rounded cumulative value. For example, given a mean of 2: // -// For the generated variable X, X ~ floor(Exponential(1/mean)). The floor -// operation introduces a small amount of bias, but the distribution is useful -// to generate a wait time. That is, if an operation is supposed to happen on -// average to 1/mean events, then the generated variable X will describe how -// many events to skip before performing the operation and computing a new X. +// raw = 1.63076, cumulative = 1.63076, rounded = 2, bias = -0.36923 +// raw = 0.14624, cumulative = 1.77701, rounded = 2, bias = 0.14624 +// raw = 4.93194, cumulative = 6.70895, rounded = 7, bias = -0.06805 +// raw = 0.24206, cumulative = 6.95101, rounded = 7, bias = 0.24206 +// etc... // -// The mathematically precise distribution to use for integer wait times is a -// Geometric distribution, but a Geometric distribution takes slightly more time -// to compute and when the mean is large (say, 100+), the Geometric distribution -// is hard to distinguish from the result of ExponentialBiased. +// Adjusting with rounding bias is relatively trivial: +// +// double value = bias_ + exponential_distribution(mean)(); +// double rounded_value = std::round(value); +// bias_ = value - rounded_value; +// return rounded_value; // // This class is thread-compatible. class ExponentialBiased { @@ -42,9 +74,32 @@ class ExponentialBiased { // The number of bits set by NextRandom. static constexpr int kPrngNumBits = 48; - // Generates the floor of an exponentially distributed random variable by - // rounding the value down to the nearest integer. The result will be in the - // range [0, int64_t max / 2]. + // `GetSkipCount()` returns the number of events to skip before some chosen + // event happens. For example, randomly tossing a coin, we will on average + // throw heads once before we get tails. We can simulate random coin tosses + // using GetSkipCount() as: + // + // ExponentialBiased eb; + // for (...) { + // int number_of_heads_before_tail = eb.GetSkipCount(1); + // for (int flips = 0; flips < number_of_heads_before_tail; ++flips) { + // printf("head..."); + // } + // printf("tail\n"); + // } + // + int64_t GetSkipCount(int64_t mean); + + // GetStride() returns the number of events required for a specific event to + // happen. See the class comments for a usage example. `GetStride()` is + // equivalent to `GetSkipCount(mean - 1) + 1`. When to use `GetStride()` or + // `GetSkipCount()` depends mostly on what best fits the use case. + int64_t GetStride(int64_t mean); + + // Generates a rounded exponentially distributed random variable + // by rounding the value to the nearest integer. + // The result will be in the range [0, int64_t max / 2]. + ABSL_DEPRECATED("Use GetSkipCount() or GetStride() instead") int64_t Get(int64_t mean); // Computes a random number in the range [0, 1<<(kPrngNumBits+1) - 1] @@ -56,6 +111,7 @@ class ExponentialBiased { void Initialize(); uint64_t rng_{0}; + double bias_{0}; bool initialized_{false}; }; diff --git a/absl/base/internal/exponential_biased_test.cc b/absl/base/internal/exponential_biased_test.cc index 09b511d14e70..af003239f8de 100644 --- a/absl/base/internal/exponential_biased_test.cc +++ b/absl/base/internal/exponential_biased_test.cc @@ -113,6 +113,35 @@ double AndersonDarlingTest(const std::vector<double>& random_sample) { return p; } +TEST(ExponentialBiasedTest, CoinTossDemoWithGetSkipCount) { + ExponentialBiased eb; + for (int runs = 0; runs < 10; ++runs) { + for (int flips = eb.GetSkipCount(1); flips > 0; --flips) { + printf("head..."); + } + printf("tail\n"); + } + int heads = 0; + for (int i = 0; i < 10000000; i += 1 + eb.GetSkipCount(1)) { + ++heads; + } + printf("Heads = %d (%f%%)\n", heads, 100.0 * heads / 10000000); +} + +TEST(ExponentialBiasedTest, SampleDemoWithStride) { + ExponentialBiased eb; + int stride = eb.GetStride(10); + int samples = 0; + for (int i = 0; i < 10000000; ++i) { + if (--stride == 0) { + ++samples; + stride = eb.GetStride(10); + } + } + printf("Samples = %d (%f%%)\n", samples, 100.0 * samples / 10000000); +} + + // Testing that NextRandom generates uniform random numbers. Applies the // Anderson-Darling test for uniformity TEST(ExponentialBiasedTest, TestNextRandom) { @@ -153,15 +182,15 @@ TEST(ExponentialBiasedTest, TestNextRandom) { // variable. TEST(ExponentialBiasedTest, InitializationModes) { ABSL_CONST_INIT static ExponentialBiased eb_static; - EXPECT_THAT(eb_static.Get(2), Ge(0)); + EXPECT_THAT(eb_static.GetSkipCount(2), Ge(0)); #if ABSL_HAVE_THREAD_LOCAL thread_local ExponentialBiased eb_thread; - EXPECT_THAT(eb_thread.Get(2), Ge(0)); + EXPECT_THAT(eb_thread.GetSkipCount(2), Ge(0)); #endif ExponentialBiased eb_stack; - EXPECT_THAT(eb_stack.Get(2), Ge(0)); + EXPECT_THAT(eb_stack.GetSkipCount(2), Ge(0)); } } // namespace base_internal diff --git a/absl/base/internal/periodic_sampler.cc b/absl/base/internal/periodic_sampler.cc index 69656c8a0fbe..87c3c85ae3e2 100644 --- a/absl/base/internal/periodic_sampler.cc +++ b/absl/base/internal/periodic_sampler.cc @@ -22,7 +22,7 @@ namespace absl { namespace base_internal { int64_t PeriodicSamplerBase::GetExponentialBiased(int period) noexcept { - return rng_.Get(period); + return rng_.GetStride(period); } bool PeriodicSamplerBase::SubtleConfirmSample() noexcept { @@ -36,13 +36,14 @@ bool PeriodicSamplerBase::SubtleConfirmSample() noexcept { // Check if this is the first call to Sample() if (ABSL_PREDICT_FALSE(stride_ == 1)) { - stride_ = static_cast<uint64_t>(-1 - GetExponentialBiased(current_period)); + stride_ = static_cast<uint64_t>(-GetExponentialBiased(current_period)); if (static_cast<int64_t>(stride_) < -1) { ++stride_; return false; } } - stride_ = static_cast<uint64_t>(-1 - GetExponentialBiased(current_period)); + + stride_ = static_cast<uint64_t>(-GetExponentialBiased(current_period)); return true; } diff --git a/absl/base/internal/periodic_sampler_benchmark.cc b/absl/base/internal/periodic_sampler_benchmark.cc index f87cef73dc16..037596321776 100644 --- a/absl/base/internal/periodic_sampler_benchmark.cc +++ b/absl/base/internal/periodic_sampler_benchmark.cc @@ -37,6 +37,13 @@ void BM_SampleMinunumInlined(Sampler* sampler, benchmark::State& state) { } } +void BM_PeriodicSampler_TinySample(benchmark::State& state) { + struct Tag {}; + PeriodicSampler<Tag, 10> sampler; + BM_Sample(&sampler, state); +} +BENCHMARK(BM_PeriodicSampler_TinySample); + void BM_PeriodicSampler_ShortSample(benchmark::State& state) { struct Tag {}; PeriodicSampler<Tag, 1024> sampler; diff --git a/absl/base/internal/periodic_sampler_test.cc b/absl/base/internal/periodic_sampler_test.cc index 1ba61377a0f4..29f4d24d6323 100644 --- a/absl/base/internal/periodic_sampler_test.cc +++ b/absl/base/internal/periodic_sampler_test.cc @@ -42,9 +42,9 @@ TEST(PeriodicSamplerBaseTest, Sample) { EXPECT_CALL(sampler, period()).Times(3).WillRepeatedly(Return(16)); EXPECT_CALL(sampler, GetExponentialBiased(16)) - .WillOnce(Return(1)) .WillOnce(Return(2)) - .WillOnce(Return(3)); + .WillOnce(Return(3)) + .WillOnce(Return(4)); EXPECT_FALSE(sampler.Sample()); EXPECT_TRUE(sampler.Sample()); @@ -63,9 +63,9 @@ TEST(PeriodicSamplerBaseTest, ImmediatelySample) { EXPECT_CALL(sampler, period()).Times(2).WillRepeatedly(Return(16)); EXPECT_CALL(sampler, GetExponentialBiased(16)) - .WillOnce(Return(0)) .WillOnce(Return(1)) - .WillOnce(Return(2)); + .WillOnce(Return(2)) + .WillOnce(Return(3)); EXPECT_TRUE(sampler.Sample()); @@ -100,7 +100,7 @@ TEST(PeriodicSamplerBaseTest, Disable) { StrictMock<MockPeriodicSampler> sampler; EXPECT_CALL(sampler, period()).WillOnce(Return(16)); - EXPECT_CALL(sampler, GetExponentialBiased(16)).WillOnce(Return(2)); + EXPECT_CALL(sampler, GetExponentialBiased(16)).WillOnce(Return(3)); EXPECT_FALSE(sampler.Sample()); EXPECT_FALSE(sampler.Sample()); @@ -119,7 +119,7 @@ TEST(PeriodicSamplerBaseTest, Enable) { EXPECT_CALL(sampler, period()).Times(2).WillRepeatedly(Return(16)); EXPECT_CALL(sampler, GetExponentialBiased(16)) .Times(2) - .WillRepeatedly(Return(2)); + .WillRepeatedly(Return(3)); EXPECT_FALSE(sampler.Sample()); EXPECT_FALSE(sampler.Sample()); |