diff options
author | Abseil Team <absl-team@google.com> | 2019-10-24T02·35-0700 |
---|---|---|
committer | Derek Mauro <dmauro@google.com> | 2019-10-24T14·29-0400 |
commit | 078b89b3c046d230ef3ad39494e5852184eb528b (patch) | |
tree | 90368e0b398c3e43196c823504b88ab78a8408b3 /absl/base/internal | |
parent | 19b021cb3ff23048dfbe236a4e611925d8930831 (diff) |
Export of internal Abseil changes
-- e54b9c7bbb0c58475676c268e2e19c69f4bce48a by Jorg Brown <jorg@google.com>: Tweak ABSL_PREDICT_TRUE slightly, for better code on some platforms and/or optimization levels. "false || (x)" is more verbose than "!!(x)", but ultimately more efficient. For example, given this code: void InitIfNecessary() { if (ABSL_PREDICT_TRUE(NeedsInit())) { SlowInitIfNecessary(); } } Clang with default optimization level will produce: Before this CL After this CL InitIfNecessary: InitIfNecessary: push rbp push rbp mov rbp, rsp mov rbp, rsp call NeedsInit call NeedsInit xor al, -1 xor al, -1 test al, 1 test al, 1 jne .LBB2_1 jne .LBB3_1 jmp .LBB2_2 jmp .LBB3_2 .LBB2_1: .LBB3_1: call SlowInitIfNecessary call SlowInitIfNecessary .LBB2_2: .LBB3_2: pop rbp pop rbp ret ret PiperOrigin-RevId: 276401386 -- 0a3c4dfd8342bf2b1b11a87f1c662c883f73cab7 by Abseil Team <absl-team@google.com>: Fix comment nit: sem_open => sem_init. The code calls sem_init, not sem_open, to initialize an unnamed semaphore. (sem_open creates or opens a named semaphore.) PiperOrigin-RevId: 276344072 -- b36a664e9459057509a90e83d3482e1d3a4c44c7 by Abseil Team <absl-team@google.com>: Fix typo in flat_hash_map.h: exchaged -> exchanged PiperOrigin-RevId: 276295792 -- 7bbd8d18276eb110c8335743e35fceb662ddf3d6 by Samuel Benzaquen <sbenza@google.com>: Add assertions to verify use of iterators. PiperOrigin-RevId: 276283300 -- 677398a8ffcb1f59182cffe57a4fe7ff147a0404 by Laramie Leavitt <lar@google.com>: Migrate distribution_impl.h/cc to generate_real.h/cc. Combine the methods RandU64To<Float,Double> into a single method: GenerateRealFromBits(). Remove rejection sampling from absl::uniform_real_distribution. PiperOrigin-RevId: 276158675 -- c60c9d11d24b0c546329d998e78e15a84b3153f5 by Abseil Team <absl-team@google.com>: Internal change PiperOrigin-RevId: 276126962 -- 4c840cab6a8d86efa29b397cafaf7520eece68cc by Andy Soffer <asoffer@google.com>: Update CMakeLists.txt to address https://github.com/abseil/abseil-cpp/issues/365. This does not cover every platform, but it does at least address the first-order issue of assuming gcc implies x86. PiperOrigin-RevId: 276116253 -- 98da366e6b5d51afe5d7ac6722126aca23d85ee6 by Abseil Team <absl-team@google.com>: Internal change PiperOrigin-RevId: 276097452 GitOrigin-RevId: e54b9c7bbb0c58475676c268e2e19c69f4bce48a Change-Id: I02d84454bb71ab21ad3d39650acf6cc6e36f58d7
Diffstat (limited to 'absl/base/internal')
-rw-r--r-- | absl/base/internal/exponential_biased.cc | 84 | ||||
-rw-r--r-- | absl/base/internal/exponential_biased.h | 77 | ||||
-rw-r--r-- | absl/base/internal/exponential_biased_test.cc | 168 |
3 files changed, 329 insertions, 0 deletions
diff --git a/absl/base/internal/exponential_biased.cc b/absl/base/internal/exponential_biased.cc new file mode 100644 index 000000000000..d7ffd184e968 --- /dev/null +++ b/absl/base/internal/exponential_biased.cc @@ -0,0 +1,84 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/internal/exponential_biased.h" + +#include <stdint.h> + +#include <atomic> +#include <cmath> +#include <limits> + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" + +namespace absl { +namespace base_internal { + +// The algorithm generates a random number between 0 and 1 and applies the +// inverse cumulative distribution function for an exponential. Specifically: +// Let m be the inverse of the sample period, then the probability +// distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below +int64_t ExponentialBiased::Get(int64_t mean) { + if (ABSL_PREDICT_FALSE(!initialized_)) { + Initialize(); + } + + uint64_t rng = NextRandom(rng_); + rng_ = rng; + + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean); + // Very large values of interval overflow int64_t. To avoid that, we will cheat + // and clamp any huge values to (int64_t max)/2. This is a potential source of + // bias, but the mean would need to be such a large value that it's not likely + // to come up. For example, with a mean of 1e18, the probability of hitting + // this condition is about 1/1000. For a mean of 1e17, standard calculators + // claim that this event won't happen. + if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) { + return std::numeric_limits<int64_t>::max() / 2; + } + + return static_cast<int64_t>(interval); +} + +void ExponentialBiased::Initialize() { + // We don't get well distributed numbers from `this` so we call NextRandom() a + // bunch to mush the bits around. We use a global_rand to handle the case + // where the same thread (by memory address) gets created and destroyed + // repeatedly. + ABSL_CONST_INIT static std::atomic<uint32_t> global_rand(0); + uint64_t r = reinterpret_cast<uint64_t>(this) + + global_rand.fetch_add(1, std::memory_order_relaxed); + for (int i = 0; i < 20; ++i) { + r = NextRandom(r); + } + rng_ = r; + initialized_ = true; +} + +} // namespace base_internal +} // namespace absl diff --git a/absl/base/internal/exponential_biased.h b/absl/base/internal/exponential_biased.h new file mode 100644 index 000000000000..cac2d8ad84ff --- /dev/null +++ b/absl/base/internal/exponential_biased.h @@ -0,0 +1,77 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_ +#define ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_ + +#include <stdint.h> + +namespace absl { +namespace base_internal { + +// ExponentialBiased provides a small and fast random number generator for a +// rounded exponential distribution. This generator doesn't requires very little +// state doesn't impose synchronization overhead, which makes it useful in some +// specialized scenarios. +// +// For the generated variable X, X ~ floor(Exponential(1/mean)). The floor +// operation introduces a small amount of bias, but the distribution is useful +// to generate a wait time. That is, if an operation is supposed to happen on +// average to 1/mean events, then the generated variable X will describe how +// many events to skip before performing the operation and computing a new X. +// +// The mathematically precise distribution to use for integer wait times is a +// Geometric distribution, but a Geometric distribution takes slightly more time +// to compute and when the mean is large (say, 100+), the Geometric distribution +// is hard to distinguish from the result of ExponentialBiased. +// +// This class is thread-compatible. +class ExponentialBiased { + public: + // The number of bits set by NextRandom. + static constexpr int kPrngNumBits = 48; + + // Generates the floor of an exponentially distributed random variable by + // rounding the value down to the nearest integer. The result will be in the + // range [0, int64_t max / 2]. + int64_t Get(int64_t mean); + + // Computes a random number in the range [0, 1<<(kPrngNumBits+1) - 1] + // + // This is public to enable testing. + static uint64_t NextRandom(uint64_t rnd); + + private: + void Initialize(); + + uint64_t rng_{0}; + bool initialized_{false}; +}; + +// Returns the next prng value. +// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 +// This is the lrand64 generator. +inline uint64_t ExponentialBiased::NextRandom(uint64_t rnd) { + const uint64_t prng_mult = uint64_t{0x5DEECE66D}; + const uint64_t prng_add = 0xB; + const uint64_t prng_mod_power = 48; + const uint64_t prng_mod_mask = + ~((~static_cast<uint64_t>(0)) << prng_mod_power); + return (prng_mult * rnd + prng_add) & prng_mod_mask; +} + +} // namespace base_internal +} // namespace absl + +#endif // ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_ diff --git a/absl/base/internal/exponential_biased_test.cc b/absl/base/internal/exponential_biased_test.cc new file mode 100644 index 000000000000..09b511d14e70 --- /dev/null +++ b/absl/base/internal/exponential_biased_test.cc @@ -0,0 +1,168 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/internal/exponential_biased.h" + +#include <stddef.h> + +#include <cmath> +#include <cstdint> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" + +using ::testing::Ge; + +namespace absl { +namespace base_internal { + +MATCHER_P2(IsBetween, a, b, + absl::StrCat(std::string(negation ? "isn't" : "is"), " between ", a, + " and ", b)) { + return a <= arg && arg <= b; +} + +// Tests of the quality of the random numbers generated +// This uses the Anderson Darling test for uniformity. +// See "Evaluating the Anderson-Darling Distribution" by Marsaglia +// for details. + +// Short cut version of ADinf(z), z>0 (from Marsaglia) +// This returns the p-value for Anderson Darling statistic in +// the limit as n-> infinity. For finite n, apply the error fix below. +double AndersonDarlingInf(double z) { + if (z < 2) { + return exp(-1.2337141 / z) / sqrt(z) * + (2.00012 + + (0.247105 - + (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * z) * z) * z) * + z) * + z); + } + return exp( + -exp(1.0776 - + (2.30695 - + (0.43424 - (0.082433 - (0.008056 - 0.0003146 * z) * z) * z) * z) * + z)); +} + +// Corrects the approximation error in AndersonDarlingInf for small values of n +// Add this to AndersonDarlingInf to get a better approximation +// (from Marsaglia) +double AndersonDarlingErrFix(int n, double x) { + if (x > 0.8) { + return (-130.2137 + + (745.2337 - + (1705.091 - (1950.646 - (1116.360 - 255.7844 * x) * x) * x) * x) * + x) / + n; + } + double cutoff = 0.01265 + 0.1757 / n; + if (x < cutoff) { + double t = x / cutoff; + t = sqrt(t) * (1 - t) * (49 * t - 102); + return t * (0.0037 / (n * n) + 0.00078 / n + 0.00006) / n; + } else { + double t = (x - cutoff) / (0.8 - cutoff); + t = -0.00022633 + + (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864 * t) * t) * t) * t) * + t; + return t * (0.04213 + 0.01365 / n) / n; + } +} + +// Returns the AndersonDarling p-value given n and the value of the statistic +double AndersonDarlingPValue(int n, double z) { + double ad = AndersonDarlingInf(z); + double errfix = AndersonDarlingErrFix(n, ad); + return ad + errfix; +} + +double AndersonDarlingStatistic(const std::vector<double>& random_sample) { + int n = random_sample.size(); + double ad_sum = 0; + for (int i = 0; i < n; i++) { + ad_sum += (2 * i + 1) * + std::log(random_sample[i] * (1 - random_sample[n - 1 - i])); + } + double ad_statistic = -n - 1 / static_cast<double>(n) * ad_sum; + return ad_statistic; +} + +// Tests if the array of doubles is uniformly distributed. +// Returns the p-value of the Anderson Darling Statistic +// for the given set of sorted random doubles +// See "Evaluating the Anderson-Darling Distribution" by +// Marsaglia and Marsaglia for details. +double AndersonDarlingTest(const std::vector<double>& random_sample) { + double ad_statistic = AndersonDarlingStatistic(random_sample); + double p = AndersonDarlingPValue(random_sample.size(), ad_statistic); + return p; +} + +// Testing that NextRandom generates uniform random numbers. Applies the +// Anderson-Darling test for uniformity +TEST(ExponentialBiasedTest, TestNextRandom) { + for (auto n : std::vector<int>({ + 10, // Check short-range correlation + 100, 1000, + 10000 // Make sure there's no systemic error + })) { + uint64_t x = 1; + // This assumes that the prng returns 48 bit numbers + uint64_t max_prng_value = static_cast<uint64_t>(1) << 48; + // Initialize. + for (int i = 1; i <= 20; i++) { + x = ExponentialBiased::NextRandom(x); + } + std::vector<uint64_t> int_random_sample(n); + // Collect samples + for (int i = 0; i < n; i++) { + int_random_sample[i] = x; + x = ExponentialBiased::NextRandom(x); + } + // First sort them... + std::sort(int_random_sample.begin(), int_random_sample.end()); + std::vector<double> random_sample(n); + // Convert them to uniform randoms (in the range [0,1]) + for (int i = 0; i < n; i++) { + random_sample[i] = + static_cast<double>(int_random_sample[i]) / max_prng_value; + } + // Now compute the Anderson-Darling statistic + double ad_pvalue = AndersonDarlingTest(random_sample); + EXPECT_GT(std::min(ad_pvalue, 1 - ad_pvalue), 0.0001) + << "prng is not uniform: n = " << n << " p = " << ad_pvalue; + } +} + +// The generator needs to be available as a thread_local and as a static +// variable. +TEST(ExponentialBiasedTest, InitializationModes) { + ABSL_CONST_INIT static ExponentialBiased eb_static; + EXPECT_THAT(eb_static.Get(2), Ge(0)); + +#if ABSL_HAVE_THREAD_LOCAL + thread_local ExponentialBiased eb_thread; + EXPECT_THAT(eb_thread.Get(2), Ge(0)); +#endif + + ExponentialBiased eb_stack; + EXPECT_THAT(eb_stack.Get(2), Ge(0)); +} + +} // namespace base_internal +} // namespace absl |