about summary refs log tree commit diff
path: root/absl/base/internal
diff options
context:
space:
mode:
Diffstat (limited to 'absl/base/internal')
-rw-r--r--absl/base/internal/exponential_biased.cc84
-rw-r--r--absl/base/internal/exponential_biased.h77
-rw-r--r--absl/base/internal/exponential_biased_test.cc168
3 files changed, 329 insertions, 0 deletions
diff --git a/absl/base/internal/exponential_biased.cc b/absl/base/internal/exponential_biased.cc
new file mode 100644
index 000000000000..d7ffd184e968
--- /dev/null
+++ b/absl/base/internal/exponential_biased.cc
@@ -0,0 +1,84 @@
+// Copyright 2019 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/internal/exponential_biased.h"
+
+#include <stdint.h>
+
+#include <atomic>
+#include <cmath>
+#include <limits>
+
+#include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+
+namespace absl {
+namespace base_internal {
+
+// Draws one sample: generates a uniform random number in (0, 1] and applies
+// the inverse cumulative distribution function of an exponential. Derivation:
+// let m = 1/mean be the rate; then the probability density function is
+// m*exp(-mx), so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is scaled by 2**26 (range 1 to 2**26), hence the -26 below.
+int64_t ExponentialBiased::Get(int64_t mean) {
+  if (ABSL_PREDICT_FALSE(!initialized_)) {
+    Initialize();
+  }
+
+  uint64_t rng = NextRandom(rng_);
+  rng_ = rng;
+
+  // Take the top 26 bits of the kPrngNumBits-bit value as the random number.
+  // (This plus the 1<<58 sampling bound give a max possible step of
+  // 5194297183973780480 bytes.)
+  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+  // under piii debug for some binaries.
+  double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0;
+  // Apply the inverse exponential CDF: x = -log_e(q / 2**26) * mean.
+  double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean);
+  // Very large values of interval overflow int64_t. To avoid that, we will cheat
+  // and clamp any huge values to (int64_t max)/2. This is a potential source of
+  // bias, but the mean would need to be such a large value that it's not likely
+  // to come up. For example, with a mean of 1e18, the probability of hitting
+  // this condition is about 1/1000. For a mean of 1e17, standard calculators
+  // claim that this event won't happen.
+  if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) {
+    return std::numeric_limits<int64_t>::max() / 2;
+  }
+
+  return static_cast<int64_t>(interval);
+}
+
+void ExponentialBiased::Initialize() {
+  // Seeds rng_ from the address of `this` plus a shared static counter. The
+  // address alone is poorly distributed, so NextRandom() is applied 20 times to
+  // mix the bits; the counter keeps seeds distinct when an object is created
+  // and destroyed repeatedly at the same address (e.g. per-thread instances).
+  ABSL_CONST_INIT static std::atomic<uint32_t> global_rand(0);
+  uint64_t r = reinterpret_cast<uint64_t>(this) +
+               global_rand.fetch_add(1, std::memory_order_relaxed);
+  for (int i = 0; i < 20; ++i) {
+    r = NextRandom(r);
+  }
+  rng_ = r;
+  initialized_ = true;  // Get() skips Initialize() from now on
+}
+
+}  // namespace base_internal
+}  // namespace absl
diff --git a/absl/base/internal/exponential_biased.h b/absl/base/internal/exponential_biased.h
new file mode 100644
index 000000000000..cac2d8ad84ff
--- /dev/null
+++ b/absl/base/internal/exponential_biased.h
@@ -0,0 +1,77 @@
+// Copyright 2019 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_
+#define ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_
+
+#include <stdint.h>
+
+namespace absl {
+namespace base_internal {
+
+// ExponentialBiased provides a small and fast random number generator for a
+// rounded exponential distribution. This generator requires very little state
+// and doesn't impose synchronization overhead, which makes it useful in some
+// specialized scenarios.
+//
+// For the generated variable X, X ~ floor(Exponential(1/mean)). The floor
+// operation introduces a small amount of bias, but the distribution is useful
+// to generate a wait time. That is, if an operation should happen on average
+// once every `mean` events, then the generated variable X describes how many
+// events to skip before performing the operation and computing a new X.
+//
+// The mathematically precise distribution to use for integer wait times is a
+// Geometric distribution, but a Geometric distribution takes slightly more time
+// to compute and when the mean is large (say, 100+), the Geometric distribution
+// is hard to distinguish from the result of ExponentialBiased.
+//
+// This class is thread-compatible.
+class ExponentialBiased {
+ public:
+  // The number of bits set by NextRandom.
+  static constexpr int kPrngNumBits = 48;
+
+  // Generates the floor of an exponentially distributed random variable with
+  // the given mean, i.e. the value rounded down to the nearest integer. For a
+  // non-negative `mean` the result is in the range [0, int64_t max / 2].
+  int64_t Get(int64_t mean);
+
+  // Computes a random number in the range [0, (1 << kPrngNumBits) - 1].
+  //
+  // This is public to enable testing.
+  static uint64_t NextRandom(uint64_t rnd);
+
+ private:
+  void Initialize();  // seeds rng_; invoked by Get() while !initialized_
+
+  uint64_t rng_{0};          // current generator state
+  bool initialized_{false};  // set to true by Initialize()
+};
+
+// Returns the next prng value, given the previous value `rnd`.
+// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
+// These are the constants of the POSIX lrand48/drand48 generator.
+inline uint64_t ExponentialBiased::NextRandom(uint64_t rnd) {
+  const uint64_t prng_mult = uint64_t{0x5DEECE66D};
+  const uint64_t prng_add = 0xB;
+  const uint64_t prng_mod_power = 48;
+  const uint64_t prng_mod_mask =  // low 48 bits set; "& mask" is "mod 1<<48"
+      ~((~static_cast<uint64_t>(0)) << prng_mod_power);
+  return (prng_mult * rnd + prng_add) & prng_mod_mask;
+}
+
+}  // namespace base_internal
+}  // namespace absl
+
+#endif  // ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_
diff --git a/absl/base/internal/exponential_biased_test.cc b/absl/base/internal/exponential_biased_test.cc
new file mode 100644
index 000000000000..09b511d14e70
--- /dev/null
+++ b/absl/base/internal/exponential_biased_test.cc
@@ -0,0 +1,168 @@
+// Copyright 2019 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/internal/exponential_biased.h"
+
+#include <stddef.h>
+
+#include <cmath>
+#include <cstdint>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+
+using ::testing::Ge;
+
+namespace absl {
+namespace base_internal {
+
+MATCHER_P2(IsBetween, a, b,
+           absl::StrCat(std::string(negation ? "isn't" : "is"), " between ", a,
+                        " and ", b)) {
+  return a <= arg && arg <= b;  // matches the closed interval [a, b]
+}
+
+// Tests of the quality of the random numbers generated.
+// These use the Anderson-Darling test for uniformity.
+// See "Evaluating the Anderson-Darling Distribution" by Marsaglia and
+// Marsaglia for details.
+
+// Short-cut version of ADinf(z) for z > 0 (from Marsaglia): the value this
+// file uses as the Anderson-Darling p-value in the limit n -> infinity. For
+// finite n, add AndersonDarlingErrFix(). Separate fits for z < 2 and z >= 2.
+double AndersonDarlingInf(double z) {
+  if (z < 2) {
+    return exp(-1.2337141 / z) / sqrt(z) *
+           (2.00012 +
+            (0.247105 -
+             (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * z) * z) * z) *
+                 z) *
+                z);
+  }
+  return exp(
+      -exp(1.0776 -
+           (2.30695 -
+            (0.43424 - (0.082433 - (0.008056 - 0.0003146 * z) * z) * z) * z) *
+               z));
+}
+
+// Finite-sample correction for AndersonDarlingInf() (from Marsaglia). `x` is
+// the value returned by AndersonDarlingInf() and `n` the sample size; add the
+// result to `x`. Piecewise in x: x > 0.8, x < cutoff(n), and in between.
+double AndersonDarlingErrFix(int n, double x) {
+  if (x > 0.8) {
+    return (-130.2137 +
+            (745.2337 -
+             (1705.091 - (1950.646 - (1116.360 - 255.7844 * x) * x) * x) * x) *
+                x) /
+           n;
+  }
+  double cutoff = 0.01265 + 0.1757 / n;
+  if (x < cutoff) {
+    double t = x / cutoff;
+    t = sqrt(t) * (1 - t) * (49 * t - 102);
+    return t * (0.0037 / (n * n) + 0.00078 / n + 0.00006) / n;
+  } else {
+    double t = (x - cutoff) / (0.8 - cutoff);
+    t = -0.00022633 +
+        (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864 * t) * t) * t) * t) *
+            t;
+    return t * (0.04213 + 0.01365 / n) / n;
+  }
+}
+
+// Returns the Anderson-Darling p-value for statistic `z` and sample size `n`.
+double AndersonDarlingPValue(int n, double z) {
+  double ad = AndersonDarlingInf(z);  // n -> infinity approximation
+  double errfix = AndersonDarlingErrFix(n, ad);  // finite-n correction
+  return ad + errfix;
+}
+
+double AndersonDarlingStatistic(const std::vector<double>& random_sample) {
+  int n = random_sample.size();  // the visible caller sorts the sample first
+  double ad_sum = 0;
+  for (int i = 0; i < n; i++) {
+    ad_sum += (2 * i + 1) *  // NOTE(review): a sample of 0 or 1 gives log(0)
+              std::log(random_sample[i] * (1 - random_sample[n - 1 - i]));
+  }
+  double ad_statistic = -n - 1 / static_cast<double>(n) * ad_sum;  // -n - S/n
+  return ad_statistic;
+}
+
+// Tests whether `random_sample` is uniformly distributed: computes the
+// Anderson-Darling statistic of the sample and returns its p-value.
+// `random_sample` is expected to be a sorted set of random doubles.
+// See "Evaluating the Anderson-Darling Distribution" by
+// Marsaglia and Marsaglia for details.
+double AndersonDarlingTest(const std::vector<double>& random_sample) {
+  double ad_statistic = AndersonDarlingStatistic(random_sample);
+  double p = AndersonDarlingPValue(random_sample.size(), ad_statistic);
+  return p;
+}
+
+// Checks that NextRandom() generates uniformly distributed numbers by applying
+// the Anderson-Darling test for uniformity at several sample sizes.
+TEST(ExponentialBiasedTest, TestNextRandom) {
+  for (auto n : std::vector<int>({
+           10,  // Check short-range correlation
+           100, 1000,
+           10000  // Make sure there's no systemic error
+       })) {
+    uint64_t x = 1;
+    // Exclusive upper bound of the prng; assumes it returns 48-bit numbers.
+    uint64_t max_prng_value = static_cast<uint64_t>(1) << 48;
+    // Warm up: advance the generator 20 times from the fixed seed.
+    for (int i = 1; i <= 20; i++) {
+      x = ExponentialBiased::NextRandom(x);
+    }
+    std::vector<uint64_t> int_random_sample(n);
+    // Collect n consecutive samples.
+    for (int i = 0; i < n; i++) {
+      int_random_sample[i] = x;
+      x = ExponentialBiased::NextRandom(x);
+    }
+    // Sort ascending. NOTE(review): <algorithm> is not directly included.
+    std::sort(int_random_sample.begin(), int_random_sample.end());
+    std::vector<double> random_sample(n);
+    // Convert them to uniform randoms (in the range [0, 1)).
+    for (int i = 0; i < n; i++) {
+      random_sample[i] =
+          static_cast<double>(int_random_sample[i]) / max_prng_value;
+    }
+    // Compute the Anderson-Darling p-value; fail if within 0.0001 of 0 or 1.
+    double ad_pvalue = AndersonDarlingTest(random_sample);
+    EXPECT_GT(std::min(ad_pvalue, 1 - ad_pvalue), 0.0001)
+        << "prng is not uniform: n = " << n << " p = " << ad_pvalue;
+  }
+}
+
+// The generator must work as a constant-initialized static, as a thread_local
+// (when available), and as a stack variable; each Get(2) must return >= 0.
+TEST(ExponentialBiasedTest, InitializationModes) {
+  ABSL_CONST_INIT static ExponentialBiased eb_static;
+  EXPECT_THAT(eb_static.Get(2), Ge(0));
+
+#if ABSL_HAVE_THREAD_LOCAL
+  thread_local ExponentialBiased eb_thread;
+  EXPECT_THAT(eb_thread.Get(2), Ge(0));
+#endif
+
+  ExponentialBiased eb_stack;
+  EXPECT_THAT(eb_stack.Get(2), Ge(0));
+}
+
+}  // namespace base_internal
+}  // namespace absl