about summary refs log tree commit diff
path: root/absl/random/bernoulli_distribution.h
blob: 25bd0d5ca4202832edb36f9d552c948eb10ab936 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_RANDOM_BERNOULLI_DISTRIBUTION_H_
#define ABSL_RANDOM_BERNOULLI_DISTRIBUTION_H_

#include <cstdint>
#include <istream>
#include <limits>

#include "absl/base/optimization.h"
#include "absl/random/internal/fast_uniform_bits.h"
#include "absl/random/internal/iostream_state_saver.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

// absl::bernoulli_distribution is a drop in replacement for
// std::bernoulli_distribution. It guarantees that (given a perfect
// UniformRandomBitGenerator) the acceptance probability is *exactly* equal to
// the given double.
//
// The implementation assumes that double is IEEE754
class bernoulli_distribution {
 public:
  using result_type = bool;

  class param_type {
   public:
    using distribution_type = bernoulli_distribution;

    explicit param_type(double p = 0.5) : prob_(p) {
      assert(p >= 0.0 && p <= 1.0);
    }

    double p() const { return prob_; }

    friend bool operator==(const param_type& p1, const param_type& p2) {
      return p1.p() == p2.p();
    }
    friend bool operator!=(const param_type& p1, const param_type& p2) {
      return p1.p() != p2.p();
    }

   private:
    double prob_;
  };

  bernoulli_distribution() : bernoulli_distribution(0.5) {}

  explicit bernoulli_distribution(double p) : param_(p) {}

  explicit bernoulli_distribution(param_type p) : param_(p) {}

  // no-op
  void reset() {}

  template <typename URBG>
  bool operator()(URBG& g) {  // NOLINT(runtime/references)
    return Generate(param_.p(), g);
  }

  template <typename URBG>
  bool operator()(URBG& g,  // NOLINT(runtime/references)
                  const param_type& param) {
    return Generate(param.p(), g);
  }

  param_type param() const { return param_; }
  void param(const param_type& param) { param_ = param; }

  double p() const { return param_.p(); }

  result_type(min)() const { return false; }
  result_type(max)() const { return true; }

  friend bool operator==(const bernoulli_distribution& d1,
                         const bernoulli_distribution& d2) {
    return d1.param_ == d2.param_;
  }

  friend bool operator!=(const bernoulli_distribution& d1,
                         const bernoulli_distribution& d2) {
    return d1.param_ != d2.param_;
  }

 private:
  static constexpr uint64_t kP32 = static_cast<uint64_t>(1) << 32;

  template <typename URBG>
  static bool Generate(double p, URBG& g);  // NOLINT(runtime/references)

  param_type param_;
};

template <typename CharT, typename Traits>
std::basic_ostream<CharT, Traits>& operator<<(
    std::basic_ostream<CharT, Traits>& os,  // NOLINT(runtime/references)
    const bernoulli_distribution& x) {
  auto saver = random_internal::make_ostream_state_saver(os);
  os.precision(random_internal::stream_precision_helper<double>::kPrecision);
  os << x.p();
  return os;
}

template <typename CharT, typename Traits>
std::basic_istream<CharT, Traits>& operator>>(
    std::basic_istream<CharT, Traits>& is,  // NOLINT(runtime/references)
    bernoulli_distribution& x) {            // NOLINT(runtime/references)
  auto saver = random_internal::make_istream_state_saver(is);
  auto p = random_internal::read_floating_point<double>(is);
  if (!is.fail()) {
    x.param(bernoulli_distribution::param_type(p));
  }
  return is;
}

template <typename URBG>
bool bernoulli_distribution::Generate(double p,
                                      URBG& g) {  // NOLINT(runtime/references)
  random_internal::FastUniformBits<uint32_t> fast_u32;

  while (true) {
    // There are two aspects of the definition of `c` below that are worth
    // commenting on.  First, because `p` is in the range [0, 1], `c` is in the
    // range [0, 2^32] which does not fit in a uint32_t and therefore requires
    // 64 bits.
    //
    // Second, `c` is constructed by first casting explicitly to a signed
    // integer and then converting implicitly to an unsigned integer of the same
    // size.  This is done because the hardware conversion instructions produce
    // signed integers from double; if taken as a uint64_t the conversion would
    // be wrong for doubles greater than 2^63 (not relevant in this use-case).
    // If converted directly to an unsigned integer, the compiler would end up
    // emitting code to handle such large values that are not relevant due to
    // the known bounds on `c`.  To avoid these extra instructions this
    // implementation converts first to the signed type and then use the
    // implicit conversion to unsigned (which is a no-op).
    const uint64_t c = static_cast<int64_t>(p * kP32);
    const uint32_t v = fast_u32(g);
    // FAST PATH: this path fails with probability 1/2^32.  Note that simply
    // returning v <= c would approximate P very well (up to an absolute error
    // of 1/2^32); the slow path (taken in that range of possible error, in the
    // case of equality) eliminates the remaining error.
    if (ABSL_PREDICT_TRUE(v != c)) return v < c;

    // It is guaranteed that `q` is strictly less than 1, because if `q` were
    // greater than or equal to 1, the same would be true for `p`. Certainly `p`
    // cannot be greater than 1, and if `p == 1`, then the fast path would
    // necessary have been taken already.
    const double q = static_cast<double>(c) / kP32;

    // The probability of acceptance on the fast path is `q` and so the
    // probability of acceptance here should be `p - q`.
    //
    // Note that `q` is obtained from `p` via some shifts and conversions, the
    // upshot of which is that `q` is simply `p` with some of the
    // least-significant bits of its mantissa set to zero. This means that the
    // difference `p - q` will not have any rounding errors. To see why, pretend
    // that double has 10 bits of resolution and q is obtained from `p` in such
    // a way that the 4 least-significant bits of its mantissa are set to zero.
    // For example:
    //   p   = 1.1100111011 * 2^-1
    //   q   = 1.1100110000 * 2^-1
    // p - q = 1.011        * 2^-8
    // The difference `p - q` has exactly the nonzero mantissa bits that were
    // "lost" in `q` producing a number which is certainly representable in a
    // double.
    const double left = p - q;

    // By construction, the probability of being on this slow path is 1/2^32, so
    // P(accept in slow path) = P(accept| in slow path) * P(slow path),
    // which means the probability of acceptance here is `1 / (left * kP32)`:
    const double here = left * kP32;

    // The simplest way to compute the result of this trial is to repeat the
    // whole algorithm with the new probability. This terminates because even
    // given  arbitrarily unfriendly "random" bits, each iteration either
    // multiplies a tiny probability by 2^32 (if c == 0) or strips off some
    // number of nonzero mantissa bits. That process is bounded.
    if (here == 0) return false;
    p = here;
  }
}

ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_RANDOM_BERNOULLI_DISTRIBUTION_H_