about summary refs log tree commit diff
path: root/absl/strings/internal/charconv_bigint.h
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings/internal/charconv_bigint.h')
-rw-r--r--absl/strings/internal/charconv_bigint.h426
1 files changed, 426 insertions, 0 deletions
diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h
new file mode 100644
index 000000000000..aa70af2c2894
--- /dev/null
+++ b/absl/strings/internal/charconv_bigint.h
@@ -0,0 +1,426 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
+#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
+
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+#include "absl/strings/ascii.h"
+#include "absl/strings/internal/charconv_parse.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace strings_internal {
+
+// The largest power that 5 that can be raised to, and still fit in a uint32_t.
+constexpr int kMaxSmallPowerOfFive = 13;
+// The largest power that 10 that can be raised to, and still fit in a uint32_t.
+constexpr int kMaxSmallPowerOfTen = 9;
+
+extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];
+extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
+
+// Large, fixed-width unsigned integer.
+//
+// Exact rounding for decimal-to-binary floating point conversion requires very
+// large integer math, but a design goal of absl::from_chars is to avoid
+// allocating memory.  The integer precision needed for decimal-to-binary
+// conversions is large but bounded, so a huge fixed-width integer class
+// suffices.
+//
+// This is an intentionally limited big integer class.  Only needed operations
+// are implemented.  All storage lives in an array data member, and all
+// arithmetic is done in-place, to avoid requiring separate storage for operand
+// and result.
+//
+// This is an internal class.  Some methods live in the .cc file, and are
+// instantiated only for the values of max_words we need.
+template <int max_words>
+class BigUnsigned {
+ public:
+  static_assert(max_words == 4 || max_words == 84,
+                "unsupported max_words value");
+
+  BigUnsigned() : size_(0), words_{} {}
+  explicit BigUnsigned(uint32_t v) : size_(v > 0 ? 1 : 0), words_{v} {}
+  explicit BigUnsigned(uint64_t v)
+      : size_(0),
+        words_{static_cast<uint32_t>(v & 0xffffffff),
+               static_cast<uint32_t>(v >> 32)} {
+    if (words_[1]) {
+      size_ = 2;
+    } else if (words_[0]) {
+      size_ = 1;
+    }
+  }
+
+  // Constructs a BigUnsigned from the given string_view containing a decimal
+  // value.  If the input std::string is not a decimal integer, constructs a 0
+  // instead.
+  explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
+    // Check for valid input, returning a 0 otherwise.  This is reasonable
+    // behavior only because this constructor is for unit tests.
+    if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
+        sv.empty()) {
+      return;
+    }
+    int exponent_adjust =
+        ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
+    if (exponent_adjust > 0) {
+      MultiplyByTenToTheNth(exponent_adjust);
+    }
+  }
+
+  // Loads the mantissa value of a previously-parsed float.
+  //
+  // Returns the associated decimal exponent.  The value of the parsed float is
+  // exactly *this * 10**exponent.
+  int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
+
+  // Returns the number of decimal digits of precision this type provides.  All
+  // numbers with this many decimal digits or fewer are representable by this
+  // type.
+  //
+  // Analagous to std::numeric_limits<BigUnsigned>::digits10.
+  static constexpr int Digits10() {
+    // 9975007/1035508 is very slightly less than log10(2**32).
+    return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
+  }
+
+  // Shifts left by the given number of bits.
+  void ShiftLeft(int count) {
+    if (count > 0) {
+      const int word_shift = count / 32;
+      if (word_shift >= max_words) {
+        SetToZero();
+        return;
+      }
+      size_ = std::min(size_ + word_shift, max_words);
+      count %= 32;
+      if (count == 0) {
+        std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
+      } else {
+        for (int i = std::min(size_, max_words - 1); i > word_shift; --i) {
+          words_[i] = (words_[i - word_shift] << count) |
+                      (words_[i - word_shift - 1] >> (32 - count));
+        }
+        words_[word_shift] = words_[0] << count;
+        // Grow size_ if necessary.
+        if (size_ < max_words && words_[size_]) {
+          ++size_;
+        }
+      }
+      std::fill(words_, words_ + word_shift, 0u);
+    }
+  }
+
+
+  // Multiplies by v in-place.
+  void MultiplyBy(uint32_t v) {
+    if (size_ == 0 || v == 1) {
+      return;
+    }
+    if (v == 0) {
+      SetToZero();
+      return;
+    }
+    const uint64_t factor = v;
+    uint64_t window = 0;
+    for (int i = 0; i < size_; ++i) {
+      window += factor * words_[i];
+      words_[i] = window & 0xffffffff;
+      window >>= 32;
+    }
+    // If carry bits remain and there's space for them, grow size_.
+    if (window && size_ < max_words) {
+      words_[size_] = window & 0xffffffff;
+      ++size_;
+    }
+  }
+
+  void MultiplyBy(uint64_t v) {
+    uint32_t words[2];
+    words[0] = static_cast<uint32_t>(v);
+    words[1] = static_cast<uint32_t>(v >> 32);
+    if (words[1] == 0) {
+      MultiplyBy(words[0]);
+    } else {
+      MultiplyBy(2, words);
+    }
+  }
+
+  // Multiplies in place by 5 to the power of n.  n must be non-negative.
+  void MultiplyByFiveToTheNth(int n) {
+    while (n >= kMaxSmallPowerOfFive) {
+      MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
+      n -= kMaxSmallPowerOfFive;
+    }
+    if (n > 0) {
+      MultiplyBy(kFiveToNth[n]);
+    }
+  }
+
+  // Multiplies in place by 10 to the power of n.  n must be non-negative.
+  void MultiplyByTenToTheNth(int n) {
+    if (n > kMaxSmallPowerOfTen) {
+      // For large n, raise to a power of 5, then shift left by the same amount.
+      // (10**n == 5**n * 2**n.)  This requires fewer multiplications overall.
+      MultiplyByFiveToTheNth(n);
+      ShiftLeft(n);
+    } else if (n > 0) {
+      // We can do this more quickly for very small N by using a single
+      // multiplication.
+      MultiplyBy(kTenToNth[n]);
+    }
+  }
+
+  // Returns the value of 5**n, for non-negative n.  This implementation uses
+  // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
+  // MultiplyByFiveToTheNth().
+  static BigUnsigned FiveToTheNth(int n);
+
+  // Multiplies by another BigUnsigned, in-place.
+  template <int M>
+  void MultiplyBy(const BigUnsigned<M>& other) {
+    MultiplyBy(other.size(), other.words());
+  }
+
+  void SetToZero() {
+    std::fill(words_, words_ + size_, 0u);
+    size_ = 0;
+  }
+
+  // Returns the value of the nth word of this BigUnsigned.  This is
+  // range-checked, and returns 0 on out-of-bounds accesses.
+  uint32_t GetWord(int index) const {
+    if (index < 0 || index >= size_) {
+      return 0;
+    }
+    return words_[index];
+  }
+
+  // Returns this integer as a decimal std::string.  This is not used in the decimal-
+  // to-binary conversion; it is intended to aid in testing.
+  std::string ToString() const;
+
+  int size() const { return size_; }
+  const uint32_t* words() const { return words_; }
+
+ private:
+  // Reads the number between [begin, end), possibly containing a decimal point,
+  // into this BigUnsigned.
+  //
+  // Callers are required to ensure [begin, end) contains a valid number, with
+  // one or more decimal digits and at most one decimal point.  This routine
+  // will behave unpredictably if these preconditions are not met.
+  //
+  // Only the first `significant_digits` digits are read.  Digits beyond this
+  // limit are "sticky": If the final significant digit is 0 or 5, and if any
+  // dropped digit is nonzero, then that final significant digit is adjusted up
+  // to 1 or 6.  This adjustment allows for precise rounding.
+  //
+  // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
+  // account for the decimal point and for dropped significant digits.  After
+  // this function returns,
+  //   actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
+  int ReadDigits(const char* begin, const char* end, int significant_digits);
+
+  // Performs a step of big integer multiplication.  This computes the full
+  // (64-bit-wide) values that should be added at the given index (step), and
+  // adds to that location in-place.
+  //
+  // Because our math all occurs in place, we must multiply starting from the
+  // highest word working downward.  (This is a bit more expensive due to the
+  // extra carries involved.)
+  //
+  // This must be called in steps, for each word to be calculated, starting from
+  // the high end and working down to 0.  The first value of `step` should be
+  //   `std::min(original_size + other.size_ - 2, max_words - 1)`.
+  // The reason for this expression is that multiplying the i'th word from one
+  // multiplicand and the j'th word of another multiplicand creates a
+  // two-word-wide value to be stored at the (i+j)'th element.  The highest
+  // word indices we will access are `original_size - 1` from this object, and
+  // `other.size_ - 1` from our operand.  Therefore,
+  // `original_size + other.size_ - 2` is the first step we should calculate,
+  // but limited on an upper bound by max_words.
+
+  // Working from high-to-low ensures that we do not overwrite the portions of
+  // the initial value of *this which are still needed for later steps.
+  //
+  // Once called with step == 0, *this contains the result of the
+  // multiplication.
+  //
+  // `original_size` is the size_ of *this before the first call to
+  // MultiplyStep().  `other_words` and `other_size` are the contents of our
+  // operand.  `step` is the step to perform, as described above.
+  void MultiplyStep(int original_size, const uint32_t* other_words,
+                    int other_size, int step);
+
+  void MultiplyBy(int other_size, const uint32_t* other_words) {
+    const int original_size = size_;
+    const int first_step =
+        std::min(original_size + other_size - 2, max_words - 1);
+    for (int step = first_step; step >= 0; --step) {
+      MultiplyStep(original_size, other_words, other_size, step);
+    }
+  }
+
+  // Adds a 32-bit value to the index'th word, with carry.
+  void AddWithCarry(int index, uint32_t value) {
+    if (value) {
+      while (index < max_words && value > 0) {
+        words_[index] += value;
+        // carry if we overflowed in this word:
+        if (value > words_[index]) {
+          value = 1;
+          ++index;
+        } else {
+          value = 0;
+        }
+      }
+      size_ = std::min(max_words, std::max(index + 1, size_));
+    }
+  }
+
+  void AddWithCarry(int index, uint64_t value) {
+    if (value && index < max_words) {
+      uint32_t high = value >> 32;
+      uint32_t low = value & 0xffffffff;
+      words_[index] += low;
+      if (words_[index] < low) {
+        ++high;
+        if (high == 0) {
+          // Carry from the low word caused our high word to overflow.
+          // Short circuit here to do the right thing.
+          AddWithCarry(index + 2, static_cast<uint32_t>(1));
+          return;
+        }
+      }
+      if (high > 0) {
+        AddWithCarry(index + 1, high);
+      } else {
+        // Normally 32-bit AddWithCarry() sets size_, but since we don't call
+        // it when `high` is 0, do it ourselves here.
+        size_ = std::min(max_words, std::max(index + 1, size_));
+      }
+    }
+  }
+
+  // Divide this in place by a constant divisor.  Returns the remainder of the
+  // division.
+  template <uint32_t divisor>
+  uint32_t DivMod() {
+    uint64_t accumulator = 0;
+    for (int i = size_ - 1; i >= 0; --i) {
+      accumulator <<= 32;
+      accumulator += words_[i];
+      // accumulator / divisor will never overflow an int32_t in this loop
+      words_[i] = static_cast<uint32_t>(accumulator / divisor);
+      accumulator = accumulator % divisor;
+    }
+    while (size_ > 0 && words_[size_ - 1] == 0) {
+      --size_;
+    }
+    return static_cast<uint32_t>(accumulator);
+  }
+
+  // The number of elements in words_ that may carry significant values.
+  // All elements beyond this point are 0.
+  //
+  // When size_ is 0, this BigUnsigned stores the value 0.
+  // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is
+  // nonzero.  This can occur due to overflow truncation.
+  // In particular, x.size_ != y.size_ does *not* imply x != y.
+  int size_;
+  uint32_t words_[max_words];
+};
+
+// Compares two big integer instances.
+//
+// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
+template <int N, int M>
+int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  int limit = std::max(lhs.size(), rhs.size());
+  for (int i = limit - 1; i >= 0; --i) {
+    const uint32_t lhs_word = lhs.GetWord(i);
+    const uint32_t rhs_word = rhs.GetWord(i);
+    if (lhs_word < rhs_word) {
+      return -1;
+    } else if (lhs_word > rhs_word) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+template <int N, int M>
+bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  int limit = std::max(lhs.size(), rhs.size());
+  for (int i = 0; i < limit; ++i) {
+    if (lhs.GetWord(i) != rhs.GetWord(i)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+template <int N, int M>
+bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <int N, int M>
+bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  return Compare(lhs, rhs) == -1;
+}
+
+template <int N, int M>
+bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  return rhs < lhs;
+}
+template <int N, int M>
+bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  return !(rhs < lhs);
+}
+template <int N, int M>
+bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
+  return !(lhs < rhs);
+}
+
+// Output operator for BigUnsigned, for testing purposes only.
+template <int N>
+std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
+  return os << num.ToString();
+}
+
+// Explicit instantiation declarations for the sizes of BigUnsigned that we
+// are using.
+//
+// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
+// still bigger than an int128, and 84 is a large value we will want to use
+// in the from_chars implementation.
+//
+// Comments justifying the use of 84 belong in the from_chars implementation,
+// and will be added in a follow-up CL.
+extern template class BigUnsigned<4>;
+extern template class BigUnsigned<84>;
+
+}  // namespace strings_internal
+}  // namespace absl
+
+#endif  // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_