From bd40a41cc142b36c73b881099d08a9d83f7f4780 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 18 Jun 2018 13:18:53 -0700 Subject: -- f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 by Shaindel Schwartz : Internal change PiperOrigin-RevId: 201046831 -- 711715a78b7e53dfaafd4d7f08a74e76db22af88 by Mark Barolak : Internal fix PiperOrigin-RevId: 201043684 -- 64b53edd6bf1fa48f74e7f5d33f00f80d5089147 by Shaindel Schwartz : Remove extra whitespace PiperOrigin-RevId: 201041989 -- 0bdd2a0b33657b688e4a04aeba9ebba47e4dc6ca by Shaindel Schwartz : Whitespace fix. PiperOrigin-RevId: 201034413 -- 3deb0ac296ef1b74c4789e114a8a8bf53253f26b by Shaindel Schwartz : Scrub build tags. No functional changes. PiperOrigin-RevId: 201032927 -- da75d0f8b73baa7e8f4e9a092bba546012ed3b71 by Alex Strelnikov : Internal change. PiperOrigin-RevId: 201026131 -- 6815d80caa19870d0c441b6b9816c68db41393a5 by Tom Manshreck : Add documentation for our LTS snapshot branches PiperOrigin-RevId: 201025191 -- 64c3b02006f39e6a8127bbabf9ec947fb45b6504 by Greg Falcon : Provide absl::from_chars for double and float types. This is a forward-compatible implementation of std::from_chars from C++17. This provides exact "round_to_nearest" conversions, and has some nice properties: * Works with string_view (it can convert numbers from non-NUL-terminated buffers) * Never allocates memory * Faster than the standard library strtod() in our toolchain * Uses integer math in its calculations, so is unaffected by floating point environment * Unaffected by C locale Also change SimpleAtod/SimpleAtoi to use this new API under the hood. PiperOrigin-RevId: 201003324 -- 542869258eb100779497c899103dc96aced52749 by Greg Falcon : Internal change PiperOrigin-RevId: 200999200 -- 3aba192775c7f80e2cd7f221b0a73537823c54ea by Gennadiy Rozental : Internal change PiperOrigin-RevId: 200947470 -- daf9b9feedd748d5364a4c06165b7cb7604d3e1e by Mark Barolak : Add an absl:: qualification to a usage of base_internal::SchedulingMode outside of an absl:: namespace. PiperOrigin-RevId: 200748234 -- a8d265290a22d629f3d9bf9f872c204200bfe8c8 by Mark Barolak : Add a missing namespace closing comment to optional.h. PiperOrigin-RevId: 200739934 -- f05af8ee1c6b864dad2df7c907d424209a3e3202 by Abseil Team : Internal change PiperOrigin-RevId: 200719115 GitOrigin-RevId: f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 Change-Id: Ie4fa601078fd4aa57286372611f1d114fdec82c0 --- absl/strings/BUILD.bazel | 74 +- absl/strings/CMakeLists.txt | 44 ++ absl/strings/charconv.cc | 982 ++++++++++++++++++++++++++ absl/strings/charconv.h | 115 +++ absl/strings/charconv_benchmark.cc | 204 ++++++ absl/strings/charconv_test.cc | 766 ++++++++++++++++++++ absl/strings/internal/charconv_bigint.cc | 357 ++++++++++ absl/strings/internal/charconv_bigint.h | 426 +++++++++++ absl/strings/internal/charconv_bigint_test.cc | 203 ++++++ absl/strings/internal/charconv_parse.cc | 496 +++++++++++++ absl/strings/internal/charconv_parse.h | 96 +++ absl/strings/internal/charconv_parse_test.cc | 357 ++++++++++ absl/strings/numbers.cc | 86 +-- 13 files changed, 4149 insertions(+), 57 deletions(-) create mode 100644 absl/strings/charconv.cc create mode 100644 absl/strings/charconv.h create mode 100644 absl/strings/charconv_benchmark.cc create mode 100644 absl/strings/charconv_test.cc create mode 100644 absl/strings/internal/charconv_bigint.cc create mode 100644 absl/strings/internal/charconv_bigint.h create mode 100644 absl/strings/internal/charconv_bigint_test.cc create mode 100644 absl/strings/internal/charconv_parse.cc create mode 100644 absl/strings/internal/charconv_parse.h create mode 100644 absl/strings/internal/charconv_parse_test.cc (limited to 'absl/strings') diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index f06bdc0d01cf..328f52f33d26 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -32,7 +32,12 @@ cc_library( name = "strings", srcs = [ "ascii.cc", + "charconv.cc", "escaping.cc", + "internal/charconv_bigint.cc", + "internal/charconv_bigint.h", + "internal/charconv_parse.cc", + "internal/charconv_parse.h", "internal/memutil.cc", "internal/memutil.h", "internal/stl_type_traits.h", @@ -48,6 +53,7 @@ cc_library( ], hdrs = [ "ascii.h", + "charconv.h", "escaping.h", "match.h", "numbers.h", @@ -144,11 +150,6 @@ cc_test( size = "small", srcs = ["ascii_test.cc"], copts = ABSL_TEST_COPTS, - tags = [ - "no_test_android_arm", - "no_test_android_arm64", - "no_test_android_x86", - ], visibility = ["//visibility:private"], deps = [ ":strings", @@ -398,12 +399,6 @@ cc_test( "numbers_test.cc", ], copts = ABSL_TEST_COPTS, - tags = [ - "no_test_android_arm", - "no_test_android_arm64", - "no_test_android_x86", - "no_test_loonix", - ], visibility = ["//visibility:private"], deps = [ ":strings", @@ -429,11 +424,6 @@ cc_test( name = "char_map_test", srcs = ["internal/char_map_test.cc"], copts = ABSL_TEST_COPTS, - tags = [ - "no_test_android_arm", - "no_test_android_arm64", - "no_test_android_x86", - ], deps = [ ":internal", "@com_google_googletest//:gtest_main", @@ -450,3 +440,55 @@ cc_test( "@com_github_google_benchmark//:benchmark_main", ], ) + +cc_test( + name = "charconv_test", + srcs = ["charconv_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "charconv_parse_test", + srcs = [ + "internal/charconv_parse.h", + "internal/charconv_parse_test.cc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "charconv_bigint_test", + srcs = [ + "internal/charconv_bigint.h", + "internal/charconv_bigint_test.cc", + "internal/charconv_parse.h", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "charconv_benchmark", + srcs = [ + "charconv_benchmark.cc", + ], + deps = [ + ":strings", + "//absl/base", + "@com_github_google_benchmark//:benchmark_main", + ], +) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 9dc47328c731..cab2c4561e48 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -17,6 +17,7 @@ list(APPEND STRINGS_PUBLIC_HEADERS "ascii.h" + "charconv.h" "escaping.h" "match.h" "numbers.h" @@ -33,6 +34,8 @@ list(APPEND STRINGS_PUBLIC_HEADERS list(APPEND STRINGS_INTERNAL_HEADERS "internal/bits.h" "internal/char_map.h" + "internal/charconv_bigint.h" + "internal/charconv_parse.h" "internal/memutil.h" "internal/ostringstream.h" "internal/resize_uninitialized.h" @@ -47,7 +50,10 @@ list(APPEND STRINGS_INTERNAL_HEADERS # add string library list(APPEND STRINGS_SRC "ascii.cc" + "charconv.cc" "escaping.cc" + "internal/charconv_bigint.cc" + "internal/charconv_parse.cc" "internal/memutil.cc" "internal/memutil.h" "internal/utf8.cc" @@ -301,5 +307,43 @@ absl_test( ) +# test charconv_test +set(CHARCONV_TEST_SRC "charconv_test.cc") +set(CHARCONV_TEST_PUBLIC_LIBRARIES absl::strings) +absl_test( + TARGET + charconv_test + SOURCES + ${CHARCONV_TEST_SRC} + PUBLIC_LIBRARIES + ${CHARCONV_TEST_PUBLIC_LIBRARIES} +) + + +# test charconv_parse_test +set(CHARCONV_PARSE_TEST_SRC "internal/charconv_parse_test.cc") +set(CHARCONV_PARSE_TEST_PUBLIC_LIBRARIES absl::strings) + +absl_test( + TARGET + charconv_parse_test + SOURCES + ${CHARCONV_PARSE_TEST_SRC} + PUBLIC_LIBRARIES + ${CHARCONV_PARSE_TEST_PUBLIC_LIBRARIES} +) + + +# test charconv_bigint_test +set(CHARCONV_BIGINT_TEST_SRC "internal/charconv_bigint_test.cc") +set(CHARCONV_BIGINT_TEST_PUBLIC_LIBRARIES absl::strings) +absl_test( + TARGET + charconv_bigint_test + SOURCES + ${CHARCONV_BIGINT_TEST_SRC} + PUBLIC_LIBRARIES + ${CHARCONV_BIGINT_TEST_PUBLIC_LIBRARIES} +) diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc new file mode 100644 index 000000000000..08c3947eccd4 --- /dev/null +++ b/absl/strings/charconv.cc @@ -0,0 +1,982 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/charconv.h" + +#include +#include +#include +#include + +#include "absl/base/casts.h" +#include "absl/numeric/int128.h" +#include "absl/strings/internal/bits.h" +#include "absl/strings/internal/charconv_bigint.h" +#include "absl/strings/internal/charconv_parse.h" + +// The macro ABSL_BIT_PACK_FLOATS is defined on x86-64, where IEEE floating +// point numbers have the same endianness in memory as a bitfield struct +// containing the corresponding parts. +// +// When set, we replace calls to ldexp() with manual bit packing, which is +// faster and is unaffected by floating point environment. +#ifdef ABSL_BIT_PACK_FLOATS +#error ABSL_BIT_PACK_FLOATS cannot be directly set +#elif defined(__x86_64__) || defined(_M_X64) +#define ABSL_BIT_PACK_FLOATS 1 +#endif + +// A note about subnormals: +// +// The code below talks about "normals" and "subnormals". A normal IEEE float +// has a fixed-width mantissa and power of two exponent. For example, a normal +// `double` has a 53-bit mantissa. Because the high bit is always 1, it is not +// stored in the representation. The implicit bit buys an extra bit of +// resolution in the datatype. +// +// The downside of this scheme is that there is a large gap between DBL_MIN and +// zero. (Large, at least, relative to the different between DBL_MIN and the +// next representable number). This gap is softened by the "subnormal" numbers, +// which have the same power-of-two exponent as DBL_MIN, but no implicit 53rd +// bit. An all-bits-zero exponent in the encoding represents subnormals. (Zero +// is represented as a subnormal with an all-bits-zero mantissa.) +// +// The code below, in calculations, represents the mantissa as a uint64_t. The +// end result normally has the 53rd bit set. It represents subnormals by using +// narrower mantissas. + +namespace absl { +namespace { + +template +struct FloatTraits; + +template <> +struct FloatTraits { + // The number of mantissa bits in the given float type. This includes the + // implied high bit. + static constexpr int kTargetMantissaBits = 53; + + // The largest supported IEEE exponent, in our integral mantissa + // representation. + // + // If `m` is the largest possible int kTargetMantissaBits bits wide, then + // m * 2**kMaxExponent is exactly equal to DBL_MAX. + static constexpr int kMaxExponent = 971; + + // The smallest supported IEEE normal exponent, in our integral mantissa + // representation. + // + // If `m` is the smallest possible int kTargetMantissaBits bits wide, then + // m * 2**kMinNormalExponent is exactly equal to DBL_MIN. + static constexpr int kMinNormalExponent = -1074; + + static double MakeNan(const char* tagp) { + // Support nan no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return nan(tagp); + } + + // Builds a nonzero floating point number out of the provided parts. + // + // This is intended to do the same operation as ldexp(mantissa, exponent), + // but using purely integer math, to avoid -ffastmath and floating + // point environment issues. Using type punning is also faster. We fall back + // to ldexp on a per-platform basis for portability. + // + // `exponent` must be between kMinNormalExponent and kMaxExponent. + // + // `mantissa` must either be exactly kTargetMantissaBits wide, in which case + // a normal value is made, or it must be less narrow than that, in which case + // `exponent` must be exactly kMinNormalExponent, and a subnormal value is + // made. + static double Make(uint64_t mantissa, int exponent, bool sign) { +#ifndef ABSL_BIT_PACK_FLOATS + // Support ldexp no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return sign ? -ldexp(mantissa, exponent) : ldexp(mantissa, exponent); +#else + constexpr uint64_t kMantissaMask = + (uint64_t(1) << (kTargetMantissaBits - 1)) - 1; + uint64_t dbl = static_cast(sign) << 63; + if (mantissa > kMantissaMask) { + // Normal value. + // Adjust by 1023 for the exponent representation bias, and an additional + // 52 due to the implied decimal point in the IEEE mantissa represenation. + dbl += uint64_t{exponent + 1023u + kTargetMantissaBits - 1} << 52; + mantissa &= kMantissaMask; + } else { + // subnormal value + assert(exponent == kMinNormalExponent); + } + dbl += mantissa; + return absl::bit_cast(dbl); +#endif // ABSL_BIT_PACK_FLOATS + } +}; + +// Specialization of floating point traits for the `float` type. See the +// FloatTraits specialization above for meaning of each of the following +// members and methods. +template <> +struct FloatTraits { + static constexpr int kTargetMantissaBits = 24; + static constexpr int kMaxExponent = 104; + static constexpr int kMinNormalExponent = -149; + static float MakeNan(const char* tagp) { + // Support nanf no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return nanf(tagp); + } + static float Make(uint32_t mantissa, int exponent, bool sign) { +#ifndef ABSL_BIT_PACK_FLOATS + // Support ldexpf no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return sign ? -ldexpf(mantissa, exponent) : ldexpf(mantissa, exponent); +#else + constexpr uint32_t kMantissaMask = + (uint32_t(1) << (kTargetMantissaBits - 1)) - 1; + uint32_t flt = static_cast(sign) << 31; + if (mantissa > kMantissaMask) { + // Normal value. + // Adjust by 127 for the exponent representation bias, and an additional + // 23 due to the implied decimal point in the IEEE mantissa represenation. + flt += uint32_t{exponent + 127u + kTargetMantissaBits - 1} << 23; + mantissa &= kMantissaMask; + } else { + // subnormal value + assert(exponent == kMinNormalExponent); + } + flt += mantissa; + return absl::bit_cast(flt); +#endif // ABSL_BIT_PACK_FLOATS + } +}; + +// Decimal-to-binary conversions require coercing powers of 10 into a mantissa +// and a power of 2. The two helper functions Power10Mantissa(n) and +// Power10Exponent(n) perform this task. Together, these represent a hand- +// rolled floating point value which is equal to or just less than 10**n. +// +// The return values satisfy two range guarantees: +// +// Power10Mantissa(n) * 2**Power10Exponent(n) <= 10**n +// < (Power10Mantissa(n) + 1) * 2**Power10Exponent(n) +// +// 2**63 <= Power10Mantissa(n) < 2**64. +// +// Lookups into the power-of-10 table must first check the Power10Overflow() and +// Power10Underflow() functions, to avoid out-of-bounds table access. +// +// Indexes into these tables are biased by -kPower10TableMin, and the table has +// values in the range [kPower10TableMin, kPower10TableMax]. +extern const uint64_t kPower10MantissaTable[]; +extern const int16_t kPower10ExponentTable[]; + +// The smallest allowed value for use with the Power10Mantissa() and +// Power10Exponent() functions below. (If a smaller exponent is needed in +// calculations, the end result is guaranteed to underflow.) +constexpr int kPower10TableMin = -342; + +// The largest allowed value for use with the Power10Mantissa() and +// Power10Exponent() functions below. (If a smaller exponent is needed in +// calculations, the end result is guaranteed to overflow.) +constexpr int kPower10TableMax = 308; + +uint64_t Power10Mantissa(int n) { + return kPower10MantissaTable[n - kPower10TableMin]; +} + +int Power10Exponent(int n) { + return kPower10ExponentTable[n - kPower10TableMin]; +} + +// Returns true if n is large enough that 10**n always results in an IEEE +// overflow. +bool Power10Overflow(int n) { return n > kPower10TableMax; } + +// Returns true if n is small enough that 10**n times a ParsedFloat mantissa +// always results in an IEEE underflow. +bool Power10Underflow(int n) { return n < kPower10TableMin; } + +// Returns true if Power10Mantissa(n) * 2**Power10Exponent(n) is exactly equal +// to 10**n numerically. Put another way, this returns true if there is no +// truncation error in Power10Mantissa(n). +bool Power10Exact(int n) { return n >= 0 && n <= 27; } + +// Sentinel exponent values for representing numbers too large or too close to +// zero to represent in a double. +constexpr int kOverflow = 99999; +constexpr int kUnderflow = -99999; + +// Struct representing the calculated conversion result of a positive (nonzero) +// floating point number. +// +// The calculated number is mantissa * 2**exponent (mantissa is treated as an +// integer.) `mantissa` is chosen to be the correct width for the IEEE float +// representation being calculated. (`mantissa` will always have the same bit +// width for normal values, and narrower bit widths for subnormals.) +// +// If the result of conversion was an underflow or overflow, exponent is set +// to kUnderflow or kOverflow. +struct CalculatedFloat { + uint64_t mantissa = 0; + int exponent = 0; +}; + +// Returns the bit width of the given uint128. (Equivalently, returns 128 +// minus the number of leading zero bits.) +int BitWidth(uint128 value) { + if (Uint128High64(value) == 0) { + return 64 - strings_internal::CountLeadingZeros64(Uint128Low64(value)); + } + return 128 - strings_internal::CountLeadingZeros64(Uint128High64(value)); +} + +// Calculates how far to the right a mantissa needs to be shifted to create a +// properly adjusted mantissa for an IEEE floating point number. +// +// `mantissa_width` is the bit width of the mantissa to be shifted, and +// `binary_exponent` is the exponent of the number before the shift. +// +// This accounts for subnormal values, and will return a larger-than-normal +// shift if binary_exponent would otherwise be too low. +template +int NormalizedShiftSize(int mantissa_width, int binary_exponent) { + const int normal_shift = + mantissa_width - FloatTraits::kTargetMantissaBits; + const int minimum_shift = + FloatTraits::kMinNormalExponent - binary_exponent; + return std::max(normal_shift, minimum_shift); +} + +// Right shifts a uint128 so that it has the requested bit width. (The +// resulting value will have 128 - bit_width leading zeroes.) The initial +// `value` must be wider than the requested bit width. +// +// Returns the number of bits shifted. +int TruncateToBitWidth(int bit_width, uint128* value) { + const int current_bit_width = BitWidth(*value); + const int shift = current_bit_width - bit_width; + *value >>= shift; + return shift; +} + +// Checks if the given ParsedFloat represents one of the edge cases that are +// not dependent on number base: zero, infinity, or NaN. If so, sets *value +// the appropriate double, and returns true. +template +bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative, + FloatType* value) { + if (input.type == strings_internal::FloatType::kNan) { + // A bug in both clang and gcc would cause the compiler to optimize away the + // buffer we are building below. Declaring the buffer volatile avoids the + // issue, and has no measurable performance impact in microbenchmarks. + // + // https://bugs.llvm.org/show_bug.cgi?id=37778 + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86113 + constexpr ptrdiff_t kNanBufferSize = 128; + volatile char n_char_sequence[kNanBufferSize]; + if (input.subrange_begin == nullptr) { + n_char_sequence[0] = '\0'; + } else { + ptrdiff_t nan_size = input.subrange_end - input.subrange_begin; + nan_size = std::min(nan_size, kNanBufferSize - 1); + std::copy_n(input.subrange_begin, nan_size, n_char_sequence); + n_char_sequence[nan_size] = '\0'; + } + char* nan_argument = const_cast(n_char_sequence); + *value = negative ? -FloatTraits::MakeNan(nan_argument) + : FloatTraits::MakeNan(nan_argument); + return true; + } + if (input.type == strings_internal::FloatType::kInfinity) { + *value = negative ? -std::numeric_limits::infinity() + : std::numeric_limits::infinity(); + return true; + } + if (input.mantissa == 0) { + *value = negative ? -0.0 : 0.0; + return true; + } + return false; +} + +// Given a CalculatedFloat result of a from_chars conversion, generate the +// correct output values. +// +// CalculatedFloat can represent an underflow or overflow, in which case the +// error code in *result is set. Otherwise, the calculated floating point +// number is stored in *value. +template +void EncodeResult(const CalculatedFloat& calculated, bool negative, + absl::from_chars_result* result, FloatType* value) { + if (calculated.exponent == kOverflow) { + result->ec = std::errc::result_out_of_range; + *value = negative ? -std::numeric_limits::max() + : std::numeric_limits::max(); + return; + } else if (calculated.mantissa == 0 || calculated.exponent == kUnderflow) { + result->ec = std::errc::result_out_of_range; + *value = negative ? -0.0 : 0.0; + return; + } + *value = FloatTraits::Make(calculated.mantissa, + calculated.exponent, negative); +} + +// Returns the given uint128 shifted to the right by `shift` bits, and rounds +// the remaining bits using round_to_nearest logic. The value is returned as a +// uint64_t, since this is the type used by this library for storing calculated +// floating point mantissas. +// +// It is expected that the width of the input value shifted by `shift` will +// be the correct bit-width for the target mantissa, which is strictly narrower +// than a uint64_t. +// +// If `input_exact` is false, then a nonzero error epsilon is assumed. For +// rounding purposes, the true value being rounded is strictly greater than the +// input value. The error may represent a single lost carry bit. +// +// When input_exact, shifted bits of the form 1000000... represent a tie, which +// is broken by rounding to even -- the rounding direction is chosen so the low +// bit of the returned value is 0. +// +// When !input_exact, shifted bits of the form 10000000... represent a value +// strictly greater than one half (due to the error epsilon), and so ties are +// always broken by rounding up. +// +// When !input_exact, shifted bits of the form 01111111... are uncertain; +// the true value may or may not be greater than 10000000..., due to the +// possible lost carry bit. The correct rounding direction is unknown. In this +// case, the result is rounded down, and `output_exact` is set to false. +// +// Zero and negative values of `shift` are accepted, in which case the word is +// shifted left, as necessary. +uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact, + bool* output_exact) { + if (shift <= 0) { + *output_exact = input_exact; + return static_cast(value << -shift); + } + if (shift >= 128) { + // Exponent is so small that we are shifting away all significant bits. + // Answer will not be representable, even as a subnormal, so return a zero + // mantissa (which represents underflow). + *output_exact = true; + return 0; + } + + *output_exact = true; + const uint128 shift_mask = (uint128(1) << shift) - 1; + const uint128 halfway_point = uint128(1) << (shift - 1); + + const uint128 shifted_bits = value & shift_mask; + value >>= shift; + if (shifted_bits > halfway_point) { + // Shifted bits greater than 10000... require rounding up. + return static_cast(value + 1); + } + if (shifted_bits == halfway_point) { + // In exact mode, shifted bits of 10000... mean we're exactly halfway + // between two numbers, and we must round to even. So only round up if + // the low bit of `value` is set. + // + // In inexact mode, the nonzero error means the actual value is greater + // than the halfway point and we must alway round up. + if ((value & 1) == 1 || !input_exact) { + ++value; + } + return static_cast(value); + } + if (!input_exact && shifted_bits == halfway_point - 1) { + // Rounding direction is unclear, due to error. + *output_exact = false; + } + // Otherwise, round down. + return static_cast(value); +} + +// Checks if a floating point guess needs to be rounded up, using high precision +// math. +// +// `guess_mantissa` and `guess_exponent` represent a candidate guess for the +// number represented by `parsed_decimal`. +// +// The exact number represented by `parsed_decimal` must lie between the two +// numbers: +// A = `guess_mantissa * 2**guess_exponent` +// B = `(guess_mantissa + 1) * 2**guess_exponent` +// +// This function returns false if `A` is the better guess, and true if `B` is +// the better guess, with rounding ties broken by rounding to even. +bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent, + const strings_internal::ParsedFloat& parsed_decimal) { + // 768 is the number of digits needed in the worst case. We could determine a + // better limit dynamically based on the value of parsed_decimal.exponent. + // This would optimize pathological input cases only. (Sane inputs won't have + // hundreds of digits of mantissa.) + absl::strings_internal::BigUnsigned<84> exact_mantissa; + int exact_exponent = exact_mantissa.ReadFloatMantissa(parsed_decimal, 768); + + // Adjust the `guess` arguments to be halfway between A and B. + guess_mantissa = guess_mantissa * 2 + 1; + guess_exponent -= 1; + + // In our comparison: + // lhs = exact = exact_mantissa * 10**exact_exponent + // = exact_mantissa * 5**exact_exponent * 2**exact_exponent + // rhs = guess = guess_mantissa * 2**guess_exponent + // + // Because we are doing integer math, we can't directly deal with negative + // exponents. We instead move these to the other side of the inequality. + absl::strings_internal::BigUnsigned<84>& lhs = exact_mantissa; + int comparison; + if (exact_exponent >= 0) { + lhs.MultiplyByFiveToTheNth(exact_exponent); + absl::strings_internal::BigUnsigned<84> rhs(guess_mantissa); + // There are powers of 2 on both sides of the inequality; reduce this to + // a single bit-shift. + if (exact_exponent > guess_exponent) { + lhs.ShiftLeft(exact_exponent - guess_exponent); + } else { + rhs.ShiftLeft(guess_exponent - exact_exponent); + } + comparison = Compare(lhs, rhs); + } else { + // Move the power of 5 to the other side of the equation, giving us: + // lhs = exact_mantissa * 2**exact_exponent + // rhs = guess_mantissa * 5**(-exact_exponent) * 2**guess_exponent + absl::strings_internal::BigUnsigned<84> rhs = + absl::strings_internal::BigUnsigned<84>::FiveToTheNth(-exact_exponent); + rhs.MultiplyBy(guess_mantissa); + if (exact_exponent > guess_exponent) { + lhs.ShiftLeft(exact_exponent - guess_exponent); + } else { + rhs.ShiftLeft(guess_exponent - exact_exponent); + } + comparison = Compare(lhs, rhs); + } + if (comparison < 0) { + return false; + } else if (comparison > 0) { + return true; + } else { + // When lhs == rhs, the decimal input is exactly between A and B. + // Round towards even -- round up only if the low bit of the initial + // `guess_mantissa` was a 1. We shifted guess_mantissa left 1 bit at + // the beginning of this function, so test the 2nd bit here. + return (guess_mantissa & 2) == 2; + } +} + +// Constructs a CalculatedFloat from a given mantissa and exponent, but +// with the following normalizations applied: +// +// If rounding has caused mantissa to increase just past the allowed bit +// width, shift and adjust exponent. +// +// If exponent is too high, sets kOverflow. +// +// If mantissa is zero (representing a non-zero value not representable, even +// as a subnormal), sets kUnderflow. +template +CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { + CalculatedFloat result; + if (mantissa == uint64_t(1) << FloatTraits::kTargetMantissaBits) { + mantissa >>= 1; + exponent += 1; + } + if (exponent > FloatTraits::kMaxExponent) { + result.exponent = kOverflow; + } else if (mantissa == 0) { + result.exponent = kUnderflow; + } else { + result.exponent = exponent; + result.mantissa = mantissa; + } + return result; +} + +template +CalculatedFloat CalculateFromParsedHexadecimal( + const strings_internal::ParsedFloat& parsed_hex) { + uint64_t mantissa = parsed_hex.mantissa; + int exponent = parsed_hex.exponent; + int mantissa_width = 64 - strings_internal::CountLeadingZeros64(mantissa); + const int shift = NormalizedShiftSize(mantissa_width, exponent); + bool result_exact; + exponent += shift; + mantissa = ShiftRightAndRound(mantissa, shift, + /* input exact= */ true, &result_exact); + // ParseFloat handles rounding in the hexadecimal case, so we don't have to + // check `result_exact` here. + return CalculatedFloatFromRawValues(mantissa, exponent); +} + +template +CalculatedFloat CalculateFromParsedDecimal( + const strings_internal::ParsedFloat& parsed_decimal) { + CalculatedFloat result; + + // Large or small enough decimal exponents will always result in overflow + // or underflow. + if (Power10Underflow(parsed_decimal.exponent)) { + result.exponent = kUnderflow; + return result; + } else if (Power10Overflow(parsed_decimal.exponent)) { + result.exponent = kOverflow; + return result; + } + + // Otherwise convert our power of 10 into a power of 2 times an integer + // mantissa, and multiply this by our parsed decimal mantissa. + uint128 wide_binary_mantissa = parsed_decimal.mantissa; + wide_binary_mantissa *= Power10Mantissa(parsed_decimal.exponent); + int binary_exponent = Power10Exponent(parsed_decimal.exponent); + + // Discard bits that are inaccurate due to truncation error. The magic + // `mantissa_width` constants below are justified in charconv_algorithm.md. + // They represent the number of bits in `wide_binary_mantissa` that are + // guaranteed to be unaffected by error propagation. + bool mantissa_exact; + int mantissa_width; + if (parsed_decimal.subrange_begin) { + // Truncated mantissa + mantissa_width = 58; + mantissa_exact = false; + binary_exponent += + TruncateToBitWidth(mantissa_width, &wide_binary_mantissa); + } else if (!Power10Exact(parsed_decimal.exponent)) { + // Exact mantissa, truncated power of ten + mantissa_width = 63; + mantissa_exact = false; + binary_exponent += + TruncateToBitWidth(mantissa_width, &wide_binary_mantissa); + } else { + // Product is exact + mantissa_width = BitWidth(wide_binary_mantissa); + mantissa_exact = true; + } + + // Shift into an FloatType-sized mantissa, and round to nearest. + const int shift = + NormalizedShiftSize(mantissa_width, binary_exponent); + bool result_exact; + binary_exponent += shift; + uint64_t binary_mantissa = ShiftRightAndRound(wide_binary_mantissa, shift, + mantissa_exact, &result_exact); + if (!result_exact) { + // We could not determine the rounding direction using int128 math. Use + // full resolution math instead. + if (MustRoundUp(binary_mantissa, binary_exponent, parsed_decimal)) { + binary_mantissa += 1; + } + } + + return CalculatedFloatFromRawValues(binary_mantissa, + binary_exponent); +} + +template +from_chars_result FromCharsImpl(const char* first, const char* last, + FloatType& value, chars_format fmt_flags) { + from_chars_result result; + result.ptr = first; // overwritten on successful parse + result.ec = std::errc(); + + bool negative = false; + if (first != last && *first == '-') { + ++first; + negative = true; + } + // If the `hex` flag is *not* set, then we will accept a 0x prefix and try + // to parse a hexadecimal float. + if ((fmt_flags & chars_format::hex) == chars_format{} && last - first >= 2 && + *first == '0' && (first[1] == 'x' || first[1] == 'X')) { + const char* hex_first = first + 2; + strings_internal::ParsedFloat hex_parse = + strings_internal::ParseFloat<16>(hex_first, last, fmt_flags); + if (hex_parse.end == nullptr || + hex_parse.type != strings_internal::FloatType::kNumber) { + // Either we failed to parse a hex float after the "0x", or we read + // "0xinf" or "0xnan" which we don't want to match. + // + // However, a std::string that begins with "0x" also begins with "0", which + // is normally a valid match for the number zero. So we want these + // strings to match zero unless fmt_flags is `scientific`. (This flag + // means an exponent is required, which the std::string "0" does not have.) + if (fmt_flags == chars_format::scientific) { + result.ec = std::errc::invalid_argument; + } else { + result.ptr = first + 1; + value = negative ? -0.0 : 0.0; + } + return result; + } + // We matched a value. + result.ptr = hex_parse.end; + if (HandleEdgeCase(hex_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedHexadecimal(hex_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } + // Otherwise, we choose the number base based on the flags. + if ((fmt_flags & chars_format::hex) == chars_format::hex) { + strings_internal::ParsedFloat hex_parse = + strings_internal::ParseFloat<16>(first, last, fmt_flags); + if (hex_parse.end == nullptr) { + result.ec = std::errc::invalid_argument; + return result; + } + result.ptr = hex_parse.end; + if (HandleEdgeCase(hex_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedHexadecimal(hex_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } else { + strings_internal::ParsedFloat decimal_parse = + strings_internal::ParseFloat<10>(first, last, fmt_flags); + if (decimal_parse.end == nullptr) { + result.ec = std::errc::invalid_argument; + return result; + } + result.ptr = decimal_parse.end; + if (HandleEdgeCase(decimal_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedDecimal(decimal_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } + return result; +} +} // namespace + +from_chars_result from_chars(const char* first, const char* last, double& value, + chars_format fmt) { + return FromCharsImpl(first, last, value, fmt); +} + +from_chars_result from_chars(const char* first, const char* last, float& value, + chars_format fmt) { + return FromCharsImpl(first, last, value, fmt); +} + +namespace { + +// Table of powers of 10, from kPower10TableMin to kPower10TableMax. +// +// kPower10MantissaTable[i - kPower10TableMin] stores the 64-bit mantissa (high +// bit always on), and kPower10ExponentTable[i - kPower10TableMin] stores the +// power-of-two exponent. For a given number i, this gives the unique mantissa +// and exponent such that mantissa * 2**exponent <= 10**i < (mantissa + 1) * +// 2**exponent. + +const uint64_t kPower10MantissaTable[] = { + 0xeef453d6923bd65aU, 0x9558b4661b6565f8U, 0xbaaee17fa23ebf76U, + 0xe95a99df8ace6f53U, 0x91d8a02bb6c10594U, 0xb64ec836a47146f9U, + 0xe3e27a444d8d98b7U, 0x8e6d8c6ab0787f72U, 0xb208ef855c969f4fU, + 0xde8b2b66b3bc4723U, 0x8b16fb203055ac76U, 0xaddcb9e83c6b1793U, + 0xd953e8624b85dd78U, 0x87d4713d6f33aa6bU, 0xa9c98d8ccb009506U, + 0xd43bf0effdc0ba48U, 0x84a57695fe98746dU, 0xa5ced43b7e3e9188U, + 0xcf42894a5dce35eaU, 0x818995ce7aa0e1b2U, 0xa1ebfb4219491a1fU, + 0xca66fa129f9b60a6U, 0xfd00b897478238d0U, 0x9e20735e8cb16382U, + 0xc5a890362fddbc62U, 0xf712b443bbd52b7bU, 0x9a6bb0aa55653b2dU, + 0xc1069cd4eabe89f8U, 0xf148440a256e2c76U, 0x96cd2a865764dbcaU, + 0xbc807527ed3e12bcU, 0xeba09271e88d976bU, 0x93445b8731587ea3U, + 0xb8157268fdae9e4cU, 0xe61acf033d1a45dfU, 0x8fd0c16206306babU, + 0xb3c4f1ba87bc8696U, 0xe0b62e2929aba83cU, 0x8c71dcd9ba0b4925U, + 0xaf8e5410288e1b6fU, 0xdb71e91432b1a24aU, 0x892731ac9faf056eU, + 0xab70fe17c79ac6caU, 0xd64d3d9db981787dU, 0x85f0468293f0eb4eU, + 0xa76c582338ed2621U, 0xd1476e2c07286faaU, 0x82cca4db847945caU, + 0xa37fce126597973cU, 0xcc5fc196fefd7d0cU, 0xff77b1fcbebcdc4fU, + 0x9faacf3df73609b1U, 0xc795830d75038c1dU, 0xf97ae3d0d2446f25U, + 0x9becce62836ac577U, 0xc2e801fb244576d5U, 0xf3a20279ed56d48aU, + 0x9845418c345644d6U, 0xbe5691ef416bd60cU, 0xedec366b11c6cb8fU, + 0x94b3a202eb1c3f39U, 0xb9e08a83a5e34f07U, 0xe858ad248f5c22c9U, + 0x91376c36d99995beU, 0xb58547448ffffb2dU, 0xe2e69915b3fff9f9U, + 0x8dd01fad907ffc3bU, 0xb1442798f49ffb4aU, 0xdd95317f31c7fa1dU, + 0x8a7d3eef7f1cfc52U, 0xad1c8eab5ee43b66U, 0xd863b256369d4a40U, + 0x873e4f75e2224e68U, 0xa90de3535aaae202U, 0xd3515c2831559a83U, + 0x8412d9991ed58091U, 0xa5178fff668ae0b6U, 0xce5d73ff402d98e3U, + 0x80fa687f881c7f8eU, 0xa139029f6a239f72U, 0xc987434744ac874eU, + 0xfbe9141915d7a922U, 0x9d71ac8fada6c9b5U, 0xc4ce17b399107c22U, + 0xf6019da07f549b2bU, 0x99c102844f94e0fbU, 0xc0314325637a1939U, + 0xf03d93eebc589f88U, 0x96267c7535b763b5U, 0xbbb01b9283253ca2U, + 0xea9c227723ee8bcbU, 0x92a1958a7675175fU, 0xb749faed14125d36U, + 0xe51c79a85916f484U, 0x8f31cc0937ae58d2U, 0xb2fe3f0b8599ef07U, + 0xdfbdcece67006ac9U, 0x8bd6a141006042bdU, 0xaecc49914078536dU, + 0xda7f5bf590966848U, 0x888f99797a5e012dU, 0xaab37fd7d8f58178U, + 0xd5605fcdcf32e1d6U, 0x855c3be0a17fcd26U, 0xa6b34ad8c9dfc06fU, + 0xd0601d8efc57b08bU, 0x823c12795db6ce57U, 0xa2cb1717b52481edU, + 0xcb7ddcdda26da268U, 0xfe5d54150b090b02U, 0x9efa548d26e5a6e1U, + 0xc6b8e9b0709f109aU, 0xf867241c8cc6d4c0U, 0x9b407691d7fc44f8U, + 0xc21094364dfb5636U, 0xf294b943e17a2bc4U, 0x979cf3ca6cec5b5aU, + 0xbd8430bd08277231U, 0xece53cec4a314ebdU, 0x940f4613ae5ed136U, + 0xb913179899f68584U, 0xe757dd7ec07426e5U, 0x9096ea6f3848984fU, + 0xb4bca50b065abe63U, 0xe1ebce4dc7f16dfbU, 0x8d3360f09cf6e4bdU, + 0xb080392cc4349decU, 0xdca04777f541c567U, 0x89e42caaf9491b60U, + 0xac5d37d5b79b6239U, 0xd77485cb25823ac7U, 0x86a8d39ef77164bcU, + 0xa8530886b54dbdebU, 0xd267caa862a12d66U, 0x8380dea93da4bc60U, + 0xa46116538d0deb78U, 0xcd795be870516656U, 0x806bd9714632dff6U, + 0xa086cfcd97bf97f3U, 0xc8a883c0fdaf7df0U, 0xfad2a4b13d1b5d6cU, + 0x9cc3a6eec6311a63U, 0xc3f490aa77bd60fcU, 0xf4f1b4d515acb93bU, + 0x991711052d8bf3c5U, 0xbf5cd54678eef0b6U, 0xef340a98172aace4U, + 0x9580869f0e7aac0eU, 0xbae0a846d2195712U, 0xe998d258869facd7U, + 0x91ff83775423cc06U, 0xb67f6455292cbf08U, 0xe41f3d6a7377eecaU, + 0x8e938662882af53eU, 0xb23867fb2a35b28dU, 0xdec681f9f4c31f31U, + 0x8b3c113c38f9f37eU, 0xae0b158b4738705eU, 0xd98ddaee19068c76U, + 0x87f8a8d4cfa417c9U, 0xa9f6d30a038d1dbcU, 0xd47487cc8470652bU, + 0x84c8d4dfd2c63f3bU, 0xa5fb0a17c777cf09U, 0xcf79cc9db955c2ccU, + 0x81ac1fe293d599bfU, 0xa21727db38cb002fU, 0xca9cf1d206fdc03bU, + 0xfd442e4688bd304aU, 0x9e4a9cec15763e2eU, 0xc5dd44271ad3cdbaU, + 0xf7549530e188c128U, 0x9a94dd3e8cf578b9U, 0xc13a148e3032d6e7U, + 0xf18899b1bc3f8ca1U, 0x96f5600f15a7b7e5U, 0xbcb2b812db11a5deU, + 0xebdf661791d60f56U, 0x936b9fcebb25c995U, 0xb84687c269ef3bfbU, + 0xe65829b3046b0afaU, 0x8ff71a0fe2c2e6dcU, 0xb3f4e093db73a093U, + 0xe0f218b8d25088b8U, 0x8c974f7383725573U, 0xafbd2350644eeacfU, + 0xdbac6c247d62a583U, 0x894bc396ce5da772U, 0xab9eb47c81f5114fU, + 0xd686619ba27255a2U, 0x8613fd0145877585U, 0xa798fc4196e952e7U, + 0xd17f3b51fca3a7a0U, 0x82ef85133de648c4U, 0xa3ab66580d5fdaf5U, + 0xcc963fee10b7d1b3U, 0xffbbcfe994e5c61fU, 0x9fd561f1fd0f9bd3U, + 0xc7caba6e7c5382c8U, 0xf9bd690a1b68637bU, 0x9c1661a651213e2dU, + 0xc31bfa0fe5698db8U, 0xf3e2f893dec3f126U, 0x986ddb5c6b3a76b7U, + 0xbe89523386091465U, 0xee2ba6c0678b597fU, 0x94db483840b717efU, + 0xba121a4650e4ddebU, 0xe896a0d7e51e1566U, 0x915e2486ef32cd60U, + 0xb5b5ada8aaff80b8U, 0xe3231912d5bf60e6U, 0x8df5efabc5979c8fU, + 0xb1736b96b6fd83b3U, 0xddd0467c64bce4a0U, 0x8aa22c0dbef60ee4U, + 0xad4ab7112eb3929dU, 0xd89d64d57a607744U, 0x87625f056c7c4a8bU, + 0xa93af6c6c79b5d2dU, 0xd389b47879823479U, 0x843610cb4bf160cbU, + 0xa54394fe1eedb8feU, 0xce947a3da6a9273eU, 0x811ccc668829b887U, + 0xa163ff802a3426a8U, 0xc9bcff6034c13052U, 0xfc2c3f3841f17c67U, + 0x9d9ba7832936edc0U, 0xc5029163f384a931U, 0xf64335bcf065d37dU, + 0x99ea0196163fa42eU, 0xc06481fb9bcf8d39U, 0xf07da27a82c37088U, + 0x964e858c91ba2655U, 0xbbe226efb628afeaU, 0xeadab0aba3b2dbe5U, + 0x92c8ae6b464fc96fU, 0xb77ada0617e3bbcbU, 0xe55990879ddcaabdU, + 0x8f57fa54c2a9eab6U, 0xb32df8e9f3546564U, 0xdff9772470297ebdU, + 0x8bfbea76c619ef36U, 0xaefae51477a06b03U, 0xdab99e59958885c4U, + 0x88b402f7fd75539bU, 0xaae103b5fcd2a881U, 0xd59944a37c0752a2U, + 0x857fcae62d8493a5U, 0xa6dfbd9fb8e5b88eU, 0xd097ad07a71f26b2U, + 0x825ecc24c873782fU, 0xa2f67f2dfa90563bU, 0xcbb41ef979346bcaU, + 0xfea126b7d78186bcU, 0x9f24b832e6b0f436U, 0xc6ede63fa05d3143U, + 0xf8a95fcf88747d94U, 0x9b69dbe1b548ce7cU, 0xc24452da229b021bU, + 0xf2d56790ab41c2a2U, 0x97c560ba6b0919a5U, 0xbdb6b8e905cb600fU, + 0xed246723473e3813U, 0x9436c0760c86e30bU, 0xb94470938fa89bceU, + 0xe7958cb87392c2c2U, 0x90bd77f3483bb9b9U, 0xb4ecd5f01a4aa828U, + 0xe2280b6c20dd5232U, 0x8d590723948a535fU, 0xb0af48ec79ace837U, + 0xdcdb1b2798182244U, 0x8a08f0f8bf0f156bU, 0xac8b2d36eed2dac5U, + 0xd7adf884aa879177U, 0x86ccbb52ea94baeaU, 0xa87fea27a539e9a5U, + 0xd29fe4b18e88640eU, 0x83a3eeeef9153e89U, 0xa48ceaaab75a8e2bU, + 0xcdb02555653131b6U, 0x808e17555f3ebf11U, 0xa0b19d2ab70e6ed6U, + 0xc8de047564d20a8bU, 0xfb158592be068d2eU, 0x9ced737bb6c4183dU, + 0xc428d05aa4751e4cU, 0xf53304714d9265dfU, 0x993fe2c6d07b7fabU, + 0xbf8fdb78849a5f96U, 0xef73d256a5c0f77cU, 0x95a8637627989aadU, + 0xbb127c53b17ec159U, 0xe9d71b689dde71afU, 0x9226712162ab070dU, + 0xb6b00d69bb55c8d1U, 0xe45c10c42a2b3b05U, 0x8eb98a7a9a5b04e3U, + 0xb267ed1940f1c61cU, 0xdf01e85f912e37a3U, 0x8b61313bbabce2c6U, + 0xae397d8aa96c1b77U, 0xd9c7dced53c72255U, 0x881cea14545c7575U, + 0xaa242499697392d2U, 0xd4ad2dbfc3d07787U, 0x84ec3c97da624ab4U, + 0xa6274bbdd0fadd61U, 0xcfb11ead453994baU, 0x81ceb32c4b43fcf4U, + 0xa2425ff75e14fc31U, 0xcad2f7f5359a3b3eU, 0xfd87b5f28300ca0dU, + 0x9e74d1b791e07e48U, 0xc612062576589ddaU, 0xf79687aed3eec551U, + 0x9abe14cd44753b52U, 0xc16d9a0095928a27U, 0xf1c90080baf72cb1U, + 0x971da05074da7beeU, 0xbce5086492111aeaU, 0xec1e4a7db69561a5U, + 0x9392ee8e921d5d07U, 0xb877aa3236a4b449U, 0xe69594bec44de15bU, + 0x901d7cf73ab0acd9U, 0xb424dc35095cd80fU, 0xe12e13424bb40e13U, + 0x8cbccc096f5088cbU, 0xafebff0bcb24aafeU, 0xdbe6fecebdedd5beU, + 0x89705f4136b4a597U, 0xabcc77118461cefcU, 0xd6bf94d5e57a42bcU, + 0x8637bd05af6c69b5U, 0xa7c5ac471b478423U, 0xd1b71758e219652bU, + 0x83126e978d4fdf3bU, 0xa3d70a3d70a3d70aU, 0xccccccccccccccccU, + 0x8000000000000000U, 0xa000000000000000U, 0xc800000000000000U, + 0xfa00000000000000U, 0x9c40000000000000U, 0xc350000000000000U, + 0xf424000000000000U, 0x9896800000000000U, 0xbebc200000000000U, + 0xee6b280000000000U, 0x9502f90000000000U, 0xba43b74000000000U, + 0xe8d4a51000000000U, 0x9184e72a00000000U, 0xb5e620f480000000U, + 0xe35fa931a0000000U, 0x8e1bc9bf04000000U, 0xb1a2bc2ec5000000U, + 0xde0b6b3a76400000U, 0x8ac7230489e80000U, 0xad78ebc5ac620000U, + 0xd8d726b7177a8000U, 0x878678326eac9000U, 0xa968163f0a57b400U, + 0xd3c21bcecceda100U, 0x84595161401484a0U, 0xa56fa5b99019a5c8U, + 0xcecb8f27f4200f3aU, 0x813f3978f8940984U, 0xa18f07d736b90be5U, + 0xc9f2c9cd04674edeU, 0xfc6f7c4045812296U, 0x9dc5ada82b70b59dU, + 0xc5371912364ce305U, 0xf684df56c3e01bc6U, 0x9a130b963a6c115cU, + 0xc097ce7bc90715b3U, 0xf0bdc21abb48db20U, 0x96769950b50d88f4U, + 0xbc143fa4e250eb31U, 0xeb194f8e1ae525fdU, 0x92efd1b8d0cf37beU, + 0xb7abc627050305adU, 0xe596b7b0c643c719U, 0x8f7e32ce7bea5c6fU, + 0xb35dbf821ae4f38bU, 0xe0352f62a19e306eU, 0x8c213d9da502de45U, + 0xaf298d050e4395d6U, 0xdaf3f04651d47b4cU, 0x88d8762bf324cd0fU, + 0xab0e93b6efee0053U, 0xd5d238a4abe98068U, 0x85a36366eb71f041U, + 0xa70c3c40a64e6c51U, 0xd0cf4b50cfe20765U, 0x82818f1281ed449fU, + 0xa321f2d7226895c7U, 0xcbea6f8ceb02bb39U, 0xfee50b7025c36a08U, + 0x9f4f2726179a2245U, 0xc722f0ef9d80aad6U, 0xf8ebad2b84e0d58bU, + 0x9b934c3b330c8577U, 0xc2781f49ffcfa6d5U, 0xf316271c7fc3908aU, + 0x97edd871cfda3a56U, 0xbde94e8e43d0c8ecU, 0xed63a231d4c4fb27U, + 0x945e455f24fb1cf8U, 0xb975d6b6ee39e436U, 0xe7d34c64a9c85d44U, + 0x90e40fbeea1d3a4aU, 0xb51d13aea4a488ddU, 0xe264589a4dcdab14U, + 0x8d7eb76070a08aecU, 0xb0de65388cc8ada8U, 0xdd15fe86affad912U, + 0x8a2dbf142dfcc7abU, 0xacb92ed9397bf996U, 0xd7e77a8f87daf7fbU, + 0x86f0ac99b4e8dafdU, 0xa8acd7c0222311bcU, 0xd2d80db02aabd62bU, + 0x83c7088e1aab65dbU, 0xa4b8cab1a1563f52U, 0xcde6fd5e09abcf26U, + 0x80b05e5ac60b6178U, 0xa0dc75f1778e39d6U, 0xc913936dd571c84cU, + 0xfb5878494ace3a5fU, 0x9d174b2dcec0e47bU, 0xc45d1df942711d9aU, + 0xf5746577930d6500U, 0x9968bf6abbe85f20U, 0xbfc2ef456ae276e8U, + 0xefb3ab16c59b14a2U, 0x95d04aee3b80ece5U, 0xbb445da9ca61281fU, + 0xea1575143cf97226U, 0x924d692ca61be758U, 0xb6e0c377cfa2e12eU, + 0xe498f455c38b997aU, 0x8edf98b59a373fecU, 0xb2977ee300c50fe7U, + 0xdf3d5e9bc0f653e1U, 0x8b865b215899f46cU, 0xae67f1e9aec07187U, + 0xda01ee641a708de9U, 0x884134fe908658b2U, 0xaa51823e34a7eedeU, + 0xd4e5e2cdc1d1ea96U, 0x850fadc09923329eU, 0xa6539930bf6bff45U, + 0xcfe87f7cef46ff16U, 0x81f14fae158c5f6eU, 0xa26da3999aef7749U, + 0xcb090c8001ab551cU, 0xfdcb4fa002162a63U, 0x9e9f11c4014dda7eU, + 0xc646d63501a1511dU, 0xf7d88bc24209a565U, 0x9ae757596946075fU, + 0xc1a12d2fc3978937U, 0xf209787bb47d6b84U, 0x9745eb4d50ce6332U, + 0xbd176620a501fbffU, 0xec5d3fa8ce427affU, 0x93ba47c980e98cdfU, + 0xb8a8d9bbe123f017U, 0xe6d3102ad96cec1dU, 0x9043ea1ac7e41392U, + 0xb454e4a179dd1877U, 0xe16a1dc9d8545e94U, 0x8ce2529e2734bb1dU, + 0xb01ae745b101e9e4U, 0xdc21a1171d42645dU, 0x899504ae72497ebaU, + 0xabfa45da0edbde69U, 0xd6f8d7509292d603U, 0x865b86925b9bc5c2U, + 0xa7f26836f282b732U, 0xd1ef0244af2364ffU, 0x8335616aed761f1fU, + 0xa402b9c5a8d3a6e7U, 0xcd036837130890a1U, 0x802221226be55a64U, + 0xa02aa96b06deb0fdU, 0xc83553c5c8965d3dU, 0xfa42a8b73abbf48cU, + 0x9c69a97284b578d7U, 0xc38413cf25e2d70dU, 0xf46518c2ef5b8cd1U, + 0x98bf2f79d5993802U, 0xbeeefb584aff8603U, 0xeeaaba2e5dbf6784U, + 0x952ab45cfa97a0b2U, 0xba756174393d88dfU, 0xe912b9d1478ceb17U, + 0x91abb422ccb812eeU, 0xb616a12b7fe617aaU, 0xe39c49765fdf9d94U, + 0x8e41ade9fbebc27dU, 0xb1d219647ae6b31cU, 0xde469fbd99a05fe3U, + 0x8aec23d680043beeU, 0xada72ccc20054ae9U, 0xd910f7ff28069da4U, + 0x87aa9aff79042286U, 0xa99541bf57452b28U, 0xd3fa922f2d1675f2U, + 0x847c9b5d7c2e09b7U, 0xa59bc234db398c25U, 0xcf02b2c21207ef2eU, + 0x8161afb94b44f57dU, 0xa1ba1ba79e1632dcU, 0xca28a291859bbf93U, + 0xfcb2cb35e702af78U, 0x9defbf01b061adabU, 0xc56baec21c7a1916U, + 0xf6c69a72a3989f5bU, 0x9a3c2087a63f6399U, 0xc0cb28a98fcf3c7fU, + 0xf0fdf2d3f3c30b9fU, 0x969eb7c47859e743U, 0xbc4665b596706114U, + 0xeb57ff22fc0c7959U, 0x9316ff75dd87cbd8U, 0xb7dcbf5354e9beceU, + 0xe5d3ef282a242e81U, 0x8fa475791a569d10U, 0xb38d92d760ec4455U, + 0xe070f78d3927556aU, 0x8c469ab843b89562U, 0xaf58416654a6babbU, + 0xdb2e51bfe9d0696aU, 0x88fcf317f22241e2U, 0xab3c2fddeeaad25aU, + 0xd60b3bd56a5586f1U, 0x85c7056562757456U, 0xa738c6bebb12d16cU, + 0xd106f86e69d785c7U, 0x82a45b450226b39cU, 0xa34d721642b06084U, + 0xcc20ce9bd35c78a5U, 0xff290242c83396ceU, 0x9f79a169bd203e41U, + 0xc75809c42c684dd1U, 0xf92e0c3537826145U, 0x9bbcc7a142b17ccbU, + 0xc2abf989935ddbfeU, 0xf356f7ebf83552feU, 0x98165af37b2153deU, + 0xbe1bf1b059e9a8d6U, 0xeda2ee1c7064130cU, 0x9485d4d1c63e8be7U, + 0xb9a74a0637ce2ee1U, 0xe8111c87c5c1ba99U, 0x910ab1d4db9914a0U, + 0xb54d5e4a127f59c8U, 0xe2a0b5dc971f303aU, 0x8da471a9de737e24U, + 0xb10d8e1456105dadU, 0xdd50f1996b947518U, 0x8a5296ffe33cc92fU, + 0xace73cbfdc0bfb7bU, 0xd8210befd30efa5aU, 0x8714a775e3e95c78U, + 0xa8d9d1535ce3b396U, 0xd31045a8341ca07cU, 0x83ea2b892091e44dU, + 0xa4e4b66b68b65d60U, 0xce1de40642e3f4b9U, 0x80d2ae83e9ce78f3U, + 0xa1075a24e4421730U, 0xc94930ae1d529cfcU, 0xfb9b7cd9a4a7443cU, + 0x9d412e0806e88aa5U, 0xc491798a08a2ad4eU, 0xf5b5d7ec8acb58a2U, + 0x9991a6f3d6bf1765U, 0xbff610b0cc6edd3fU, 0xeff394dcff8a948eU, + 0x95f83d0a1fb69cd9U, 0xbb764c4ca7a4440fU, 0xea53df5fd18d5513U, + 0x92746b9be2f8552cU, 0xb7118682dbb66a77U, 0xe4d5e82392a40515U, + 0x8f05b1163ba6832dU, 0xb2c71d5bca9023f8U, 0xdf78e4b2bd342cf6U, + 0x8bab8eefb6409c1aU, 0xae9672aba3d0c320U, 0xda3c0f568cc4f3e8U, + 0x8865899617fb1871U, 0xaa7eebfb9df9de8dU, 0xd51ea6fa85785631U, + 0x8533285c936b35deU, 0xa67ff273b8460356U, 0xd01fef10a657842cU, + 0x8213f56a67f6b29bU, 0xa298f2c501f45f42U, 0xcb3f2f7642717713U, + 0xfe0efb53d30dd4d7U, 0x9ec95d1463e8a506U, 0xc67bb4597ce2ce48U, + 0xf81aa16fdc1b81daU, 0x9b10a4e5e9913128U, 0xc1d4ce1f63f57d72U, + 0xf24a01a73cf2dccfU, 0x976e41088617ca01U, 0xbd49d14aa79dbc82U, + 0xec9c459d51852ba2U, 0x93e1ab8252f33b45U, 0xb8da1662e7b00a17U, + 0xe7109bfba19c0c9dU, 0x906a617d450187e2U, 0xb484f9dc9641e9daU, + 0xe1a63853bbd26451U, 0x8d07e33455637eb2U, 0xb049dc016abc5e5fU, + 0xdc5c5301c56b75f7U, 0x89b9b3e11b6329baU, 0xac2820d9623bf429U, + 0xd732290fbacaf133U, 0x867f59a9d4bed6c0U, 0xa81f301449ee8c70U, + 0xd226fc195c6a2f8cU, 0x83585d8fd9c25db7U, 0xa42e74f3d032f525U, + 0xcd3a1230c43fb26fU, 0x80444b5e7aa7cf85U, 0xa0555e361951c366U, + 0xc86ab5c39fa63440U, 0xfa856334878fc150U, 0x9c935e00d4b9d8d2U, + 0xc3b8358109e84f07U, 0xf4a642e14c6262c8U, 0x98e7e9cccfbd7dbdU, + 0xbf21e44003acdd2cU, 0xeeea5d5004981478U, 0x95527a5202df0ccbU, + 0xbaa718e68396cffdU, 0xe950df20247c83fdU, 0x91d28b7416cdd27eU, + 0xb6472e511c81471dU, 0xe3d8f9e563a198e5U, 0x8e679c2f5e44ff8fU, +}; + +const int16_t kPower10ExponentTable[] = { + -1200, -1196, -1193, -1190, -1186, -1183, -1180, -1176, -1173, -1170, -1166, + -1163, -1160, -1156, -1153, -1150, -1146, -1143, -1140, -1136, -1133, -1130, + -1127, -1123, -1120, -1117, -1113, -1110, -1107, -1103, -1100, -1097, -1093, + -1090, -1087, -1083, -1080, -1077, -1073, -1070, -1067, -1063, -1060, -1057, + -1053, -1050, -1047, -1043, -1040, -1037, -1034, -1030, -1027, -1024, -1020, + -1017, -1014, -1010, -1007, -1004, -1000, -997, -994, -990, -987, -984, + -980, -977, -974, -970, -967, -964, -960, -957, -954, -950, -947, + -944, -940, -937, -934, -931, -927, -924, -921, -917, -914, -911, + -907, -904, -901, -897, -894, -891, -887, -884, -881, -877, -874, + -871, -867, -864, -861, -857, -854, -851, -847, -844, -841, -838, + -834, -831, -828, -824, -821, -818, -814, -811, -808, -804, -801, + -798, -794, -791, -788, -784, -781, -778, -774, -771, -768, -764, + -761, -758, -754, -751, -748, -744, -741, -738, -735, -731, -728, + -725, -721, -718, -715, -711, -708, -705, -701, -698, -695, -691, + -688, -685, -681, -678, -675, -671, -668, -665, -661, -658, -655, + -651, -648, -645, -642, -638, -635, -632, -628, -625, -622, -618, + -615, -612, -608, -605, -602, -598, -595, -592, -588, -585, -582, + -578, -575, -572, -568, -565, -562, -558, -555, -552, -549, -545, + -542, -539, -535, -532, -529, -525, -522, -519, -515, -512, -509, + -505, -502, -499, -495, -492, -489, -485, -482, -479, -475, -472, + -469, -465, -462, -459, -455, -452, -449, -446, -442, -439, -436, + -432, -429, -426, -422, -419, -416, -412, -409, -406, -402, -399, + -396, -392, -389, -386, -382, -379, -376, -372, -369, -366, -362, + -359, -356, -353, -349, -346, -343, -339, -336, -333, -329, -326, + -323, -319, -316, -313, -309, -306, -303, -299, -296, -293, -289, + -286, -283, -279, -276, -273, -269, -266, -263, -259, -256, -253, + -250, -246, -243, -240, -236, -233, -230, -226, -223, -220, -216, + -213, -210, -206, -203, -200, -196, -193, -190, -186, -183, -180, + -176, -173, -170, -166, -163, -160, -157, -153, -150, -147, -143, + -140, -137, -133, -130, -127, -123, -120, -117, -113, -110, -107, + -103, -100, -97, -93, -90, -87, -83, -80, -77, -73, -70, + -67, -63, -60, -57, -54, -50, -47, -44, -40, -37, -34, + -30, -27, -24, -20, -17, -14, -10, -7, -4, 0, 3, + 6, 10, 13, 16, 20, 23, 26, 30, 33, 36, 39, + 43, 46, 49, 53, 56, 59, 63, 66, 69, 73, 76, + 79, 83, 86, 89, 93, 96, 99, 103, 106, 109, 113, + 116, 119, 123, 126, 129, 132, 136, 139, 142, 146, 149, + 152, 156, 159, 162, 166, 169, 172, 176, 179, 182, 186, + 189, 192, 196, 199, 202, 206, 209, 212, 216, 219, 222, + 226, 229, 232, 235, 239, 242, 245, 249, 252, 255, 259, + 262, 265, 269, 272, 275, 279, 282, 285, 289, 292, 295, + 299, 302, 305, 309, 312, 315, 319, 322, 325, 328, 332, + 335, 338, 342, 345, 348, 352, 355, 358, 362, 365, 368, + 372, 375, 378, 382, 385, 388, 392, 395, 398, 402, 405, + 408, 412, 415, 418, 422, 425, 428, 431, 435, 438, 441, + 445, 448, 451, 455, 458, 461, 465, 468, 471, 475, 478, + 481, 485, 488, 491, 495, 498, 501, 505, 508, 511, 515, + 518, 521, 524, 528, 531, 534, 538, 541, 544, 548, 551, + 554, 558, 561, 564, 568, 571, 574, 578, 581, 584, 588, + 591, 594, 598, 601, 604, 608, 611, 614, 617, 621, 624, + 627, 631, 634, 637, 641, 644, 647, 651, 654, 657, 661, + 664, 667, 671, 674, 677, 681, 684, 687, 691, 694, 697, + 701, 704, 707, 711, 714, 717, 720, 724, 727, 730, 734, + 737, 740, 744, 747, 750, 754, 757, 760, 764, 767, 770, + 774, 777, 780, 784, 787, 790, 794, 797, 800, 804, 807, + 810, 813, 817, 820, 823, 827, 830, 833, 837, 840, 843, + 847, 850, 853, 857, 860, 863, 867, 870, 873, 877, 880, + 883, 887, 890, 893, 897, 900, 903, 907, 910, 913, 916, + 920, 923, 926, 930, 933, 936, 940, 943, 946, 950, 953, + 956, 960, +}; + +} // namespace +} // namespace absl diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h new file mode 100644 index 000000000000..3e313679c961 --- /dev/null +++ b/absl/strings/charconv.h @@ -0,0 +1,115 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CHARCONV_H_ +#define ABSL_STRINGS_CHARCONV_H_ + +#include // NOLINT(build/c++11) + +namespace absl { + +// Workalike compatibilty version of std::chars_format from C++17. +// +// This is an bitfield enumerator which can be passed to absl::from_chars to +// configure the std::string-to-float conversion. +enum class chars_format { + scientific = 1, + fixed = 2, + hex = 4, + general = fixed | scientific, +}; + +// The return result of a std::string-to-number conversion. +// +// `ec` will be set to `invalid_argument` if a well-formed number was not found +// at the start of the input range, `result_out_of_range` if a well-formed +// number was found, but it was out of the representable range of the requested +// type, or to std::errc() otherwise. +// +// If a well-formed number was found, `ptr` is set to one past the sequence of +// characters that were successfully parsed. If none was found, `ptr` is set +// to the `first` argument to from_chars. +struct from_chars_result { + const char* ptr; + std::errc ec; +}; + +// Workalike compatibilty version of std::from_chars from C++17. Currently +// this only supports the `double` and `float` types. +// +// This interface incorporates the proposed resolutions for library issues +// DR 3800 and DR 3801. If these are adopted with different wording, +// Abseil's behavior will change to match the standard. (The behavior most +// likely to change is for DR 3801, which says what `value` will be set to in +// the case of overflow and underflow. Code that wants to avoid possible +// breaking changes in this area should not depend on `value` when the returned +// from_chars_result indicates a range error.) +// +// Searches the range [first, last) for the longest matching pattern beginning +// at `first` that represents a floating point number. If one is found, store +// the result in `value`. +// +// The matching pattern format is almost the same as that of strtod(), except +// that C locale is not respected, and an initial '+' character in the input +// range will never be matched. +// +// If `fmt` is set, it must be one of the enumerator values of the chars_format. +// (This is despite the fact that chars_format is a bitmask type.) If set to +// `scientific`, a matching number must contain an exponent. If set to `fixed`, +// then an exponent will never match. (For example, the std::string "1e5" will be +// parsed as "1".) If set to `hex`, then a hexadecimal float is parsed in the +// format that strtod() accepts, except that a "0x" prefix is NOT matched. +// (In particular, in `hex` mode, the input "0xff" results in the largest +// matching pattern "0".) +absl::from_chars_result from_chars(const char* first, const char* last, + double& value, // NOLINT + chars_format fmt = chars_format::general); + +absl::from_chars_result from_chars(const char* first, const char* last, + float& value, // NOLINT + chars_format fmt = chars_format::general); + +// std::chars_format is specified as a bitmask type, which means the following +// operations must be provided: +inline constexpr chars_format operator&(chars_format lhs, chars_format rhs) { + return static_cast(static_cast(lhs) & + static_cast(rhs)); +} +inline constexpr chars_format operator|(chars_format lhs, chars_format rhs) { + return static_cast(static_cast(lhs) | + static_cast(rhs)); +} +inline constexpr chars_format operator^(chars_format lhs, chars_format rhs) { + return static_cast(static_cast(lhs) ^ + static_cast(rhs)); +} +inline constexpr chars_format operator~(chars_format arg) { + return static_cast(~static_cast(arg)); +} +inline chars_format& operator&=(chars_format& lhs, chars_format rhs) { + lhs = lhs & rhs; + return lhs; +} +inline chars_format& operator|=(chars_format& lhs, chars_format rhs) { + lhs = lhs | rhs; + return lhs; +} +inline chars_format& operator^=(chars_format& lhs, chars_format rhs) { + lhs = lhs ^ rhs; + return lhs; +} + +} // namespace absl + +#endif // ABSL_STRINGS_CHARCONV_H_ diff --git a/absl/strings/charconv_benchmark.cc b/absl/strings/charconv_benchmark.cc new file mode 100644 index 000000000000..fd83f44e3d09 --- /dev/null +++ b/absl/strings/charconv_benchmark.cc @@ -0,0 +1,204 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/charconv.h" + +#include +#include +#include + +#include "benchmark/benchmark.h" + +namespace { + +void BM_Strtod_Pi(benchmark::State& state) { + const char* pi = "3.14159"; + for (auto s : state) { + benchmark::DoNotOptimize(pi); + benchmark::DoNotOptimize(strtod(pi, nullptr)); + } +} +BENCHMARK(BM_Strtod_Pi); + +void BM_Absl_Pi(benchmark::State& state) { + const char* pi = "3.14159"; + const char* pi_end = pi + strlen(pi); + for (auto s : state) { + benchmark::DoNotOptimize(pi); + double v; + absl::from_chars(pi, pi_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_Pi); + +void BM_Strtod_Pi_float(benchmark::State& state) { + const char* pi = "3.14159"; + for (auto s : state) { + benchmark::DoNotOptimize(pi); + benchmark::DoNotOptimize(strtof(pi, nullptr)); + } +} +BENCHMARK(BM_Strtod_Pi_float); + +void BM_Absl_Pi_float(benchmark::State& state) { + const char* pi = "3.14159"; + const char* pi_end = pi + strlen(pi); + for (auto s : state) { + benchmark::DoNotOptimize(pi); + float v; + absl::from_chars(pi, pi_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_Pi_float); + +void BM_Strtod_HardLarge(benchmark::State& state) { + const char* num = "272104041512242479.e200"; + for (auto s : state) { + benchmark::DoNotOptimize(num); + benchmark::DoNotOptimize(strtod(num, nullptr)); + } +} +BENCHMARK(BM_Strtod_HardLarge); + +void BM_Absl_HardLarge(benchmark::State& state) { + const char* numstr = "272104041512242479.e200"; + const char* numstr_end = numstr + strlen(numstr); + for (auto s : state) { + benchmark::DoNotOptimize(numstr); + double v; + absl::from_chars(numstr, numstr_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_HardLarge); + +void BM_Strtod_HardSmall(benchmark::State& state) { + const char* num = "94080055902682397.e-242"; + for (auto s : state) { + benchmark::DoNotOptimize(num); + benchmark::DoNotOptimize(strtod(num, nullptr)); + } +} +BENCHMARK(BM_Strtod_HardSmall); + +void BM_Absl_HardSmall(benchmark::State& state) { + const char* numstr = "94080055902682397.e-242"; + const char* numstr_end = numstr + strlen(numstr); + for (auto s : state) { + benchmark::DoNotOptimize(numstr); + double v; + absl::from_chars(numstr, numstr_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_HardSmall); + +void BM_Strtod_HugeMantissa(benchmark::State& state) { + std::string huge(200, '3'); + const char* num = huge.c_str(); + for (auto s : state) { + benchmark::DoNotOptimize(num); + benchmark::DoNotOptimize(strtod(num, nullptr)); + } +} +BENCHMARK(BM_Strtod_HugeMantissa); + +void BM_Absl_HugeMantissa(benchmark::State& state) { + std::string huge(200, '3'); + const char* num = huge.c_str(); + const char* num_end = num + 200; + for (auto s : state) { + benchmark::DoNotOptimize(num); + double v; + absl::from_chars(num, num_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_HugeMantissa); + +std::string MakeHardCase(int length) { + // The number 1.1521...e-297 is exactly halfway between 12345 * 2**-1000 and + // the next larger representable number. The digits of this number are in + // the std::string below. + const std::string digits = + "1." + "152113937042223790993097181572444900347587985074226836242307364987727724" + "831384300183638649152607195040591791364113930628852279348613864894524591" + "272746490313676832900762939595690019745859128071117417798540258114233761" + "012939937017879509401007964861774960297319002612457273148497158989073482" + "171377406078223015359818300988676687994537274548940612510414856761641652" + "513434981938564294004070500716200446656421722229202383105446378511678258" + "370570631774499359748259931676320916632111681001853983492795053244971606" + "922718923011680846577744433974087653954904214152517799883551075537146316" + "168973685866425605046988661997658648354773076621610279716804960009043764" + "038392994055171112475093876476783502487512538082706095923790634572014823" + "78877699375152587890625" + + std::string(5000, '0'); + // generate the hard cases on either side for the given length. + // Lengths between 3 and 1000 are reasonable. + return digits.substr(0, length) + "1e-297"; +} + +void BM_Strtod_Big_And_Difficult(benchmark::State& state) { + std::string testcase = MakeHardCase(state.range(0)); + const char* begin = testcase.c_str(); + for (auto s : state) { + benchmark::DoNotOptimize(begin); + benchmark::DoNotOptimize(strtod(begin, nullptr)); + } +} +BENCHMARK(BM_Strtod_Big_And_Difficult)->Range(3, 5000); + +void BM_Absl_Big_And_Difficult(benchmark::State& state) { + std::string testcase = MakeHardCase(state.range(0)); + const char* begin = testcase.c_str(); + const char* end = begin + testcase.size(); + for (auto s : state) { + benchmark::DoNotOptimize(begin); + double v; + absl::from_chars(begin, end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_Big_And_Difficult)->Range(3, 5000); + +} // namespace + +// ------------------------------------------------------------------------ +// Benchmark Time CPU Iterations +// ------------------------------------------------------------------------ +// BM_Strtod_Pi 96 ns 96 ns 6337454 +// BM_Absl_Pi 35 ns 35 ns 20031996 +// BM_Strtod_Pi_float 91 ns 91 ns 7745851 +// BM_Absl_Pi_float 35 ns 35 ns 20430298 +// BM_Strtod_HardLarge 133 ns 133 ns 5288341 +// BM_Absl_HardLarge 181 ns 181 ns 3855615 +// BM_Strtod_HardSmall 279 ns 279 ns 2517243 +// BM_Absl_HardSmall 287 ns 287 ns 2458744 +// BM_Strtod_HugeMantissa 433 ns 433 ns 1604293 +// BM_Absl_HugeMantissa 160 ns 160 ns 4403671 +// BM_Strtod_Big_And_Difficult/3 236 ns 236 ns 2942496 +// BM_Strtod_Big_And_Difficult/8 232 ns 232 ns 2983796 +// BM_Strtod_Big_And_Difficult/64 437 ns 437 ns 1591951 +// BM_Strtod_Big_And_Difficult/512 1738 ns 1738 ns 402519 +// BM_Strtod_Big_And_Difficult/4096 3943 ns 3943 ns 176128 +// BM_Strtod_Big_And_Difficult/5000 4397 ns 4397 ns 157878 +// BM_Absl_Big_And_Difficult/3 39 ns 39 ns 17799583 +// BM_Absl_Big_And_Difficult/8 43 ns 43 ns 16096859 +// BM_Absl_Big_And_Difficult/64 550 ns 550 ns 1259717 +// BM_Absl_Big_And_Difficult/512 4167 ns 4167 ns 171414 +// BM_Absl_Big_And_Difficult/4096 9160 ns 9159 ns 76297 +// BM_Absl_Big_And_Difficult/5000 9738 ns 9738 ns 70140 diff --git a/absl/strings/charconv_test.cc b/absl/strings/charconv_test.cc new file mode 100644 index 000000000000..f8d71cc6c448 --- /dev/null +++ b/absl/strings/charconv_test.cc @@ -0,0 +1,766 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/charconv.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" + +#ifdef _MSC_FULL_VER +#define ABSL_COMPILER_DOES_EXACT_ROUNDING 0 +#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 0 +#else +#define ABSL_COMPILER_DOES_EXACT_ROUNDING 1 +#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 1 +#endif + +namespace { + +#if ABSL_COMPILER_DOES_EXACT_ROUNDING + +// Tests that the given std::string is accepted by absl::from_chars, and that it +// converts exactly equal to the given number. +void TestDoubleParse(absl::string_view str, double expected_number) { + SCOPED_TRACE(str); + double actual_number = 0.0; + absl::from_chars_result result = + absl::from_chars(str.data(), str.data() + str.length(), actual_number); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(result.ptr, str.data() + str.length()); + EXPECT_EQ(actual_number, expected_number); +} + +void TestFloatParse(absl::string_view str, float expected_number) { + SCOPED_TRACE(str); + float actual_number = 0.0; + absl::from_chars_result result = + absl::from_chars(str.data(), str.data() + str.length(), actual_number); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(result.ptr, str.data() + str.length()); + EXPECT_EQ(actual_number, expected_number); +} + +// Tests that the given double or single precision floating point literal is +// parsed correctly by absl::from_chars. +// +// These convenience macros assume that the C++ compiler being used also does +// fully correct decimal-to-binary conversions. +#define FROM_CHARS_TEST_DOUBLE(number) \ + { \ + TestDoubleParse(#number, number); \ + TestDoubleParse("-" #number, -number); \ + } + +#define FROM_CHARS_TEST_FLOAT(number) \ + { \ + TestFloatParse(#number, number##f); \ + TestFloatParse("-" #number, -number##f); \ + } + +TEST(FromChars, NearRoundingCases) { + // Cases from "A Program for Testing IEEE Decimal-Binary Conversion" + // by Vern Paxson. + + // Forms that should round towards zero. (These are the hardest cases for + // each decimal mantissa size.) + FROM_CHARS_TEST_DOUBLE(5.e125); + FROM_CHARS_TEST_DOUBLE(69.e267); + FROM_CHARS_TEST_DOUBLE(999.e-026); + FROM_CHARS_TEST_DOUBLE(7861.e-034); + FROM_CHARS_TEST_DOUBLE(75569.e-254); + FROM_CHARS_TEST_DOUBLE(928609.e-261); + FROM_CHARS_TEST_DOUBLE(9210917.e080); + FROM_CHARS_TEST_DOUBLE(84863171.e114); + FROM_CHARS_TEST_DOUBLE(653777767.e273); + FROM_CHARS_TEST_DOUBLE(5232604057.e-298); + FROM_CHARS_TEST_DOUBLE(27235667517.e-109); + FROM_CHARS_TEST_DOUBLE(653532977297.e-123); + FROM_CHARS_TEST_DOUBLE(3142213164987.e-294); + FROM_CHARS_TEST_DOUBLE(46202199371337.e-072); + FROM_CHARS_TEST_DOUBLE(231010996856685.e-073); + FROM_CHARS_TEST_DOUBLE(9324754620109615.e212); + FROM_CHARS_TEST_DOUBLE(78459735791271921.e049); + FROM_CHARS_TEST_DOUBLE(272104041512242479.e200); + FROM_CHARS_TEST_DOUBLE(6802601037806061975.e198); + FROM_CHARS_TEST_DOUBLE(20505426358836677347.e-221); + FROM_CHARS_TEST_DOUBLE(836168422905420598437.e-234); + FROM_CHARS_TEST_DOUBLE(4891559871276714924261.e222); + FROM_CHARS_TEST_FLOAT(5.e-20); + FROM_CHARS_TEST_FLOAT(67.e14); + FROM_CHARS_TEST_FLOAT(985.e15); + FROM_CHARS_TEST_FLOAT(7693.e-42); + FROM_CHARS_TEST_FLOAT(55895.e-16); + FROM_CHARS_TEST_FLOAT(996622.e-44); + FROM_CHARS_TEST_FLOAT(7038531.e-32); + FROM_CHARS_TEST_FLOAT(60419369.e-46); + FROM_CHARS_TEST_FLOAT(702990899.e-20); + FROM_CHARS_TEST_FLOAT(6930161142.e-48); + FROM_CHARS_TEST_FLOAT(25933168707.e-13); + FROM_CHARS_TEST_FLOAT(596428896559.e20); + + // Similarly, forms that should round away from zero. + FROM_CHARS_TEST_DOUBLE(9.e-265); + FROM_CHARS_TEST_DOUBLE(85.e-037); + FROM_CHARS_TEST_DOUBLE(623.e100); + FROM_CHARS_TEST_DOUBLE(3571.e263); + FROM_CHARS_TEST_DOUBLE(81661.e153); + FROM_CHARS_TEST_DOUBLE(920657.e-023); + FROM_CHARS_TEST_DOUBLE(4603285.e-024); + FROM_CHARS_TEST_DOUBLE(87575437.e-309); + FROM_CHARS_TEST_DOUBLE(245540327.e122); + FROM_CHARS_TEST_DOUBLE(6138508175.e120); + FROM_CHARS_TEST_DOUBLE(83356057653.e193); + FROM_CHARS_TEST_DOUBLE(619534293513.e124); + FROM_CHARS_TEST_DOUBLE(2335141086879.e218); + FROM_CHARS_TEST_DOUBLE(36167929443327.e-159); + FROM_CHARS_TEST_DOUBLE(609610927149051.e-255); + FROM_CHARS_TEST_DOUBLE(3743626360493413.e-165); + FROM_CHARS_TEST_DOUBLE(94080055902682397.e-242); + FROM_CHARS_TEST_DOUBLE(899810892172646163.e283); + FROM_CHARS_TEST_DOUBLE(7120190517612959703.e120); + FROM_CHARS_TEST_DOUBLE(25188282901709339043.e-252); + FROM_CHARS_TEST_DOUBLE(308984926168550152811.e-052); + FROM_CHARS_TEST_DOUBLE(6372891218502368041059.e064); + FROM_CHARS_TEST_FLOAT(3.e-23); + FROM_CHARS_TEST_FLOAT(57.e18); + FROM_CHARS_TEST_FLOAT(789.e-35); + FROM_CHARS_TEST_FLOAT(2539.e-18); + FROM_CHARS_TEST_FLOAT(76173.e28); + FROM_CHARS_TEST_FLOAT(887745.e-11); + FROM_CHARS_TEST_FLOAT(5382571.e-37); + FROM_CHARS_TEST_FLOAT(82381273.e-35); + FROM_CHARS_TEST_FLOAT(750486563.e-38); + FROM_CHARS_TEST_FLOAT(3752432815.e-39); + FROM_CHARS_TEST_FLOAT(75224575729.e-45); + FROM_CHARS_TEST_FLOAT(459926601011.e15); +} + +#undef FROM_CHARS_TEST_DOUBLE +#undef FROM_CHARS_TEST_FLOAT +#endif + +float ToFloat(absl::string_view s) { + float f; + absl::from_chars(s.data(), s.data() + s.size(), f); + return f; +} + +double ToDouble(absl::string_view s) { + double d; + absl::from_chars(s.data(), s.data() + s.size(), d); + return d; +} + +// A duplication of the test cases in "NearRoundingCases" above, but with +// expected values expressed with integers, using ldexp/ldexpf. These test +// cases will work even on compilers that do not accurately round floating point +// literals. +TEST(FromChars, NearRoundingCasesExplicit) { + EXPECT_EQ(ToDouble("5.e125"), ldexp(6653062250012735, 365)); + EXPECT_EQ(ToDouble("69.e267"), ldexp(4705683757438170, 841)); + EXPECT_EQ(ToDouble("999.e-026"), ldexp(6798841691080350, -129)); + EXPECT_EQ(ToDouble("7861.e-034"), ldexp(8975675289889240, -153)); + EXPECT_EQ(ToDouble("75569.e-254"), ldexp(6091718967192243, -880)); + EXPECT_EQ(ToDouble("928609.e-261"), ldexp(7849264900213743, -900)); + EXPECT_EQ(ToDouble("9210917.e080"), ldexp(8341110837370930, 236)); + EXPECT_EQ(ToDouble("84863171.e114"), ldexp(4625202867375927, 353)); + EXPECT_EQ(ToDouble("653777767.e273"), ldexp(5068902999763073, 884)); + EXPECT_EQ(ToDouble("5232604057.e-298"), ldexp(5741343011915040, -1010)); + EXPECT_EQ(ToDouble("27235667517.e-109"), ldexp(6707124626673586, -380)); + EXPECT_EQ(ToDouble("653532977297.e-123"), ldexp(7078246407265384, -422)); + EXPECT_EQ(ToDouble("3142213164987.e-294"), ldexp(8219991337640559, -988)); + EXPECT_EQ(ToDouble("46202199371337.e-072"), ldexp(5224462102115359, -246)); + EXPECT_EQ(ToDouble("231010996856685.e-073"), ldexp(5224462102115359, -247)); + EXPECT_EQ(ToDouble("9324754620109615.e212"), ldexp(5539753864394442, 705)); + EXPECT_EQ(ToDouble("78459735791271921.e049"), ldexp(8388176519442766, 166)); + EXPECT_EQ(ToDouble("272104041512242479.e200"), ldexp(5554409530847367, 670)); + EXPECT_EQ(ToDouble("6802601037806061975.e198"), ldexp(5554409530847367, 668)); + EXPECT_EQ(ToDouble("20505426358836677347.e-221"), + ldexp(4524032052079546, -722)); + EXPECT_EQ(ToDouble("836168422905420598437.e-234"), + ldexp(5070963299887562, -760)); + EXPECT_EQ(ToDouble("4891559871276714924261.e222"), + ldexp(6452687840519111, 757)); + EXPECT_EQ(ToFloat("5.e-20"), ldexpf(15474250, -88)); + EXPECT_EQ(ToFloat("67.e14"), ldexpf(12479722, 29)); + EXPECT_EQ(ToFloat("985.e15"), ldexpf(14333636, 36)); + EXPECT_EQ(ToFloat("7693.e-42"), ldexpf(10979816, -150)); + EXPECT_EQ(ToFloat("55895.e-16"), ldexpf(12888509, -61)); + EXPECT_EQ(ToFloat("996622.e-44"), ldexpf(14224264, -150)); + EXPECT_EQ(ToFloat("7038531.e-32"), ldexpf(11420669, -107)); + EXPECT_EQ(ToFloat("60419369.e-46"), ldexpf(8623340, -150)); + EXPECT_EQ(ToFloat("702990899.e-20"), ldexpf(16209866, -61)); + EXPECT_EQ(ToFloat("6930161142.e-48"), ldexpf(9891056, -150)); + EXPECT_EQ(ToFloat("25933168707.e-13"), ldexpf(11138211, -32)); + EXPECT_EQ(ToFloat("596428896559.e20"), ldexpf(12333860, 82)); + + + EXPECT_EQ(ToDouble("9.e-265"), ldexp(8168427841980010, -930)); + EXPECT_EQ(ToDouble("85.e-037"), ldexp(6360455125664090, -169)); + EXPECT_EQ(ToDouble("623.e100"), ldexp(6263531988747231, 289)); + EXPECT_EQ(ToDouble("3571.e263"), ldexp(6234526311072170, 833)); + EXPECT_EQ(ToDouble("81661.e153"), ldexp(6696636728760206, 472)); + EXPECT_EQ(ToDouble("920657.e-023"), ldexp(5975405561110124, -109)); + EXPECT_EQ(ToDouble("4603285.e-024"), ldexp(5975405561110124, -110)); + EXPECT_EQ(ToDouble("87575437.e-309"), ldexp(8452160731874668, -1053)); + EXPECT_EQ(ToDouble("245540327.e122"), ldexp(4985336549131723, 381)); + EXPECT_EQ(ToDouble("6138508175.e120"), ldexp(4985336549131723, 379)); + EXPECT_EQ(ToDouble("83356057653.e193"), ldexp(5986732817132056, 625)); + EXPECT_EQ(ToDouble("619534293513.e124"), ldexp(4798406992060657, 399)); + EXPECT_EQ(ToDouble("2335141086879.e218"), ldexp(5419088166961646, 713)); + EXPECT_EQ(ToDouble("36167929443327.e-159"), ldexp(8135819834632444, -536)); + EXPECT_EQ(ToDouble("609610927149051.e-255"), ldexp(4576664294594737, -850)); + EXPECT_EQ(ToDouble("3743626360493413.e-165"), ldexp(6898586531774201, -549)); + EXPECT_EQ(ToDouble("94080055902682397.e-242"), ldexp(6273271706052298, -800)); + EXPECT_EQ(ToDouble("899810892172646163.e283"), ldexp(7563892574477827, 947)); + EXPECT_EQ(ToDouble("7120190517612959703.e120"), ldexp(5385467232557565, 409)); + EXPECT_EQ(ToDouble("25188282901709339043.e-252"), + ldexp(5635662608542340, -825)); + EXPECT_EQ(ToDouble("308984926168550152811.e-052"), + ldexp(5644774693823803, -157)); + EXPECT_EQ(ToDouble("6372891218502368041059.e064"), + ldexp(4616868614322430, 233)); + + EXPECT_EQ(ToFloat("3.e-23"), ldexpf(9507380, -98)); + EXPECT_EQ(ToFloat("57.e18"), ldexpf(12960300, 42)); + EXPECT_EQ(ToFloat("789.e-35"), ldexpf(10739312, -130)); + EXPECT_EQ(ToFloat("2539.e-18"), ldexpf(11990089, -72)); + EXPECT_EQ(ToFloat("76173.e28"), ldexpf(9845130, 86)); + EXPECT_EQ(ToFloat("887745.e-11"), ldexpf(9760860, -40)); + EXPECT_EQ(ToFloat("5382571.e-37"), ldexpf(11447463, -124)); + EXPECT_EQ(ToFloat("82381273.e-35"), ldexpf(8554961, -113)); + EXPECT_EQ(ToFloat("750486563.e-38"), ldexpf(9975678, -120)); + EXPECT_EQ(ToFloat("3752432815.e-39"), ldexpf(9975678, -121)); + EXPECT_EQ(ToFloat("75224575729.e-45"), ldexpf(13105970, -137)); + EXPECT_EQ(ToFloat("459926601011.e15"), ldexpf(12466336, 65)); +} + +// Common test logic for converting a std::string which lies exactly halfway between +// two target floats. +// +// mantissa and exponent represent the precise value between two floating point +// numbers, `expected_low` and `expected_high`. The floating point +// representation to parse in `StrCat(mantissa, "e", exponent)`. +// +// This function checks that an input just slightly less than the exact value +// is rounded down to `expected_low`, and an input just slightly greater than +// the exact value is rounded up to `expected_high`. +// +// The exact value should round to `expected_half`, which must be either +// `expected_low` or `expected_high`. +template +void TestHalfwayValue(const std::string& mantissa, int exponent, + FloatType expected_low, FloatType expected_high, + FloatType expected_half) { + std::string low_rep = mantissa; + low_rep[low_rep.size() - 1] -= 1; + absl::StrAppend(&low_rep, std::string(1000, '9'), "e", exponent); + + FloatType actual_low = 0; + absl::from_chars(low_rep.data(), low_rep.data() + low_rep.size(), actual_low); + EXPECT_EQ(expected_low, actual_low); + + std::string high_rep = absl::StrCat(mantissa, std::string(1000, '0'), "1e", exponent); + FloatType actual_high = 0; + absl::from_chars(high_rep.data(), high_rep.data() + high_rep.size(), + actual_high); + EXPECT_EQ(expected_high, actual_high); + + std::string halfway_rep = absl::StrCat(mantissa, "e", exponent); + FloatType actual_half = 0; + absl::from_chars(halfway_rep.data(), halfway_rep.data() + halfway_rep.size(), + actual_half); + EXPECT_EQ(expected_half, actual_half); +} + +TEST(FromChars, DoubleRounding) { + const double zero = 0.0; + const double first_subnormal = nextafter(zero, 1.0); + const double second_subnormal = nextafter(first_subnormal, 1.0); + + const double first_normal = DBL_MIN; + const double last_subnormal = nextafter(first_normal, 0.0); + const double second_normal = nextafter(first_normal, 1.0); + + const double last_normal = DBL_MAX; + const double penultimate_normal = nextafter(last_normal, 0.0); + + // Various test cases for numbers between two representable floats. Each + // call to TestHalfwayValue tests a number just below and just above the + // halfway point, as well as the number exactly between them. + + // Test between zero and first_subnormal. Round-to-even tie rounds down. + TestHalfwayValue( + "2." + "470328229206232720882843964341106861825299013071623822127928412503377536" + "351043759326499181808179961898982823477228588654633283551779698981993873" + "980053909390631503565951557022639229085839244910518443593180284993653615" + "250031937045767824921936562366986365848075700158576926990370631192827955" + "855133292783433840935197801553124659726357957462276646527282722005637400" + "648549997709659947045402082816622623785739345073633900796776193057750674" + "017632467360096895134053553745851666113422376667860416215968046191446729" + "184030053005753084904876539171138659164623952491262365388187963623937328" + "042389101867234849766823508986338858792562830275599565752445550725518931" + "369083625477918694866799496832404970582102851318545139621383772282614543" + "7693412532098591327667236328125", + -324, zero, first_subnormal, zero); + + // first_subnormal and second_subnormal. Round-to-even tie rounds up. + TestHalfwayValue( + "7." + "410984687618698162648531893023320585475897039214871466383785237510132609" + "053131277979497545424539885696948470431685765963899850655339096945981621" + "940161728171894510697854671067917687257517734731555330779540854980960845" + "750095811137303474765809687100959097544227100475730780971111893578483867" + "565399878350301522805593404659373979179073872386829939581848166016912201" + "945649993128979841136206248449867871357218035220901702390328579173252022" + "052897402080290685402160661237554998340267130003581248647904138574340187" + "552090159017259254714629617513415977493871857473787096164563890871811984" + "127167305601704549300470526959016576377688490826798697257336652176556794" + "107250876433756084600398490497214911746308553955635418864151316847843631" + "3080237596295773983001708984375", + -324, first_subnormal, second_subnormal, second_subnormal); + + // last_subnormal and first_normal. Round-to-even tie rounds up. + TestHalfwayValue( + "2." + "225073858507201136057409796709131975934819546351645648023426109724822222" + "021076945516529523908135087914149158913039621106870086438694594645527657" + "207407820621743379988141063267329253552286881372149012981122451451889849" + "057222307285255133155755015914397476397983411801999323962548289017107081" + "850690630666655994938275772572015763062690663332647565300009245888316433" + "037779791869612049497390377829704905051080609940730262937128958950003583" + "799967207254304360284078895771796150945516748243471030702609144621572289" + "880258182545180325707018860872113128079512233426288368622321503775666622" + "503982534335974568884423900265498198385487948292206894721689831099698365" + "846814022854243330660339850886445804001034933970427567186443383770486037" + "86162277173854562306587467901408672332763671875", + -308, last_subnormal, first_normal, first_normal); + + // first_normal and second_normal. Round-to-even tie rounds down. + TestHalfwayValue( + "2." + "225073858507201630123055637955676152503612414573018013083228724049586647" + "606759446192036794116886953213985520549032000903434781884412325572184367" + "563347617020518175998922941393629966742598285899994830148971433555578567" + "693279306015978183162142425067962460785295885199272493577688320732492479" + "924816869232247165964934329258783950102250973957579510571600738343645738" + "494324192997092179207389919761694314131497173265255020084997973676783743" + "155205818804439163810572367791175177756227497413804253387084478193655533" + "073867420834526162513029462022730109054820067654020201547112002028139700" + "141575259123440177362244273712468151750189745559978653234255886219611516" + "335924167958029604477064946470184777360934300451421683607013647479513962" + "13837722826145437693412532098591327667236328125", + -308, first_normal, second_normal, first_normal); + + // penultimate_normal and last_normal. Round-to-even rounds down. + TestHalfwayValue( + "1." + "797693134862315608353258760581052985162070023416521662616611746258695532" + "672923265745300992879465492467506314903358770175220871059269879629062776" + "047355692132901909191523941804762171253349609463563872612866401980290377" + "995141836029815117562837277714038305214839639239356331336428021390916694" + "57927874464075218944", + 308, penultimate_normal, last_normal, penultimate_normal); +} + +// Same test cases as DoubleRounding, now with new and improved Much Smaller +// Precision! +TEST(FromChars, FloatRounding) { + const float zero = 0.0; + const float first_subnormal = nextafterf(zero, 1.0); + const float second_subnormal = nextafterf(first_subnormal, 1.0); + + const float first_normal = FLT_MIN; + const float last_subnormal = nextafterf(first_normal, 0.0); + const float second_normal = nextafterf(first_normal, 1.0); + + const float last_normal = FLT_MAX; + const float penultimate_normal = nextafterf(last_normal, 0.0); + + // Test between zero and first_subnormal. Round-to-even tie rounds down. + TestHalfwayValue( + "7." + "006492321624085354618647916449580656401309709382578858785341419448955413" + "42930300743319094181060791015625", + -46, zero, first_subnormal, zero); + + // first_subnormal and second_subnormal. Round-to-even tie rounds up. + TestHalfwayValue( + "2." + "101947696487225606385594374934874196920392912814773657635602425834686624" + "028790902229957282543182373046875", + -45, first_subnormal, second_subnormal, second_subnormal); + + // last_subnormal and first_normal. Round-to-even tie rounds up. + TestHalfwayValue( + "1." + "175494280757364291727882991035766513322858992758990427682963118425003064" + "9651730385585324256680905818939208984375", + -38, last_subnormal, first_normal, first_normal); + + // first_normal and second_normal. Round-to-even tie rounds down. + TestHalfwayValue( + "1." + "175494420887210724209590083408724842314472120785184615334540294131831453" + "9442813071445925743319094181060791015625", + -38, first_normal, second_normal, first_normal); + + // penultimate_normal and last_normal. Round-to-even rounds down. + TestHalfwayValue("3.40282336497324057985868971510891282432", 38, + penultimate_normal, last_normal, penultimate_normal); +} + +TEST(FromChars, Underflow) { + // Check that underflow is handled correctly, according to the specification + // in DR 3081. + double d; + float f; + absl::from_chars_result result; + + std::string negative_underflow = "-1e-1000"; + const char* begin = negative_underflow.data(); + const char* end = begin + negative_underflow.size(); + d = 100.0; + result = absl::from_chars(begin, end, d); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_TRUE(std::signbit(d)); // negative + EXPECT_GE(d, -std::numeric_limits::min()); + f = 100.0; + result = absl::from_chars(begin, end, f); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_TRUE(std::signbit(f)); // negative + EXPECT_GE(f, -std::numeric_limits::min()); + + std::string positive_underflow = "1e-1000"; + begin = positive_underflow.data(); + end = begin + positive_underflow.size(); + d = -100.0; + result = absl::from_chars(begin, end, d); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_FALSE(std::signbit(d)); // positive + EXPECT_LE(d, std::numeric_limits::min()); + f = -100.0; + result = absl::from_chars(begin, end, f); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_FALSE(std::signbit(f)); // positive + EXPECT_LE(f, std::numeric_limits::min()); +} + +TEST(FromChars, Overflow) { + // Check that overflow is handled correctly, according to the specification + // in DR 3081. + double d; + float f; + absl::from_chars_result result; + + std::string negative_overflow = "-1e1000"; + const char* begin = negative_overflow.data(); + const char* end = begin + negative_overflow.size(); + d = 100.0; + result = absl::from_chars(begin, end, d); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_TRUE(std::signbit(d)); // negative + EXPECT_EQ(d, -std::numeric_limits::max()); + f = 100.0; + result = absl::from_chars(begin, end, f); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_TRUE(std::signbit(f)); // negative + EXPECT_EQ(f, -std::numeric_limits::max()); + + std::string positive_overflow = "1e1000"; + begin = positive_overflow.data(); + end = begin + positive_overflow.size(); + d = -100.0; + result = absl::from_chars(begin, end, d); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_FALSE(std::signbit(d)); // positive + EXPECT_EQ(d, std::numeric_limits::max()); + f = -100.0; + result = absl::from_chars(begin, end, f); + EXPECT_EQ(result.ptr, end); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_FALSE(std::signbit(f)); // positive + EXPECT_EQ(f, std::numeric_limits::max()); +} + +TEST(FromChars, ReturnValuePtr) { + // Check that `ptr` points one past the number scanned, even if that number + // is not representable. + double d; + absl::from_chars_result result; + + std::string normal = "3.14@#$%@#$%"; + result = absl::from_chars(normal.data(), normal.data() + normal.size(), d); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(result.ptr - normal.data(), 4); + + std::string overflow = "1e1000@#$%@#$%"; + result = absl::from_chars(overflow.data(), + overflow.data() + overflow.size(), d); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_EQ(result.ptr - overflow.data(), 6); + + std::string garbage = "#$%@#$%"; + result = absl::from_chars(garbage.data(), + garbage.data() + garbage.size(), d); + EXPECT_EQ(result.ec, std::errc::invalid_argument); + EXPECT_EQ(result.ptr - garbage.data(), 0); +} + +// Check for a wide range of inputs that strtod() and absl::from_chars() exactly +// agree on the conversion amount. +// +// This test assumes the platform's strtod() uses perfect round_to_nearest +// rounding. +TEST(FromChars, TestVersusStrtod) { + for (int mantissa = 1000000; mantissa <= 9999999; mantissa += 501) { + for (int exponent = -300; exponent < 300; ++exponent) { + std::string candidate = absl::StrCat(mantissa, "e", exponent); + double strtod_value = strtod(candidate.c_str(), nullptr); + double absl_value = 0; + absl::from_chars(candidate.data(), candidate.data() + candidate.size(), + absl_value); + ASSERT_EQ(strtod_value, absl_value) << candidate; + } + } +} + +// Check for a wide range of inputs that strtof() and absl::from_chars() exactly +// agree on the conversion amount. +// +// This test assumes the platform's strtof() uses perfect round_to_nearest +// rounding. +TEST(FromChars, TestVersusStrtof) { + for (int mantissa = 1000000; mantissa <= 9999999; mantissa += 501) { + for (int exponent = -43; exponent < 32; ++exponent) { + std::string candidate = absl::StrCat(mantissa, "e", exponent); + float strtod_value = strtof(candidate.c_str(), nullptr); + float absl_value = 0; + absl::from_chars(candidate.data(), candidate.data() + candidate.size(), + absl_value); + ASSERT_EQ(strtod_value, absl_value) << candidate; + } + } +} + +// Tests if two floating point values have identical bit layouts. (EXPECT_EQ +// is not suitable for NaN testing, since NaNs are never equal.) +template +bool Identical(Float a, Float b) { + return 0 == memcmp(&a, &b, sizeof(Float)); +} + +// Check that NaNs are parsed correctly. The spec requires that +// std::from_chars on "NaN(123abc)" return the same value as std::nan("123abc"). +// How such an n-char-sequence affects the generated NaN is unspecified, so we +// just test for symmetry with std::nan and strtod here. +// +// (In Linux, this parses the value as a number and stuffs that number into the +// free bits of a quiet NaN.) +TEST(FromChars, NaNDoubles) { + for (std::string n_char_sequence : + {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000", + "8000000000000", "abc123", "legal_but_unexpected", + "99999999999999999999999", "_"}) { + std::string input = absl::StrCat("nan(", n_char_sequence, ")"); + SCOPED_TRACE(input); + double from_chars_double; + absl::from_chars(input.data(), input.data() + input.size(), + from_chars_double); + double std_nan_double = std::nan(n_char_sequence.c_str()); + EXPECT_TRUE(Identical(from_chars_double, std_nan_double)); + + // Also check that we match strtod()'s behavior. This test assumes that the + // platform has a compliant strtod(). +#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY + double strtod_double = strtod(input.c_str(), nullptr); + EXPECT_TRUE(Identical(from_chars_double, strtod_double)); +#endif // ABSL_STRTOD_HANDLES_NAN_CORRECTLY + + // Check that we can parse a negative NaN + std::string negative_input = "-" + input; + double negative_from_chars_double; + absl::from_chars(negative_input.data(), + negative_input.data() + negative_input.size(), + negative_from_chars_double); + EXPECT_TRUE(std::signbit(negative_from_chars_double)); + EXPECT_FALSE(Identical(negative_from_chars_double, from_chars_double)); + from_chars_double = std::copysign(from_chars_double, -1.0); + EXPECT_TRUE(Identical(negative_from_chars_double, from_chars_double)); + } +} + +TEST(FromChars, NaNFloats) { + for (std::string n_char_sequence : + {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000", + "8000000000000", "abc123", "legal_but_unexpected", + "99999999999999999999999", "_"}) { + std::string input = absl::StrCat("nan(", n_char_sequence, ")"); + SCOPED_TRACE(input); + float from_chars_float; + absl::from_chars(input.data(), input.data() + input.size(), + from_chars_float); + float std_nan_float = std::nanf(n_char_sequence.c_str()); + EXPECT_TRUE(Identical(from_chars_float, std_nan_float)); + + // Also check that we match strtof()'s behavior. This test assumes that the + // platform has a compliant strtof(). +#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY + float strtof_float = strtof(input.c_str(), nullptr); + EXPECT_TRUE(Identical(from_chars_float, strtof_float)); +#endif // ABSL_STRTOD_HANDLES_NAN_CORRECTLY + + // Check that we can parse a negative NaN + std::string negative_input = "-" + input; + float negative_from_chars_float; + absl::from_chars(negative_input.data(), + negative_input.data() + negative_input.size(), + negative_from_chars_float); + EXPECT_TRUE(std::signbit(negative_from_chars_float)); + EXPECT_FALSE(Identical(negative_from_chars_float, from_chars_float)); + from_chars_float = std::copysign(from_chars_float, -1.0); + EXPECT_TRUE(Identical(negative_from_chars_float, from_chars_float)); + } +} + +// Returns an integer larger than step. The values grow exponentially. +int NextStep(int step) { + return step + (step >> 2) + 1; +} + +// Test a conversion on a family of input strings, checking that the calculation +// is correct for in-bounds values, and that overflow and underflow are done +// correctly for out-of-bounds values. +// +// input_generator maps from an integer index to a std::string to test. +// expected_generator maps from an integer index to an expected Float value. +// from_chars conversion of input_generator(i) should result in +// expected_generator(i). +// +// lower_bound and upper_bound denote the smallest and largest values for which +// the conversion is expected to succeed. +template +void TestOverflowAndUnderflow( + const std::function& input_generator, + const std::function& expected_generator, int lower_bound, + int upper_bound) { + // test legal values near lower_bound + int index, step; + for (index = lower_bound, step = 1; index < upper_bound; + index += step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float expected = expected_generator(index); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(expected, actual); + } + // test legal values near upper_bound + for (index = upper_bound, step = 1; index > lower_bound; + index -= step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float expected = expected_generator(index); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(expected, actual); + } + // Test underflow values below lower_bound + for (index = lower_bound - 1, step = 1; index > -1000000; + index -= step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_LT(actual, 1.0); // check for underflow + } + // Test overflow values above upper_bound + for (index = upper_bound + 1, step = 1; index < 1000000; + index += step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_GT(actual, 1.0); // check for overflow + } +} + +// Check that overflow and underflow are caught correctly for hex doubles. +// +// The largest representable double is 0x1.fffffffffffffp+1023, and the +// smallest representable subnormal is 0x0.0000000000001p-1022, which equals +// 0x1p-1074. Therefore 1023 and -1074 are the limits of acceptable exponents +// in this test. +TEST(FromChars, HexdecimalDoubleLimits) { + auto input_gen = [](int index) { return absl::StrCat("0x1.0p", index); }; + auto expected_gen = [](int index) { return std::ldexp(1.0, index); }; + TestOverflowAndUnderflow(input_gen, expected_gen, -1074, 1023); +} + +// Check that overflow and underflow are caught correctly for hex floats. +// +// The largest representable float is 0x1.fffffep+127, and the smallest +// representable subnormal is 0x0.000002p-126, which equals 0x1p-149. +// Therefore 127 and -149 are the limits of acceptable exponents in this test. +TEST(FromChars, HexdecimalFloatLimits) { + auto input_gen = [](int index) { return absl::StrCat("0x1.0p", index); }; + auto expected_gen = [](int index) { return std::ldexp(1.0f, index); }; + TestOverflowAndUnderflow(input_gen, expected_gen, -149, 127); +} + +// Check that overflow and underflow are caught correctly for decimal doubles. +// +// The largest representable double is about 1.8e308, and the smallest +// representable subnormal is about 5e-324. '1e-324' therefore rounds away from +// the smallest representable positive value. -323 and 308 are the limits of +// acceptable exponents in this test. +TEST(FromChars, DecimalDoubleLimits) { + auto input_gen = [](int index) { return absl::StrCat("1.0e", index); }; + auto expected_gen = [](int index) { return std::pow(10.0, index); }; + TestOverflowAndUnderflow(input_gen, expected_gen, -323, 308); +} + +// Check that overflow and underflow are caught correctly for decimal floats. +// +// The largest representable float is about 3.4e38, and the smallest +// representable subnormal is about 1.45e-45. '1e-45' therefore rounds towards +// the smallest representable positive value. -45 and 38 are the limits of +// acceptable exponents in this test. +TEST(FromChars, DecimalFloatLimits) { + auto input_gen = [](int index) { return absl::StrCat("1.0e", index); }; + auto expected_gen = [](int index) { return std::pow(10.0, index); }; + TestOverflowAndUnderflow(input_gen, expected_gen, -45, 38); +} + +} // namespace diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc new file mode 100644 index 000000000000..3e7296e7068a --- /dev/null +++ b/absl/strings/internal/charconv_bigint.cc @@ -0,0 +1,357 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_bigint.h" + +#include +#include +#include + +namespace absl { +namespace strings_internal { + +namespace { + +// Table containing some large powers of 5, for fast computation. + +// Constant step size for entries in the kLargePowersOfFive table. Each entry +// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep +// (or 5**27). +// +// In other words, the Nth entry in the table is 5**(27*N). +// +// 5**27 is the largest power of 5 that fits in 64 bits. +constexpr int kLargePowerOfFiveStep = 27; + +// The largest legal index into the kLargePowersOfFive table. +// +// In other words, the largest precomputed power of 5 is 5**(27*20). +constexpr int kLargestPowerOfFiveIndex = 20; + +// Table of powers of (5**27), up to (5**27)**20 == 5**540. +// +// Used to generate large powers of 5 while limiting the number of repeated +// multiplications required. +// +// clang-format off +const uint32_t kLargePowersOfFive[] = { +// 5**27 (i=1), start=0, end=2 + 0xfa10079dU, 0x6765c793U, +// 5**54 (i=2), start=2, end=6 + 0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U, +// 5**81 (i=3), start=6, end=12 + 0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U, +// 5**108 (i=4), start=12, end=20 + 0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU, + 0x20d3846fU, 0x06d00f73U, +// 5**135 (i=5), start=20, end=30 + 0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U, + 0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU, +// 5**162 (i=6), start=30, end=42 + 0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU, + 0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU, +// 5**189 (i=7), start=42, end=56 + 0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U, + 0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U, + 0x94151217U, 0x0072e9f7U, +// 5**216 (i=8), start=56, end=72 + 0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU, + 0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU, + 0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U, +// 5**243 (i=9), start=72, end=90 + 0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U, + 0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U, + 0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U, +// 5**270 (i=10), start=90, end=110 + 0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U, + 0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U, + 0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U, + 0x0d39e796U, 0x00079250U, +// 5**297 (i=11), start=110, end=132 + 0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U, + 0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U, + 0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U, + 0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U, +// 5**324 (i=12), start=132, end=156 + 0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U, + 0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU, + 0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U, + 0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U, +// 5**351 (i=13), start=156, end=182 + 0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU, + 0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U, + 0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U, + 0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU, + 0x859a4940U, 0x00007fb6U, +// 5**378 (i=14), start=182, end=210 + 0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U, + 0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU, + 0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU, + 0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U, + 0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U, +// 5**405 (i=15), start=210, end=240 + 0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U, + 0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U, + 0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U, + 0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU, + 0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U, +// 5**432 (i=16), start=240, end=272 + 0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU, + 0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U, + 0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U, + 0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU, + 0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U, + 0x3364ea62U, 0x0000086aU, +// 5**459 (i=17), start=272, end=306 + 0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU, + 0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U, + 0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U, + 0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U, + 0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU, + 0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U, +// 5**486 (i=18), start=306, end=342 + 0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU, + 0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U, + 0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU, + 0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U, + 0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU, + 0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU, +// 5**513 (i=19), start=342, end=380 + 0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U, + 0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU, + 0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU, + 0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U, + 0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U, + 0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU, + 0xf0046d27U, 0x0000008dU, +// 5**540 (i=20), start=380, end=420 + 0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU, + 0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U, + 0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U, + 0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U, + 0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U, + 0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU, + 0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U, +}; +// clang-format on + +// Returns a pointer to the big integer data for (5**27)**i. i must be +// between 1 and 20, inclusive. +const uint32_t* LargePowerOfFiveData(int i) { + return kLargePowersOfFive + i * (i - 1); +} + +// Returns the size of the big integer data for (5**27)**i, in words. i must be +// between 1 and 20, inclusive. +int LargePowerOfFiveSize(int i) { return 2 * i; } +} // namespace + +const uint32_t kFiveToNth[14] = { + 1, 5, 25, 125, 625, 3125, 15625, + 78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125, +}; + +const uint32_t kTenToNth[10] = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, +}; + +template +int BigUnsigned::ReadFloatMantissa(const ParsedFloat& fp, + int significant_digits) { + SetToZero(); + assert(fp.type == FloatType::kNumber); + + if (fp.subrange_begin == nullptr) { + // We already exactly parsed the mantissa, so no more work is necessary. + words_[0] = fp.mantissa & 0xffffffffu; + words_[1] = fp.mantissa >> 32; + if (words_[1]) { + size_ = 2; + } else if (words_[0]) { + size_ = 1; + } + return fp.exponent; + } + int exponent_adjust = + ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits); + return fp.literal_exponent + exponent_adjust; +} + +template +int BigUnsigned::ReadDigits(const char* begin, const char* end, + int significant_digits) { + assert(significant_digits <= Digits10() + 1); + SetToZero(); + + bool after_decimal_point = false; + // Discard any leading zeroes before the decimal point + while (begin < end && *begin == '0') { + ++begin; + } + int dropped_digits = 0; + // Discard any trailing zeroes. These may or may not be after the decimal + // point. + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + if (begin < end && *std::prev(end) == '.') { + // If the std::string ends in '.', either before or after dropping zeroes, then + // drop the decimal point and look for more digits to drop. + dropped_digits = 0; + --end; + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + } else if (dropped_digits) { + // We dropped digits, and aren't sure if they're before or after the decimal + // point. Figure that out now. + const char* dp = std::find(begin, end, '.'); + if (dp != end) { + // The dropped trailing digits were after the decimal point, so don't + // count them. + dropped_digits = 0; + } + } + // Any non-fraction digits we dropped need to be accounted for in our exponent + // adjustment. + int exponent_adjust = dropped_digits; + + uint32_t queued = 0; + int digits_queued = 0; + for (; begin != end && significant_digits > 0; ++begin) { + if (*begin == '.') { + after_decimal_point = true; + continue; + } + if (after_decimal_point) { + // For each fractional digit we emit in our parsed integer, adjust our + // decimal exponent to compensate. + --exponent_adjust; + } + int digit = (*begin - '0'); + --significant_digits; + if (significant_digits == 0 && std::next(begin) != end && + (digit == 0 || digit == 5)) { + // If this is the very last significant digit, but insignificant digits + // remain, we know that the last of those remaining significant digits is + // nonzero. (If it wasn't, we would have stripped it before we got here.) + // So if this final digit is a 0 or 5, adjust it upward by 1. + // + // This adjustment is what allows incredibly large mantissas ending in + // 500000...000000000001 to correctly round up, rather than to nearest. + ++digit; + } + queued = 10 * queued + digit; + ++digits_queued; + if (digits_queued == kMaxSmallPowerOfTen) { + MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]); + AddWithCarry(0, queued); + queued = digits_queued = 0; + } + } + // Encode any remaining digits. + if (digits_queued) { + MultiplyBy(kTenToNth[digits_queued]); + AddWithCarry(0, queued); + } + + // If any insignificant digits remain, we will drop them. But if we have not + // yet read the decimal point, then we have to adjust the exponent to account + // for the dropped digits. + if (begin < end && !after_decimal_point) { + // This call to std::find will result in a pointer either to the decimal + // point, or to the end of our buffer if there was none. + // + // Either way, [begin, decimal_point) will contain the set of dropped digits + // that require an exponent adjustment. + const char* decimal_point = std::find(begin, end, '.'); + exponent_adjust += (decimal_point - begin); + } + return exponent_adjust; +} + +template +/* static */ BigUnsigned BigUnsigned::FiveToTheNth( + int n) { + BigUnsigned answer(1u); + + // Seed from the table of large powers, if possible. + bool first_pass = true; + while (n >= kLargePowerOfFiveStep) { + int big_power = + std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); + if (first_pass) { + // just copy, rather than multiplying by 1 + std::copy( + LargePowerOfFiveData(big_power), + LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), + answer.words_); + answer.size_ = LargePowerOfFiveSize(big_power); + first_pass = false; + } else { + answer.MultiplyBy(LargePowerOfFiveSize(big_power), + LargePowerOfFiveData(big_power)); + } + n -= kLargePowerOfFiveStep * big_power; + } + answer.MultiplyByFiveToTheNth(n); + return answer; +} + +template +void BigUnsigned::MultiplyStep(int original_size, + const uint32_t* other_words, + int other_size, int step) { + int this_i = std::min(original_size - 1, step); + int other_i = step - this_i; + + uint64_t this_word = 0; + uint64_t carry = 0; + for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) { + uint64_t product = words_[this_i]; + product *= other_words[other_i]; + this_word += product; + carry += (this_word >> 32); + this_word &= 0xffffffff; + } + AddWithCarry(step + 1, carry); + words_[step] = this_word & 0xffffffff; + if (this_word > 0 && size_ <= step) { + size_ = step + 1; + } +} + +template +std::string BigUnsigned::ToString() const { + BigUnsigned copy = *this; + std::string result; + // Build result in reverse order + while (copy.size() > 0) { + int next_digit = copy.DivMod<10>(); + result.push_back('0' + next_digit); + } + if (result.empty()) { + result.push_back('0'); + } + std::reverse(result.begin(), result.end()); + return result; +} + +template class BigUnsigned<4>; +template class BigUnsigned<84>; + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h new file mode 100644 index 000000000000..aa70af2c2894 --- /dev/null +++ b/absl/strings/internal/charconv_bigint.h @@ -0,0 +1,426 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ + +#include +#include +#include +#include + +#include "absl/strings/ascii.h" +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace strings_internal { + +// The largest power that 5 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfFive = 13; +// The largest power that 10 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfTen = 9; + +extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1]; +extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; + +// Large, fixed-width unsigned integer. +// +// Exact rounding for decimal-to-binary floating point conversion requires very +// large integer math, but a design goal of absl::from_chars is to avoid +// allocating memory. The integer precision needed for decimal-to-binary +// conversions is large but bounded, so a huge fixed-width integer class +// suffices. +// +// This is an intentionally limited big integer class. Only needed operations +// are implemented. All storage lives in an array data member, and all +// arithmetic is done in-place, to avoid requiring separate storage for operand +// and result. +// +// This is an internal class. Some methods live in the .cc file, and are +// instantiated only for the values of max_words we need. +template +class BigUnsigned { + public: + static_assert(max_words == 4 || max_words == 84, + "unsupported max_words value"); + + BigUnsigned() : size_(0), words_{} {} + explicit BigUnsigned(uint32_t v) : size_(v > 0 ? 1 : 0), words_{v} {} + explicit BigUnsigned(uint64_t v) + : size_(0), + words_{static_cast(v & 0xffffffff), + static_cast(v >> 32)} { + if (words_[1]) { + size_ = 2; + } else if (words_[0]) { + size_ = 1; + } + } + + // Constructs a BigUnsigned from the given string_view containing a decimal + // value. If the input std::string is not a decimal integer, constructs a 0 + // instead. + explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { + // Check for valid input, returning a 0 otherwise. This is reasonable + // behavior only because this constructor is for unit tests. + if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || + sv.empty()) { + return; + } + int exponent_adjust = + ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); + if (exponent_adjust > 0) { + MultiplyByTenToTheNth(exponent_adjust); + } + } + + // Loads the mantissa value of a previously-parsed float. + // + // Returns the associated decimal exponent. The value of the parsed float is + // exactly *this * 10**exponent. + int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); + + // Returns the number of decimal digits of precision this type provides. All + // numbers with this many decimal digits or fewer are representable by this + // type. + // + // Analagous to std::numeric_limits::digits10. + static constexpr int Digits10() { + // 9975007/1035508 is very slightly less than log10(2**32). + return static_cast(max_words) * 9975007 / 1035508; + } + + // Shifts left by the given number of bits. + void ShiftLeft(int count) { + if (count > 0) { + const int word_shift = count / 32; + if (word_shift >= max_words) { + SetToZero(); + return; + } + size_ = std::min(size_ + word_shift, max_words); + count %= 32; + if (count == 0) { + std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); + } else { + for (int i = std::min(size_, max_words - 1); i > word_shift; --i) { + words_[i] = (words_[i - word_shift] << count) | + (words_[i - word_shift - 1] >> (32 - count)); + } + words_[word_shift] = words_[0] << count; + // Grow size_ if necessary. + if (size_ < max_words && words_[size_]) { + ++size_; + } + } + std::fill(words_, words_ + word_shift, 0u); + } + } + + + // Multiplies by v in-place. + void MultiplyBy(uint32_t v) { + if (size_ == 0 || v == 1) { + return; + } + if (v == 0) { + SetToZero(); + return; + } + const uint64_t factor = v; + uint64_t window = 0; + for (int i = 0; i < size_; ++i) { + window += factor * words_[i]; + words_[i] = window & 0xffffffff; + window >>= 32; + } + // If carry bits remain and there's space for them, grow size_. + if (window && size_ < max_words) { + words_[size_] = window & 0xffffffff; + ++size_; + } + } + + void MultiplyBy(uint64_t v) { + uint32_t words[2]; + words[0] = static_cast(v); + words[1] = static_cast(v >> 32); + if (words[1] == 0) { + MultiplyBy(words[0]); + } else { + MultiplyBy(2, words); + } + } + + // Multiplies in place by 5 to the power of n. n must be non-negative. + void MultiplyByFiveToTheNth(int n) { + while (n >= kMaxSmallPowerOfFive) { + MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); + n -= kMaxSmallPowerOfFive; + } + if (n > 0) { + MultiplyBy(kFiveToNth[n]); + } + } + + // Multiplies in place by 10 to the power of n. n must be non-negative. + void MultiplyByTenToTheNth(int n) { + if (n > kMaxSmallPowerOfTen) { + // For large n, raise to a power of 5, then shift left by the same amount. + // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. + MultiplyByFiveToTheNth(n); + ShiftLeft(n); + } else if (n > 0) { + // We can do this more quickly for very small N by using a single + // multiplication. + MultiplyBy(kTenToNth[n]); + } + } + + // Returns the value of 5**n, for non-negative n. This implementation uses + // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling + // MultiplyByFiveToTheNth(). + static BigUnsigned FiveToTheNth(int n); + + // Multiplies by another BigUnsigned, in-place. + template + void MultiplyBy(const BigUnsigned& other) { + MultiplyBy(other.size(), other.words()); + } + + void SetToZero() { + std::fill(words_, words_ + size_, 0u); + size_ = 0; + } + + // Returns the value of the nth word of this BigUnsigned. This is + // range-checked, and returns 0 on out-of-bounds accesses. + uint32_t GetWord(int index) const { + if (index < 0 || index >= size_) { + return 0; + } + return words_[index]; + } + + // Returns this integer as a decimal std::string. This is not used in the decimal- + // to-binary conversion; it is intended to aid in testing. + std::string ToString() const; + + int size() const { return size_; } + const uint32_t* words() const { return words_; } + + private: + // Reads the number between [begin, end), possibly containing a decimal point, + // into this BigUnsigned. + // + // Callers are required to ensure [begin, end) contains a valid number, with + // one or more decimal digits and at most one decimal point. This routine + // will behave unpredictably if these preconditions are not met. + // + // Only the first `significant_digits` digits are read. Digits beyond this + // limit are "sticky": If the final significant digit is 0 or 5, and if any + // dropped digit is nonzero, then that final significant digit is adjusted up + // to 1 or 6. This adjustment allows for precise rounding. + // + // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to + // account for the decimal point and for dropped significant digits. After + // this function returns, + // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. + int ReadDigits(const char* begin, const char* end, int significant_digits); + + // Performs a step of big integer multiplication. This computes the full + // (64-bit-wide) values that should be added at the given index (step), and + // adds to that location in-place. + // + // Because our math all occurs in place, we must multiply starting from the + // highest word working downward. (This is a bit more expensive due to the + // extra carries involved.) + // + // This must be called in steps, for each word to be calculated, starting from + // the high end and working down to 0. The first value of `step` should be + // `std::min(original_size + other.size_ - 2, max_words - 1)`. + // The reason for this expression is that multiplying the i'th word from one + // multiplicand and the j'th word of another multiplicand creates a + // two-word-wide value to be stored at the (i+j)'th element. The highest + // word indices we will access are `original_size - 1` from this object, and + // `other.size_ - 1` from our operand. Therefore, + // `original_size + other.size_ - 2` is the first step we should calculate, + // but limited on an upper bound by max_words. + + // Working from high-to-low ensures that we do not overwrite the portions of + // the initial value of *this which are still needed for later steps. + // + // Once called with step == 0, *this contains the result of the + // multiplication. + // + // `original_size` is the size_ of *this before the first call to + // MultiplyStep(). `other_words` and `other_size` are the contents of our + // operand. `step` is the step to perform, as described above. + void MultiplyStep(int original_size, const uint32_t* other_words, + int other_size, int step); + + void MultiplyBy(int other_size, const uint32_t* other_words) { + const int original_size = size_; + const int first_step = + std::min(original_size + other_size - 2, max_words - 1); + for (int step = first_step; step >= 0; --step) { + MultiplyStep(original_size, other_words, other_size, step); + } + } + + // Adds a 32-bit value to the index'th word, with carry. + void AddWithCarry(int index, uint32_t value) { + if (value) { + while (index < max_words && value > 0) { + words_[index] += value; + // carry if we overflowed in this word: + if (value > words_[index]) { + value = 1; + ++index; + } else { + value = 0; + } + } + size_ = std::min(max_words, std::max(index + 1, size_)); + } + } + + void AddWithCarry(int index, uint64_t value) { + if (value && index < max_words) { + uint32_t high = value >> 32; + uint32_t low = value & 0xffffffff; + words_[index] += low; + if (words_[index] < low) { + ++high; + if (high == 0) { + // Carry from the low word caused our high word to overflow. + // Short circuit here to do the right thing. + AddWithCarry(index + 2, static_cast(1)); + return; + } + } + if (high > 0) { + AddWithCarry(index + 1, high); + } else { + // Normally 32-bit AddWithCarry() sets size_, but since we don't call + // it when `high` is 0, do it ourselves here. + size_ = std::min(max_words, std::max(index + 1, size_)); + } + } + } + + // Divide this in place by a constant divisor. Returns the remainder of the + // division. + template + uint32_t DivMod() { + uint64_t accumulator = 0; + for (int i = size_ - 1; i >= 0; --i) { + accumulator <<= 32; + accumulator += words_[i]; + // accumulator / divisor will never overflow an int32_t in this loop + words_[i] = static_cast(accumulator / divisor); + accumulator = accumulator % divisor; + } + while (size_ > 0 && words_[size_ - 1] == 0) { + --size_; + } + return static_cast(accumulator); + } + + // The number of elements in words_ that may carry significant values. + // All elements beyond this point are 0. + // + // When size_ is 0, this BigUnsigned stores the value 0. + // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is + // nonzero. This can occur due to overflow truncation. + // In particular, x.size_ != y.size_ does *not* imply x != y. + int size_; + uint32_t words_[max_words]; +}; + +// Compares two big integer instances. +// +// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. +template +int Compare(const BigUnsigned& lhs, const BigUnsigned& rhs) { + int limit = std::max(lhs.size(), rhs.size()); + for (int i = limit - 1; i >= 0; --i) { + const uint32_t lhs_word = lhs.GetWord(i); + const uint32_t rhs_word = rhs.GetWord(i); + if (lhs_word < rhs_word) { + return -1; + } else if (lhs_word > rhs_word) { + return 1; + } + } + return 0; +} + +template +bool operator==(const BigUnsigned& lhs, const BigUnsigned& rhs) { + int limit = std::max(lhs.size(), rhs.size()); + for (int i = 0; i < limit; ++i) { + if (lhs.GetWord(i) != rhs.GetWord(i)) { + return false; + } + } + return true; +} + +template +bool operator!=(const BigUnsigned& lhs, const BigUnsigned& rhs) { + return !(lhs == rhs); +} + +template +bool operator<(const BigUnsigned& lhs, const BigUnsigned& rhs) { + return Compare(lhs, rhs) == -1; +} + +template +bool operator>(const BigUnsigned& lhs, const BigUnsigned& rhs) { + return rhs < lhs; +} +template +bool operator<=(const BigUnsigned& lhs, const BigUnsigned& rhs) { + return !(rhs < lhs); +} +template +bool operator>=(const BigUnsigned& lhs, const BigUnsigned& rhs) { + return !(lhs < rhs); +} + +// Output operator for BigUnsigned, for testing purposes only. +template +std::ostream& operator<<(std::ostream& os, const BigUnsigned& num) { + return os << num.ToString(); +} + +// Explicit instantiation declarations for the sizes of BigUnsigned that we +// are using. +// +// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is +// still bigger than an int128, and 84 is a large value we will want to use +// in the from_chars implementation. +// +// Comments justifying the use of 84 belong in the from_chars implementation, +// and will be added in a follow-up CL. +extern template class BigUnsigned<4>; +extern template class BigUnsigned<84>; + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ diff --git a/absl/strings/internal/charconv_bigint_test.cc b/absl/strings/internal/charconv_bigint_test.cc new file mode 100644 index 000000000000..9b6357888fbf --- /dev/null +++ b/absl/strings/internal/charconv_bigint_test.cc @@ -0,0 +1,203 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_bigint.h" + +#include + +#include "gtest/gtest.h" + +namespace absl { +namespace strings_internal { + +TEST(BigUnsigned, ShiftLeft) { + { + // Check that 3 * 2**100 is calculated correctly + BigUnsigned<4> num(3u); + num.ShiftLeft(100); + EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128")); + } + { + // Test that overflow is truncated properly. + // 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint. + // Shifting left by 125 bits should truncate off the high bit, so that + // 15 << 125 == 7 << 125 + // after truncation. + BigUnsigned<4> a(15u); + BigUnsigned<4> b(7u); + BigUnsigned<4> c(3u); + a.ShiftLeft(125); + b.ShiftLeft(125); + c.ShiftLeft(125); + EXPECT_EQ(a, b); + EXPECT_NE(a, c); + } + { + // Same test, larger bigint: + BigUnsigned<84> a(15u); + BigUnsigned<84> b(7u); + BigUnsigned<84> c(3u); + a.ShiftLeft(84 * 32 - 3); + b.ShiftLeft(84 * 32 - 3); + c.ShiftLeft(84 * 32 - 3); + EXPECT_EQ(a, b); + EXPECT_NE(a, c); + } + { + // Check that incrementally shifting has the same result as doing it all at + // once (attempting to capture corner cases.) + const std::string seed = "1234567890123456789012345678901234567890"; + BigUnsigned<84> a(seed); + for (int i = 1; i <= 84 * 32; ++i) { + a.ShiftLeft(1); + BigUnsigned<84> b(seed); + b.ShiftLeft(i); + EXPECT_EQ(a, b); + } + // And we should have fully rotated all bits off by now: + EXPECT_EQ(a, BigUnsigned<84>(0u)); + } +} + +TEST(BigUnsigned, MultiplyByUint32) { + const BigUnsigned<84> factorial_100( + "933262154439441526816992388562667004907159682643816214685929638952175999" + "932299156089414639761565182862536979208272237582511852109168640000000000" + "00000000000000"); + BigUnsigned<84> a(1u); + for (uint32_t i = 1; i <= 100; ++i) { + a.MultiplyBy(i); + } + EXPECT_EQ(a, BigUnsigned<84>(factorial_100)); +} + +TEST(BigUnsigned, MultiplyByBigUnsigned) { + { + // Put the terms of factorial_200 into two bigints, and multiply them + // together. + const BigUnsigned<84> factorial_200( + "7886578673647905035523632139321850622951359776871732632947425332443594" + "4996340334292030428401198462390417721213891963883025764279024263710506" + "1926624952829931113462857270763317237396988943922445621451664240254033" + "2918641312274282948532775242424075739032403212574055795686602260319041" + "7032406235170085879617892222278962370389737472000000000000000000000000" + "0000000000000000000000000"); + BigUnsigned<84> evens(1u); + BigUnsigned<84> odds(1u); + for (uint32_t i = 1; i < 200; i += 2) { + odds.MultiplyBy(i); + evens.MultiplyBy(i + 1); + } + evens.MultiplyBy(odds); + EXPECT_EQ(evens, factorial_200); + } + { + // Multiply various powers of 10 together. + for (int a = 0 ; a < 700; a += 25) { + SCOPED_TRACE(a); + BigUnsigned<84> a_value("3" + std::string(a, '0')); + for (int b = 0; b < (700 - a); b += 25) { + SCOPED_TRACE(b); + BigUnsigned<84> b_value("2" + std::string(b, '0')); + BigUnsigned<84> expected_product("6" + std::string(a + b, '0')); + b_value.MultiplyBy(a_value); + EXPECT_EQ(b_value, expected_product); + } + } + } +} + +TEST(BigUnsigned, MultiplyByOverflow) { + { + // Check that multiplcation overflow predictably truncates. + + // A big int with all bits on. + BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455"); + // Modulo 2**128, this is equal to -1. Therefore the square of this, + // modulo 2**128, should be 1. + all_bits_on.MultiplyBy(all_bits_on); + EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u)); + } + { + // Try multiplying a large bigint by 2**50, and compare the result to + // shifting. + BigUnsigned<4> value_1("12345678901234567890123456789012345678"); + BigUnsigned<4> value_2("12345678901234567890123456789012345678"); + BigUnsigned<4> two_to_fiftieth(1u); + two_to_fiftieth.ShiftLeft(50); + + value_1.ShiftLeft(50); + value_2.MultiplyBy(two_to_fiftieth); + EXPECT_EQ(value_1, value_2); + } +} + +TEST(BigUnsigned, FiveToTheNth) { + { + // Sanity check that MultiplyByFiveToTheNth gives consistent answers, up to + // and including overflow. + for (int i = 0; i < 1160; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(123u); + BigUnsigned<84> value_2(123u); + value_1.MultiplyByFiveToTheNth(i); + for (int j = 0; j < i; j++) { + value_2.MultiplyBy(5u); + } + EXPECT_EQ(value_1, value_2); + } + } + { + // Check that the faster, table-lookup-based static method returns the same + // result that multiplying in-place would return, up to and including + // overflow. + for (int i = 0; i < 1160; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(1u); + value_1.MultiplyByFiveToTheNth(i); + BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i); + EXPECT_EQ(value_1, value_2); + } + } +} + +TEST(BigUnsigned, TenToTheNth) { + { + // Sanity check MultiplyByTenToTheNth. + for (int i = 0; i < 800; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(123u); + BigUnsigned<84> value_2(123u); + value_1.MultiplyByTenToTheNth(i); + for (int j = 0; j < i; j++) { + value_2.MultiplyBy(10u); + } + EXPECT_EQ(value_1, value_2); + } + } + { + // Alternate testing approach, taking advantage of the decimal parser. + for (int i = 0; i < 200; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(135u); + value_1.MultiplyByTenToTheNth(i); + BigUnsigned<84> value_2("135" + std::string(i, '0')); + EXPECT_EQ(value_1, value_2); + } + } +} + + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/charconv_parse.cc b/absl/strings/internal/charconv_parse.cc new file mode 100644 index 000000000000..a04cc67669a7 --- /dev/null +++ b/absl/strings/internal/charconv_parse.cc @@ -0,0 +1,496 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/charconv.h" + +#include +#include +#include + +#include "absl/strings/internal/memutil.h" + +namespace absl { +namespace { + +// ParseFloat<10> will read the first 19 significant digits of the mantissa. +// This number was chosen for multiple reasons. +// +// (a) First, for whatever integer type we choose to represent the mantissa, we +// want to choose the largest possible number of decimal digits for that integer +// type. We are using uint64_t, which can express any 19-digit unsigned +// integer. +// +// (b) Second, we need to parse enough digits that the binary value of any +// mantissa we capture has more bits of resolution than the mantissa +// representation in the target float. Our algorithm requires at least 3 bits +// of headway, but 19 decimal digits give a little more than that. +// +// The following static assertions verify the above comments: +constexpr int kDecimalMantissaDigitsMax = 19; + +static_assert(std::numeric_limits::digits10 == + kDecimalMantissaDigitsMax, + "(a) above"); + +// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. +static_assert(std::numeric_limits::is_iec559, "IEEE double assumed"); +static_assert(std::numeric_limits::radix == 2, "IEEE double fact"); +static_assert(std::numeric_limits::digits == 53, "IEEE double fact"); + +// The lowest valued 19-digit decimal mantissa we can read still contains +// sufficient information to reconstruct a binary mantissa. +static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); + +// ParseFloat<16> will read the first 15 significant digits of the mantissa. +// +// Because a base-16-to-base-2 conversion can be done exactly, we do not need +// to maximize the number of scanned hex digits to improve our conversion. What +// is required is to scan two more bits than the mantissa can represent, so that +// we always round correctly. +// +// (One extra bit does not suffice to perform correct rounding, since a number +// exactly halfway between two representable floats has unique rounding rules, +// so we need to differentiate between a "halfway between" number and a "closer +// to the larger value" number.) +constexpr int kHexadecimalMantissaDigitsMax = 15; + +// The minimum number of significant bits that will be read from +// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since +// the most significant digit can be a "1", which only contributes a single +// significant bit. +constexpr int kGuaranteedHexadecimalMantissaBitPrecision = + 4 * kHexadecimalMantissaDigitsMax - 3; + +static_assert(kGuaranteedHexadecimalMantissaBitPrecision > + std::numeric_limits::digits + 2, + "kHexadecimalMantissaDigitsMax too small"); + +// We also impose a limit on the number of significant digits we will read from +// an exponent, to avoid having to deal with integer overflow. We use 9 for +// this purpose. +// +// If we read a 9 digit exponent, the end result of the conversion will +// necessarily be infinity or zero, depending on the sign of the exponent. +// Therefore we can just drop extra digits on the floor without any extra +// logic. +constexpr int kDecimalExponentDigitsMax = 9; +static_assert(std::numeric_limits::digits10 >= kDecimalExponentDigitsMax, + "int type too small"); + +// To avoid incredibly large inputs causing integer overflow for our exponent, +// we impose an arbitrary but very large limit on the number of significant +// digits we will accept. The implementation refuses to match a std::string with +// more consecutive significant mantissa digits than this. +constexpr int kDecimalDigitLimit = 50000000; + +// Corresponding limit for hexadecimal digit inputs. This is one fourth the +// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires +// a binary exponent adjustment of 4. +constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4; + +// The largest exponent we can read is 999999999 (per +// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get +// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these +// comfortably fits in an integer. +// +// We count kDecimalDigitLimit twice because there are independent limits for +// numbers before and after the decimal point. (In the case where there are no +// significant digits before the decimal point, there are independent limits for +// post-decimal-point leading zeroes and for significant digits.) +static_assert(999999999 + 2 * kDecimalDigitLimit < + std::numeric_limits::max(), + "int type too small"); +static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) < + std::numeric_limits::max(), + "int type too small"); + +// Returns true if the provided bitfield allows parsing an exponent value +// (e.g., "1.5e100"). +bool AllowExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific || !fixed; +} + +// Returns true if the provided bitfield requires an exponent value be present. +bool RequireExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific && !fixed; +} + +const int8_t kAsciiToInt[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +// Returns true if `ch` is a digit in the given base +template +bool IsDigit(char ch); + +// Converts a valid `ch` to its digit value in the given base. +template +unsigned ToDigit(char ch); + +// Returns true if `ch` is the exponent delimiter for the given base. +template +bool IsExponentCharacter(char ch); + +// Returns the maximum number of significant digits we will read for a float +// in the given base. +template +constexpr int MantissaDigitsMax(); + +// Returns the largest consecutive run of digits we will accept when parsing a +// number in the given base. +template +constexpr int DigitLimit(); + +// Returns the amount the exponent must be adjusted by for each dropped digit. +// (For decimal this is 1, since the digits are in base 10 and the exponent base +// is also 10, but for hexadecimal this is 4, since the digits are base 16 but +// the exponent base is 2.) +template +constexpr int DigitMagnitude(); + +template <> +bool IsDigit<10>(char ch) { + return ch >= '0' && ch <= '9'; +} +template <> +bool IsDigit<16>(char ch) { + return kAsciiToInt[static_cast(ch)] >= 0; +} + +template <> +unsigned ToDigit<10>(char ch) { + return ch - '0'; +} +template <> +unsigned ToDigit<16>(char ch) { + return kAsciiToInt[static_cast(ch)]; +} + +template <> +bool IsExponentCharacter<10>(char ch) { + return ch == 'e' || ch == 'E'; +} + +template <> +bool IsExponentCharacter<16>(char ch) { + return ch == 'p' || ch == 'P'; +} + +template <> +constexpr int MantissaDigitsMax<10>() { + return kDecimalMantissaDigitsMax; +} +template <> +constexpr int MantissaDigitsMax<16>() { + return kHexadecimalMantissaDigitsMax; +} + +template <> +constexpr int DigitLimit<10>() { + return kDecimalDigitLimit; +} +template <> +constexpr int DigitLimit<16>() { + return kHexadecimalDigitLimit; +} + +template <> +constexpr int DigitMagnitude<10>() { + return 1; +} +template <> +constexpr int DigitMagnitude<16>() { + return 4; +} + +// Reads decimal digits from [begin, end) into *out. Returns the number of +// digits consumed. +// +// After max_digits has been read, keeps consuming characters, but no longer +// adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit +// is set; otherwise, it is left unmodified. +// +// If no digits are matched, returns 0 and leaves *out unchanged. +// +// ConsumeDigits does not protect against overflow on *out; max_digits must +// be chosen with respect to type T to avoid the possibility of overflow. +template +std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, + T* out, bool* dropped_nonzero_digit) { + if (base == 10) { + assert(max_digits <= std::numeric_limits::digits10); + } else if (base == 16) { + assert(max_digits * 4 <= std::numeric_limits::digits); + } + const char* const original_begin = begin; + T accumulator = *out; + const char* significant_digits_end = + (end - begin > max_digits) ? begin + max_digits : end; + while (begin < significant_digits_end && IsDigit(*begin)) { + // Do not guard against *out overflow; max_digits was chosen to avoid this. + // Do assert against it, to detect problems in debug builds. + auto digit = static_cast(ToDigit(*begin)); + assert(accumulator * base >= accumulator); + accumulator *= base; + assert(accumulator + digit >= accumulator); + accumulator += digit; + ++begin; + } + bool dropped_nonzero = false; + while (begin < end && IsDigit(*begin)) { + dropped_nonzero = dropped_nonzero || (*begin != '0'); + ++begin; + } + if (dropped_nonzero && dropped_nonzero_digit != nullptr) { + *dropped_nonzero_digit = true; + } + *out = accumulator; + return begin - original_begin; +} + +// Returns true if `v` is one of the chars allowed inside parentheses following +// a NaN. +bool IsNanChar(char v) { + return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') || + (v >= 'A' && v <= 'Z'); +} + +// Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If +// one is found, sets `out` appropriately and returns true. +bool ParseInfinityOrNan(const char* begin, const char* end, + strings_internal::ParsedFloat* out) { + if (end - begin < 3) { + return false; + } + switch (*begin) { + case 'i': + case 'I': { + // An infinity std::string consists of the characters "inf" or "infinity", + // case insensitive. + if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kInfinity; + if (end - begin >= 8 && + strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) { + out->end = begin + 8; + } else { + out->end = begin + 3; + } + return true; + } + case 'n': + case 'N': { + // A NaN consists of the characters "nan", case insensitive, optionally + // followed by a parenthesized sequence of zero or more alphanumeric + // characters and/or underscores. + if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kNan; + out->end = begin + 3; + // NaN is allowed to be followed by a parenthesized std::string, consisting of + // only the characters [a-zA-Z0-9_]. Match that if it's present. + begin += 3; + if (begin < end && *begin == '(') { + const char* nan_begin = begin + 1; + while (nan_begin < end && IsNanChar(*nan_begin)) { + ++nan_begin; + } + if (nan_begin < end && *nan_begin == ')') { + // We found an extra NaN specifier range + out->subrange_begin = begin + 1; + out->subrange_end = nan_begin; + out->end = nan_begin + 1; + } + } + return true; + } + default: + return false; + } +} +} // namespace + +namespace strings_internal { + +template +strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, + chars_format format_flags) { + strings_internal::ParsedFloat result; + + // Exit early if we're given an empty range. + if (begin == end) return result; + + // Handle the infinity and NaN cases. + if (ParseInfinityOrNan(begin, end, &result)) { + return result; + } + + const char* const mantissa_begin = begin; + while (begin < end && *begin == '0') { + ++begin; // skip leading zeros + } + uint64_t mantissa = 0; + + int exponent_adjustment = 0; + bool mantissa_is_inexact = false; + std::size_t pre_decimal_digits = ConsumeDigits( + begin, end, MantissaDigitsMax(), &mantissa, &mantissa_is_inexact); + begin += pre_decimal_digits; + int digits_left; + if (pre_decimal_digits >= DigitLimit()) { + // refuse to parse pathological inputs + return result; + } else if (pre_decimal_digits > MantissaDigitsMax()) { + // We dropped some non-fraction digits on the floor. Adjust our exponent + // to compensate. + exponent_adjustment = + static_cast(pre_decimal_digits - MantissaDigitsMax()); + digits_left = 0; + } else { + digits_left = + static_cast(MantissaDigitsMax() - pre_decimal_digits); + } + if (begin < end && *begin == '.') { + ++begin; + if (mantissa == 0) { + // If we haven't seen any nonzero digits yet, keep skipping zeros. We + // have to adjust the exponent to reflect the changed place value. + const char* begin_zeros = begin; + while (begin < end && *begin == '0') { + ++begin; + } + std::size_t zeros_skipped = begin - begin_zeros; + if (zeros_skipped >= DigitLimit()) { + // refuse to parse pathological inputs + return result; + } + exponent_adjustment -= static_cast(zeros_skipped); + } + std::size_t post_decimal_digits = ConsumeDigits( + begin, end, digits_left, &mantissa, &mantissa_is_inexact); + begin += post_decimal_digits; + + // Since `mantissa` is an integer, each significant digit we read after + // the decimal point requires an adjustment to the exponent. "1.23e0" will + // be stored as `mantissa` == 123 and `exponent` == -2 (that is, + // "123e-2"). + if (post_decimal_digits >= DigitLimit()) { + // refuse to parse pathological inputs + return result; + } else if (post_decimal_digits > digits_left) { + exponent_adjustment -= digits_left; + } else { + exponent_adjustment -= post_decimal_digits; + } + } + // If we've found no mantissa whatsoever, this isn't a number. + if (mantissa_begin == begin) { + return result; + } + // A bare "." doesn't count as a mantissa either. + if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { + return result; + } + + if (mantissa_is_inexact) { + // We dropped significant digits on the floor. Handle this appropriately. + if (base == 10) { + // If we truncated significant decimal digits, store the full range of the + // mantissa for future big integer math for exact rounding. + result.subrange_begin = mantissa_begin; + result.subrange_end = begin; + } else if (base == 16) { + // If we truncated hex digits, reflect this fact by setting the low + // ("sticky") bit. This allows for correct rounding in all cases. + mantissa |= 1; + } + } + result.mantissa = mantissa; + + const char* const exponent_begin = begin; + result.literal_exponent = 0; + bool found_exponent = false; + if (AllowExponent(format_flags) && begin < end && + IsExponentCharacter(*begin)) { + bool negative_exponent = false; + ++begin; + if (begin < end && *begin == '-') { + negative_exponent = true; + ++begin; + } else if (begin < end && *begin == '+') { + ++begin; + } + const char* const exponent_digits_begin = begin; + // Exponent is always expressed in decimal, even for hexadecimal floats. + begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, + &result.literal_exponent, nullptr); + if (begin == exponent_digits_begin) { + // there were no digits where we expected an exponent. We failed to read + // an exponent and should not consume the 'e' after all. Rewind 'begin'. + found_exponent = false; + begin = exponent_begin; + } else { + found_exponent = true; + if (negative_exponent) { + result.literal_exponent = -result.literal_exponent; + } + } + } + + if (!found_exponent && RequireExponent(format_flags)) { + // Provided flags required an exponent, but none was found. This results + // in a failure to scan. + return result; + } + + // Success! + result.type = strings_internal::FloatType::kNumber; + if (result.mantissa > 0) { + result.exponent = result.literal_exponent + + (DigitMagnitude() * exponent_adjustment); + } else { + result.exponent = 0; + } + result.end = begin; + return result; +} + +template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + chars_format format_flags); +template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + chars_format format_flags); + +} // namespace strings_internal +} // namespace absl diff --git a/absl/strings/internal/charconv_parse.h b/absl/strings/internal/charconv_parse.h new file mode 100644 index 000000000000..7a5c0874b804 --- /dev/null +++ b/absl/strings/internal/charconv_parse.h @@ -0,0 +1,96 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ + +#include + +#include "absl/strings/charconv.h" + +namespace absl { +namespace strings_internal { + +// Enum indicating whether a parsed float is a number or special value. +enum class FloatType { kNumber, kInfinity, kNan }; + +// The decomposed parts of a parsed `float` or `double`. +struct ParsedFloat { + // Representation of the parsed mantissa, with the decimal point adjusted to + // make it an integer. + // + // During decimal scanning, this contains 19 significant digits worth of + // mantissa value. If digits beyond this point are found, they + // are truncated, and if any of these dropped digits are nonzero, then + // `mantissa` is inexact, and the full mantissa is stored in [subrange_begin, + // subrange_end). + // + // During hexadecimal scanning, this contains 15 significant hex digits worth + // of mantissa value. Digits beyond this point are sticky -- they are + // truncated, but if any dropped digits are nonzero, the low bit of mantissa + // will be set. (This allows for precise rounding, and avoids the need + // to store the full mantissa in [subrange_begin, subrange_end).) + uint64_t mantissa = 0; + + // Floating point expontent. This reflects any decimal point adjustments and + // any truncated digits from the mantissa. The absolute value of the parsed + // number is represented by mantissa * (base ** exponent), where base==10 for + // decimal floats, and base==2 for hexadecimal floats. + int exponent = 0; + + // The literal exponent value scanned from the input, or 0 if none was + // present. This does not reflect any adjustments applied to mantissa. + int literal_exponent = 0; + + // The type of number scanned. + FloatType type = FloatType::kNumber; + + // When non-null, [subrange_begin, subrange_end) marks a range of characters + // that require further processing. The meaning is dependent on float type. + // If type == kNumber and this is set, this is a "wide input": the input + // mantissa contained more than 19 digits. The range contains the full + // mantissa. It plus `literal_exponent` need to be examined to find the best + // floating point match. + // If type == kNan and this is set, the range marks the contents of a + // matched parenthesized character region after the NaN. + const char* subrange_begin = nullptr; + const char* subrange_end = nullptr; + + // One-past-the-end of the successfully parsed region, or nullptr if no + // matching pattern was found. + const char* end = nullptr; +}; + +// Read the floating point number in the provided range, and populate +// ParsedFloat accordingly. +// +// format_flags is a bitmask value specifying what patterns this API will match. +// `scientific` and `fixed` are honored per std::from_chars rules +// ([utility.from.chars], C++17): if exactly one of these bits is set, then an +// exponent is required, or dislallowed, respectively. +// +// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is +// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat. +template +ParsedFloat ParseFloat(const char* begin, const char* end, + absl::chars_format format_flags); + +extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + absl::chars_format format_flags); +extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + absl::chars_format format_flags); + +} // namespace strings_internal +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ diff --git a/absl/strings/internal/charconv_parse_test.cc b/absl/strings/internal/charconv_parse_test.cc new file mode 100644 index 000000000000..1ff86004973a --- /dev/null +++ b/absl/strings/internal/charconv_parse_test.cc @@ -0,0 +1,357 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_parse.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/str_cat.h" + +using absl::chars_format; +using absl::strings_internal::FloatType; +using absl::strings_internal::ParsedFloat; +using absl::strings_internal::ParseFloat; + +namespace { + +// Check that a given std::string input is parsed to the expected mantissa and +// exponent. +// +// Input std::string `s` must contain a '$' character. It marks the end of the +// characters that should be consumed by the match. It is stripped from the +// input to ParseFloat. +// +// If input std::string `s` contains '[' and ']' characters, these mark the region +// of characters that should be marked as the "subrange". For NaNs, this is +// the location of the extended NaN std::string. For numbers, this is the location +// of the full, over-large mantissa. +template +void ExpectParsedFloat(std::string s, absl::chars_format format_flags, + FloatType expected_type, uint64_t expected_mantissa, + int expected_exponent, + int expected_literal_exponent = -999) { + SCOPED_TRACE(s); + + int begin_subrange = -1; + int end_subrange = -1; + // If s contains '[' and ']', then strip these characters and set the subrange + // indices appropriately. + std::string::size_type open_bracket_pos = s.find('['); + if (open_bracket_pos != std::string::npos) { + begin_subrange = static_cast(open_bracket_pos); + s.replace(open_bracket_pos, 1, ""); + std::string::size_type close_bracket_pos = s.find(']'); + ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos, + "Test input contains [ without matching ]"); + end_subrange = static_cast(close_bracket_pos); + s.replace(close_bracket_pos, 1, ""); + } + const std::string::size_type expected_characters_matched = s.find('$'); + ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, + "Input std::string must contain $"); + s.replace(expected_characters_matched, 1, ""); + + ParsedFloat parsed = + ParseFloat(s.data(), s.data() + s.size(), format_flags); + + EXPECT_NE(parsed.end, nullptr); + if (parsed.end == nullptr) { + return; // The following tests are not useful if we fully failed to parse + } + EXPECT_EQ(parsed.type, expected_type); + if (begin_subrange == -1) { + EXPECT_EQ(parsed.subrange_begin, nullptr); + EXPECT_EQ(parsed.subrange_end, nullptr); + } else { + EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange); + EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange); + } + if (parsed.type == FloatType::kNumber) { + EXPECT_EQ(parsed.mantissa, expected_mantissa); + EXPECT_EQ(parsed.exponent, expected_exponent); + if (expected_literal_exponent != -999) { + EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent); + } + } + auto characters_matched = static_cast(parsed.end - s.data()); + EXPECT_EQ(characters_matched, expected_characters_matched); +} + +// Check that a given std::string input is parsed to the expected mantissa and +// exponent. +// +// Input std::string `s` must contain a '$' character. It marks the end of the +// characters that were consumed by the match. +template +void ExpectNumber(std::string s, absl::chars_format format_flags, + uint64_t expected_mantissa, int expected_exponent, + int expected_literal_exponent = -999) { + ExpectParsedFloat(std::move(s), format_flags, FloatType::kNumber, + expected_mantissa, expected_exponent, + expected_literal_exponent); +} + +// Check that a given std::string input is parsed to the given special value. +// +// This tests against both number bases, since infinities and NaNs have +// identical representations in both modes. +void ExpectSpecial(const std::string& s, absl::chars_format format_flags, + FloatType type) { + ExpectParsedFloat<10>(s, format_flags, type, 0, 0); + ExpectParsedFloat<16>(s, format_flags, type, 0, 0); +} + +// Check that a given input std::string is not matched by Float. +template +void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) { + ParsedFloat parsed = + ParseFloat(s.data(), s.data() + s.size(), format_flags); + EXPECT_EQ(parsed.end, nullptr); +} + +TEST(ParseFloat, SimpleValue) { + // Test that various forms of floating point numbers all parse correctly. + ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3); + ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8); + + // ExpectNumber does not attempt to drop trailing zeroes. + ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900, + -5); + ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general, + 0x1234abcdef000, -20); + + // Ensure non-matching characters after a number are ignored, even when they + // look like potentially matching characters. + ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789, + -3); + ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general, + 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8); + + // Ensure we can read a full resolution mantissa without overflow. + ExpectNumber<10>("9999999999999999999$", chars_format::general, + 9999999999999999999u, 0); + ExpectNumber<16>("fffffffffffffff$", chars_format::general, + 0xfffffffffffffffu, 0); + + // Check that zero is consistently read. + ExpectNumber<10>("0$", chars_format::general, 0, 0); + ExpectNumber<16>("0$", chars_format::general, 0, 0); + ExpectNumber<10>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0.00000000000000000000000000000000e123456$", + chars_format::general, 0, 0); + ExpectNumber<16>("0.00000000000000000000000000000000p123456$", + chars_format::general, 0, 0); +} + +TEST(ParseFloat, LargeDecimalMantissa) { + // After 19 significant decimal digits in the mantissa, ParsedFloat will + // truncate additional digits. We need to test that: + // 1) the truncation to 19 digits happens + // 2) the returned exponent reflects the dropped significant digits + // 3) a correct literal_exponent is set + // + // If and only if a significant digit is found after 19 digits, then the + // entirety of the mantissa in case the exact value is needed to make a + // rounding decision. The [ and ] characters below denote where such a + // subregion was marked by by ParseFloat. They are not part of the input. + + // Mark a capture group only if a dropped digit is significant (nonzero). + ExpectNumber<10>("100000000000000000000000000$", chars_format::general, + 1000000000000000000, + /* adjusted exponent */ 8); + + ExpectNumber<10>("123456789123456789100000000$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8); + + ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + // Leading zeroes should not count towards the 19 significant digit limit + ExpectNumber<10>("[00000000123456789123456789123456789]$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("00000000123456789123456789100000000$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8); + + // Truncated digits after the decimal point should not cause a further + // exponent adjustment. + ExpectNumber<10>("1.234567891234567891e123$", chars_format::general, + 1234567891234567891, 105); + ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 105, + /* literal exponent */ 123); + + // Ensure we truncate, and not round. (The from_chars algorithm we use + // depends on our guess missing low, if it misses, so we need the rounding + // error to be downward.) + ExpectNumber<10>("[1999999999999999999999]$", chars_format::general, + 1999999999999999999, + /* adjusted exponent */ 3, + /* literal exponent */ 0); +} + +TEST(ParseFloat, LargeHexadecimalMantissa) { + // After 15 significant hex digits in the mantissa, ParsedFloat will treat + // additional digits as sticky, We need to test that: + // 1) The truncation to 15 digits happens + // 2) The returned exponent reflects the dropped significant digits + // 3) If a nonzero digit is dropped, the low bit of mantissa is set. + + ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general, + 0x123456789abcdef, 60); + + // Leading zeroes should not count towards the 15 significant digit limit + ExpectNumber<16>("000000123456789abcdef123456789abcdef$", + chars_format::general, 0x123456789abcdef, 60); + + // Truncated digits after the radix point should not cause a further + // exponent adjustment. + ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general, + 0x123456789abcdef, 44); + ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", + chars_format::general, 0x123456789abcdef, 44); + + // test sticky digit behavior. The low bit should be set iff any dropped + // digit is nonzero. + ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general, + 0x123456789abcdee, 60); +} + +TEST(ParseFloat, ScientificVsFixed) { + // In fixed mode, an exponent is never matched (but the remainder of the + // number will be matched.) + ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8); + ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3); + ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36); + ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8); + + // In scientific mode, numbers don't match *unless* they have an exponent. + ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3); + ExpectFailedParse<10>("-123456.789$", chars_format::scientific); + ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef, + -8); + ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific); +} + +TEST(ParseFloat, Infinity) { + ExpectFailedParse<10>("in", chars_format::general); + ExpectFailedParse<16>("in", chars_format::general); + ExpectFailedParse<10>("inx", chars_format::general); + ExpectFailedParse<16>("inx", chars_format::general); + ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity); + ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity); +} + +TEST(ParseFloat, NaN) { + ExpectFailedParse<10>("na", chars_format::general); + ExpectFailedParse<16>("na", chars_format::general); + ExpectFailedParse<10>("nah", chars_format::general); + ExpectFailedParse<16>("nah", chars_format::general); + ExpectSpecial("nan$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$", chars_format::general, FloatType::kNan); + ExpectSpecial("nAn$", chars_format::general, FloatType::kNan); + ExpectSpecial("NAN$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan); + + // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to + // appear after an NaN. Check that this is allowed, and that the correct + // characters are grouped. + // + // (The characters [ and ] in the pattern below delimit the expected matched + // subgroup; they are not part of the input passed to ParseFloat.) + ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan); + // If the subgroup contains illegal characters, don't match it at all. + ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan); + // Also cope with a missing close paren. + ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan); +} + +} // namespace diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 68ef7999a981..f842ed85e9f5 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -32,6 +32,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/strings/ascii.h" +#include "absl/strings/charconv.h" #include "absl/strings/internal/bits.h" #include "absl/strings/internal/memutil.h" #include "absl/strings/str_cat.h" @@ -40,51 +41,54 @@ namespace absl { bool SimpleAtof(absl::string_view str, float* value) { *value = 0.0; - if (str.empty()) return false; - char buf[32]; - std::unique_ptr bigbuf; - char* ptr = buf; - if (str.size() > sizeof(buf) - 1) { - bigbuf.reset(new char[str.size() + 1]); - ptr = bigbuf.get(); - } - memcpy(ptr, str.data(), str.size()); - ptr[str.size()] = '\0'; - - char* endptr; - *value = strtof(ptr, &endptr); - if (endptr != ptr) { - while (absl::ascii_isspace(*endptr)) ++endptr; - } - // Ignore range errors from strtod/strtof. - // The values it returns on underflow and - // overflow are the right fallback in a - // robust setting. - return *ptr != '\0' && *endptr == '\0'; + str = StripAsciiWhitespace(str); + if (!str.empty() && str[0] == '+') { + str.remove_prefix(1); + } + auto result = absl::from_chars(str.data(), str.data() + str.size(), *value); + if (result.ec == std::errc::invalid_argument) { + return false; + } + if (result.ptr != str.data() + str.size()) { + // not all non-whitespace characters consumed + return false; + } + // from_chars() with DR 3801's current wording will return max() on + // overflow. SimpleAtof returns infinity instead. + if (result.ec == std::errc::result_out_of_range) { + if (*value > 1.0) { + *value = std::numeric_limits::infinity(); + } else if (*value < -1.0) { + *value = -std::numeric_limits::infinity(); + } + } + return true; } bool SimpleAtod(absl::string_view str, double* value) { *value = 0.0; - if (str.empty()) return false; - char buf[32]; - std::unique_ptr bigbuf; - char* ptr = buf; - if (str.size() > sizeof(buf) - 1) { - bigbuf.reset(new char[str.size() + 1]); - ptr = bigbuf.get(); - } - memcpy(ptr, str.data(), str.size()); - ptr[str.size()] = '\0'; - - char* endptr; - *value = strtod(ptr, &endptr); - if (endptr != ptr) { - while (absl::ascii_isspace(*endptr)) ++endptr; - } - // Ignore range errors from strtod. The values it - // returns on underflow and overflow are the right - // fallback in a robust setting. - return *ptr != '\0' && *endptr == '\0'; + str = StripAsciiWhitespace(str); + if (!str.empty() && str[0] == '+') { + str.remove_prefix(1); + } + auto result = absl::from_chars(str.data(), str.data() + str.size(), *value); + if (result.ec == std::errc::invalid_argument) { + return false; + } + if (result.ptr != str.data() + str.size()) { + // not all non-whitespace characters consumed + return false; + } + // from_chars() with DR 3801's current wording will return max() on + // overflow. SimpleAtod returns infinity instead. + if (result.ec == std::errc::result_out_of_range) { + if (*value > 1.0) { + *value = std::numeric_limits::infinity(); + } else if (*value < -1.0) { + *value = -std::numeric_limits::infinity(); + } + } + return true; } namespace { -- cgit 1.4.1