4 files changed, 547 insertions, 26 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 9640ff4627cb..acf91e574cf8 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -557,6 +557,7 @@ cc_library(
     visibility = ["//visibility:private"],
     deps = [
         ":strings",
+        "//absl/base:bits",
         "//absl/base:core_headers",
         "//absl/container:inlined_vector",
         "//absl/meta:type_traits",
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index d3393a39eb46..461b279d5c6e 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -384,6 +384,7 @@ absl_cc_library(
   COPTS
     ${ABSL_DEFAULT_COPTS}
   DEPS
+    absl::bits
     absl::strings
     absl::core_headers
     absl::inlined_vector
diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc
index 99cc0afe4eb3..b272dd7b5cfa 100644
--- a/absl/strings/internal/str_format/convert_test.cc
+++ b/absl/strings/internal/str_format/convert_test.cc
@@ -2,6 +2,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <cmath>
+#include <limits>
 #include <string>
 
 #include "gtest/gtest.h"
@@ -397,8 +398,8 @@ TEST_F(FormatConvertTest, Float) {
 #endif  // _MSC_VER
 
   const char *const kFormats[] = {
-      "%",  "%.3",  "%8.5",   "%9",   "%.60", "%.30",   "%03",    "%+",
-      "% ", "%-10", "%#15.3", "%#.0", "%.0",  "%1$*2$", "%1$.*2$"};
+      "%",  "%.3", "%8.5", "%9",     "%.5000", "%.60", "%.30",   "%03",
+      "%+", "% ",  "%-10", "%#15.3", "%#.0",   "%.0",  "%1$*2$", "%1$.*2$"};
 
   std::vector<double> doubles = {0.0,
                                  -0.0,
@@ -438,12 +439,36 @@ TEST_F(FormatConvertTest, Float) {
     }
   }
 
+  // Work around a libc bug.
+  // https://sourceware.org/bugzilla/show_bug.cgi?id=22142
+  if (StrPrint("%f", std::numeric_limits<double>::max()) !=
+      "1797693134862315708145274237317043567980705675258449965989174768031"
+      "5726078002853876058955863276687817154045895351438246423432132688946"
+      "4182768467546703537516986049910576551282076245490090389328944075868"
+      "5084551339423045832369032229481658085593321233482747978262041447231"
+      "68738177180919299881250404026184124858368.000000") {
+    for (auto &d : doubles) {
+      using L = std::numeric_limits<double>;
+      double d2 = std::abs(d);
+      if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) {
+        d = 0;
+      }
+    }
+  }
+
   for (const char *fmt : kFormats) {
     for (char f : {'f', 'F',  //
                    'g', 'G',  //
                    'a', 'A',  //
                    'e', 'E'}) {
       std::string fmt_str = std::string(fmt) + f;
+
+      if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
+        // This particular test takes way too long with snprintf.
+        // Disable for the case we are not implementing natively.
+        continue;
+      }
+
       for (double d : doubles) {
         int i = -10;
         FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)};
@@ -454,27 +479,24 @@ TEST_F(FormatConvertTest, Float) {
         ASSERT_EQ(StrPrint(fmt_str.c_str(), d, i),
                   FormatPack(format, absl::MakeSpan(args)))
             << fmt_str << " " << StrPrint("%.18g", d) << " "
-            << StrPrint("%.999f", d);
+            << StrPrint("%a", d) << " " << StrPrint("%.1080f", d);
       }
     }
   }
 }
 
 TEST_F(FormatConvertTest, LongDouble) {
-  const char *const kFormats[] = {"%",    "%.3", "%8.5", "%9",
+#if _MSC_VER
+  // MSVC has a different rounding policy than us so we can't test our
+  // implementation against the native one there.
+  return;
+#endif  // _MSC_VER
+  const char *const kFormats[] = {"%",    "%.3", "%8.5", "%9",  "%.5000",
                                   "%.60", "%+",  "% ",   "%-10"};
 
-  // This value is not representable in double, but it is in long double that
-  // uses the extended format.
-  // This is to verify that we are not truncating the value mistakenly through a
-  // double.
-  long double very_precise = 10000000000000000.25L;
-
   std::vector<long double> doubles = {
       0.0,
       -0.0,
-      very_precise,
-      1 / very_precise,
       std::numeric_limits<long double>::max(),
       -std::numeric_limits<long double>::max(),
       std::numeric_limits<long double>::min(),
@@ -482,22 +504,44 @@ TEST_F(FormatConvertTest, LongDouble) {
       std::numeric_limits<long double>::infinity(),
       -std::numeric_limits<long double>::infinity()};
 
+  for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L,
+                           1234567.L, 12345678.L, 123456789.L, 1234567890.L,
+                           12345678901.L, 123456789012.L, 1234567890123.L,
+                           // This value is not representable in double, but it
+                           // is in long double that uses the extended format.
+                           // This is to verify that we are not truncating the
+                           // value mistakenly through a double.
+                           10000000000000000.25L}) {
+    for (int exp : {-1000, -500, 0, 500, 1000}) {
+      for (int sign : {1, -1}) {
+        doubles.push_back(sign * std::ldexp(base, exp));
+        doubles.push_back(sign / std::ldexp(base, exp));
+      }
+    }
+  }
+
   for (const char *fmt : kFormats) {
     for (char f : {'f', 'F',  //
                    'g', 'G',  //
                    'a', 'A',  //
                    'e', 'E'}) {
       std::string fmt_str = std::string(fmt) + 'L' + f;
+
+      if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
+        // This particular test takes way too long with snprintf.
+        // Disable for the case we are not implementing natively.
+        continue;
+      }
+
       for (auto d : doubles) {
         FormatArgImpl arg(d);
         UntypedFormatSpecImpl format(fmt_str);
         // We use ASSERT_EQ here because failures are usually correlated and a
         // bug would print way too many failed expectations causing the test to
         // time out.
-        ASSERT_EQ(StrPrint(fmt_str.c_str(), d),
-                  FormatPack(format, {&arg, 1}))
+        ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1}))
             << fmt_str << " " << StrPrint("%.18Lg", d) << " "
-            << StrPrint("%.999Lf", d);
+            << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d);
       }
     }
   }
diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc
index 6176db9cb5a2..20012b5876cc 100644
--- a/absl/strings/internal/str_format/float_conversion.cc
+++ b/absl/strings/internal/str_format/float_conversion.cc
@@ -2,15 +2,476 @@
 
 #include <string.h>
 #include <algorithm>
+#include <array>
 #include <cassert>
 #include <cmath>
+#include <limits>
 #include <string>
 
+#include "absl/base/attributes.h"
+#include "absl/base/internal/bits.h"
+#include "absl/base/optimization.h"
+#include "absl/meta/type_traits.h"
+#include "absl/numeric/int128.h"
+#include "absl/types/span.h"
+
 namespace absl {
 namespace str_format_internal {
 
 namespace {
 
+// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns
+// the carry.
+template <typename Int>
+inline Int MultiplyBy10WithCarry(Int *v, Int carry) {
+  using NextInt = absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>;
+  static_assert(sizeof(void *) >= sizeof(Int),
+                "Don't want to use uint128 in 32-bit mode. It is too slow.");
+  NextInt tmp = 10 * static_cast<NextInt>(*v) + carry;
+  *v = static_cast<Int>(tmp);
+  return static_cast<Int>(tmp >> (sizeof(Int) * 8));
+}
+
+// Calculates `(2^64 * carry + *v) / 10`.
+// Stores the quotient in `*v` and returns the remainder.
+// Requires: `0 <= carry <= 9`
+inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) {
+  constexpr uint64_t divisor = 10;
+  // 2^64 / divisor = word_quotient + word_remainder / divisor
+  constexpr uint64_t word_quotient = (uint64_t{1} << 63) / (divisor / 2);
+  constexpr uint64_t word_remainder = uint64_t{} - word_quotient * divisor;
+
+  const uint64_t mod = *v % divisor;
+  const uint64_t next_carry = word_remainder * carry + mod;
+  *v = *v / divisor + carry * word_quotient + next_carry / divisor;
+  return next_carry % divisor;
+}
+
+int LeadingZeros(uint64_t v) { return base_internal::CountLeadingZeros64(v); }
+int LeadingZeros(uint128 v) {
+  auto high = static_cast<uint64_t>(v >> 64);
+  auto low = static_cast<uint64_t>(v);
+  return high != 0 ? base_internal::CountLeadingZeros64(high)
+                   : 64 + base_internal::CountLeadingZeros64(low);
+}
+
+int TrailingZeros(uint64_t v) {
+  return base_internal::CountTrailingZerosNonZero64(v);
+}
+int TrailingZeros(uint128 v) {
+  auto high = static_cast<uint64_t>(v >> 64);
+  auto low = static_cast<uint64_t>(v);
+  return low == 0 ? 64 + base_internal::CountTrailingZerosNonZero64(high)
+                  : base_internal::CountTrailingZerosNonZero64(low);
+}
+
+// The buffer must have an extra digit that is known to not need rounding.
+// This is done below by having an extra '0' digit on the left.
+void RoundUp(char *last_digit) {
+  char *p = last_digit;
+  while (*p == '9' || *p == '.') {
+    if (*p == '9') *p = '0';
+    --p;
+  }
+  ++*p;
+}
+
+void RoundToEven(char *last_digit) {
+  char *p = last_digit;
+  if (*p == '.') --p;
+  if (*p % 2 == 1) RoundUp(p);
+}
+
+char *PrintIntegralDigitsFromRightDynamic(uint128 v, Span<uint32_t> array,
+                                          int exp, char *p) {
+  if (v == 0) {
+    *--p = '0';
+    return p;
+  }
+
+  int w = exp / 32;
+  const int offset = exp % 32;
+  // Left shift v by exp bits.
+  array[w] = static_cast<uint32_t>(v << offset);
+  for (v >>= (32 - offset); v; v >>= 32) array[++w] = static_cast<uint32_t>(v);
+
+  // While we have more than one word available, go in chunks of 1e9.
+  // We are guaranteed to have at least those many digits.
+  // `w` holds the largest populated word, so keep it updated.
+  while (w > 0) {
+    uint32_t carry = 0;
+    for (int i = w; i >= 0; --i) {
+      uint64_t tmp = uint64_t{array[i]} + (uint64_t{carry} << 32);
+      array[i] = tmp / uint64_t{1000000000};
+      carry = tmp % uint64_t{1000000000};
+    }
+    // If the highest word is now empty, remove it from view.
+    if (array[w] == 0) --w;
+
+    for (int i = 0; i < 9; ++i, carry /= 10) {
+      *--p = carry % 10 + '0';
+    }
+  }
+
+  // Print the leftover of the last word.
+  for (auto last = array[0]; last != 0; last /= 10) {
+    *--p = last % 10 + '0';
+  }
+
+  return p;
+}
+
+struct FractionalResult {
+  const char *end;
+  int precision;
+};
+
+// Prints up to `precision` fractional digits of `v / 2^exp` (`exp` > 0) at
+// `p`, rounding half-to-even. `array[0]` is the top word of zeroed scratch.
+FractionalResult PrintFractionalDigitsDynamic(uint128 v, Span<uint32_t> array,
+                                              char *p, int exp, int precision) {
+  // Index by `exp - 1`: `exp / 32` is out of bounds when `exp` is 32 * size.
+  int w = (exp - 1) / 32;
+  const int shift = 32 * (w + 1) - exp;  // Always in [0, 31].
+  array[w] = static_cast<uint32_t>(v << shift);
+  // Bits at or above 2^exp are integral: never store them before word 0.
+  v >>= (32 - shift);
+  for (int pos = w; v != 0 && pos > 0; v >>= 32)
+    array[--pos] = static_cast<uint32_t>(v);
+
+  // Multiply the whole sequence by 10; the carry out of word 0 is the next
+  // digit. `w` indexes the lowest populated word, so keep it updated.
+  for (; w >= 0 && precision > 0; --precision) {
+    uint32_t carry = 0;
+    for (int i = w; i >= 0; --i) {
+      carry = MultiplyBy10WithCarry(&array[i], carry);
+    }
+    // If the lowest word is now empty, remove it from view.
+    if (array[w] == 0) --w;
+    *p++ = carry + '0';
+  }
+
+  constexpr uint32_t threshold = 0x80000000;
+  const uint32_t *const words = array.data();  // Never index one past the end.
+  if (words[0] > threshold ||
+      (words[0] == threshold &&
+       std::any_of(words + 1, words + w + 1,
+                   [](uint32_t word) { return word != 0; }))) {
+    RoundUp(p - 1);
+  } else if (words[0] == threshold) {
+    RoundToEven(p - 1);
+  }
+  return {p, precision};
+}
+
+// Generic digit printer.
+// `bits` determines how many bits of temporary space it needs for the
+// calculations.
+template <int bits, typename = void>
+class DigitPrinter {
+  static constexpr int kInts = (bits + 31) / 32;
+
+ public:
+  // Quick upper bound for the number of decimal digits we need.
+  // This would be std::ceil(std::log10(std::pow(2, bits))), but that is not
+  // constexpr.
+  static constexpr int kDigits10 = 1 + (bits + 9) / 10 * 3 + bits / 900;
+  using InputType = uint128;
+
+  static char *PrintIntegralDigitsFromRight(InputType v, int exp, char *end) {
+    std::array<uint32_t, kInts> array{};
+    return PrintIntegralDigitsFromRightDynamic(v, absl::MakeSpan(array), exp,
+                                               end);
+  }
+
+  static FractionalResult PrintFractionalDigits(InputType v, char *p, int exp,
+                                                int precision) {
+    std::array<uint32_t, kInts> array{};
+    return PrintFractionalDigitsDynamic(v, absl::MakeSpan(array), p, exp,
+                                        precision);
+  }
+};
+
+// Specialization for 64-bit working space.
+// This is a performance optimization over the generic primary template.
+// Only enabled in 64-bit platforms. The generic one is faster in 32-bit
+// platforms.
+template <int bits>
+class DigitPrinter<bits, absl::enable_if_t<bits == 64 && (sizeof(void *) >=
+                                                          sizeof(uint64_t))>> {
+ public:
+  static constexpr size_t kDigits10 = 20;
+  using InputType = uint64_t;
+
+  static char *PrintIntegralDigitsFromRight(uint64_t v, int exp, char *p) {
+    v <<= exp;
+    do {
+      *--p = DivideBy10WithCarry(&v, 0) + '0';
+    } while (v != 0);
+    return p;
+  }
+
+  static FractionalResult PrintFractionalDigits(uint64_t v, char *p, int exp,
+                                                int precision) {
+    v <<= (64 - exp);
+    while (precision > 0) {
+      if (!v) return {p, precision};
+      *p++ = MultiplyBy10WithCarry(&v, uint64_t{}) + '0';
+      --precision;
+    }
+
+    // We need to round.
+    if (v < 0x8000000000000000) {
+      // We round down, so nothing to do.
+    } else if (v > 0x8000000000000000) {
+      // We round up.
+      RoundUp(p - 1);
+    } else {
+      RoundToEven(p - 1);
+    }
+
+    assert(precision == 0);
+    // Precision can only be zero here. Return a constant instead.
+    return {p, 0};
+  }
+};
+
+// Specialization for 128-bit working space.
+// This is a performance optimization over the generic primary template.
+template <int bits>
+class DigitPrinter<bits, absl::enable_if_t<bits == 128 && (sizeof(void *) >=
+                                                           sizeof(uint64_t))>> {
+ public:
+  static constexpr size_t kDigits10 = 40;
+  using InputType = uint128;
+
+  static char *PrintIntegralDigitsFromRight(uint128 v, int exp, char *p) {
+    v <<= exp;
+    auto high = static_cast<uint64_t>(v >> 64);
+    auto low = static_cast<uint64_t>(v);
+
+    do {
+      uint64_t carry = DivideBy10WithCarry(&high, 0);
+      carry = DivideBy10WithCarry(&low, carry);
+      *--p = carry + '0';
+    } while (high != 0u);
+
+    while (low != 0u) {
+      *--p = DivideBy10WithCarry(&low, 0) + '0';
+    }
+    return p;
+  }
+
+  static FractionalResult PrintFractionalDigits(uint128 v, char *p, int exp,
+                                                int precision) {
+    v <<= (128 - exp);
+    auto high = static_cast<uint64_t>(v >> 64);
+    auto low = static_cast<uint64_t>(v);
+
+    // While we have digits to print and `low` is not empty, do the long
+    // multiplication.
+    while (precision > 0 && low != 0) {
+      uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{});
+      carry = MultiplyBy10WithCarry(&high, carry);
+
+      *p++ = carry + '0';
+      --precision;
+    }
+
+    // Now `low` is empty, so use a faster approach for the rest of the digits.
+    // This block is pretty much the same as the main loop for the 64-bit case
+    // above.
+    while (precision > 0) {
+      if (!high) return {p, precision};
+      *p++ = MultiplyBy10WithCarry(&high, uint64_t{}) + '0';
+      --precision;
+    }
+
+    // We need to round.
+    if (high < 0x8000000000000000) {
+      // We round down, so nothing to do.
+    } else if (high > 0x8000000000000000 || low != 0) {
+      // We round up.
+      RoundUp(p - 1);
+    } else {
+      RoundToEven(p - 1);
+    }
+
+    assert(precision == 0);
+    // Precision can only be zero here. Return a constant instead.
+    return {p, 0};
+  }
+};
+
+struct FormatState {
+  char sign_char;
+  int precision;
+  const ConversionSpec &conv;
+  FormatSinkImpl *sink;
+};
+
+void FinalPrint(string_view data, int trailing_zeros,
+                const FormatState &state) {
+  if (state.conv.width() < 0) {
+    // No width specified. Fast-path.
+    if (state.sign_char != '\0') state.sink->Append(1, state.sign_char);
+    state.sink->Append(data);
+    state.sink->Append(trailing_zeros, '0');
+    return;
+  }
+
+  int left_spaces = 0, zeros = 0, right_spaces = 0;
+  int total_size = (state.sign_char != 0 ? 1 : 0) +
+                   static_cast<int>(data.size()) + trailing_zeros;
+  int missing_chars = std::max(state.conv.width() - total_size, 0);
+  if (state.conv.flags().left) {
+    right_spaces = missing_chars;
+  } else if (state.conv.flags().zero) {
+    zeros = missing_chars;
+  } else {
+    left_spaces = missing_chars;
+  }
+
+  state.sink->Append(left_spaces, ' ');
+  if (state.sign_char != '\0') state.sink->Append(1, state.sign_char);
+  state.sink->Append(zeros, '0');
+  state.sink->Append(data);
+  state.sink->Append(trailing_zeros, '0');
+  state.sink->Append(right_spaces, ' ');
+}
+
+template <int num_bits, typename Int>
+void FormatFPositiveExp(Int v, int exp, const FormatState &state) {
+  using IntegralPrinter = DigitPrinter<num_bits>;
+  char buffer[IntegralPrinter::kDigits10 + /* . */ 1];
+  buffer[IntegralPrinter::kDigits10] = '.';
+
+  const char *digits = IntegralPrinter::PrintIntegralDigitsFromRight(
+      static_cast<typename IntegralPrinter::InputType>(v), exp,
+      buffer + sizeof(buffer) - 1);
+  size_t size = buffer + sizeof(buffer) - digits;
+
+  // In `alt` mode (flag #) we keep the `.` even if there are no fractional
+  // digits. In non-alt mode, we strip it.
+  if (ABSL_PREDICT_FALSE(state.precision == 0 && !state.conv.flags().alt)) {
+    --size;
+  }
+
+  FinalPrint(string_view(digits, size), state.precision, state);
+}
+
+template <int num_bits, typename Int>
+void FormatFNegativeExp(Int v, int exp, const FormatState &state) {
+  constexpr int input_bits = sizeof(Int) * 8;
+
+  using IntegralPrinter = DigitPrinter<input_bits>;
+  using FractionalPrinter = DigitPrinter<num_bits>;
+
+  static constexpr size_t integral_size =
+      1 + /* in case we need to round up an extra digit */
+      IntegralPrinter::kDigits10 + 1;
+  char buffer[integral_size + /* . */ 1 + num_bits];
+  buffer[integral_size] = '.';
+  char *const integral_digits_end = buffer + integral_size;
+  char *integral_digits_start;
+  char *const fractional_digits_start = buffer + integral_size + 1;
+
+  if (exp < input_bits) {
+    integral_digits_start = IntegralPrinter::PrintIntegralDigitsFromRight(
+        v >> exp, 0, integral_digits_end);
+  } else {
+    integral_digits_start = integral_digits_end - 1;
+    *integral_digits_start = '0';
+  }
+
+  // PrintFractionalDigits may pull a carried 1 all the way up through the
+  // integral portion.
+  integral_digits_start[-1] = '0';
+  auto fractional_result = FractionalPrinter::PrintFractionalDigits(
+      static_cast<typename FractionalPrinter::InputType>(v),
+      fractional_digits_start, exp, state.precision);
+  if (integral_digits_start[-1] != '0') --integral_digits_start;
+
+  size_t size = fractional_result.end - integral_digits_start;
+
+  // In `alt` mode (flag #) we keep the `.` even if there are no fractional
+  // digits. In non-alt mode, we strip it.
+  if (ABSL_PREDICT_FALSE(state.precision == 0 && !state.conv.flags().alt)) {
+    --size;
+  }
+  FinalPrint(string_view(integral_digits_start, size),
+             fractional_result.precision, state);
+}
+
+template <typename Int>
+void FormatF(Int mantissa, int exp, const FormatState &state) {
+  // Remove trailing zeros as they are not useful.
+  // This helps use faster implementations/less stack space in some cases.
+  if (mantissa != 0) {
+    int trailing = TrailingZeros(mantissa);
+    mantissa >>= trailing;
+    exp += trailing;
+  }
+
+  // The table-driven dispatch gives us two benefits: fast dispatch and
+  // prevention of inlining.
+  // We must not inline any of the functions below (other than the ones for
+  // 64-bit) to avoid blowing up this stack frame.
+
+  if (exp >= 0) {
+    // We will left shift the mantissa. Calculate how many bits we need.
+    // Special case 64-bit as we will use a uint64_t for it. Use a table for the
+    // rest and unconditionally use uint128.
+    const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp;
+
+    if (total_bits <= 64) {
+      return FormatFPositiveExp<64>(mantissa, exp, state);
+    } else {
+      using Formatter = void (*)(uint128, int, const FormatState &);
+      static constexpr Formatter kFormatters[] = {
+          FormatFPositiveExp<1 << 7>,  FormatFPositiveExp<1 << 8>,
+          FormatFPositiveExp<1 << 9>,  FormatFPositiveExp<1 << 10>,
+          FormatFPositiveExp<1 << 11>, FormatFPositiveExp<1 << 12>,
+          FormatFPositiveExp<1 << 13>, FormatFPositiveExp<1 << 14>,
+          FormatFPositiveExp<1 << 15>,
+      };
+      static constexpr int max_total_bits =
+          sizeof(Int) * 8 + std::numeric_limits<long double>::max_exponent;
+      assert(total_bits <= max_total_bits);
+      static_assert(max_total_bits <= (1 << 15), "");
+      const int log2 =
+          64 - LeadingZeros((static_cast<uint64_t>(total_bits) - 1) / 128);
+      assert(log2 < std::end(kFormatters) - std::begin(kFormatters));
+      kFormatters[log2](mantissa, exp, state);
+    }
+  } else {
+    exp = -exp;
+
+    // We know we don't need more than Int itself for the integral part.
+    // We need `precision` fractional digits, but there are at most `exp`
+    // non-zero digits after the decimal point. The rest will be zeros.
+    // Special case 64-bit as we will use a uint64_t for it. Use a table for the
+    // rest and unconditionally use uint128.
+
+    if (exp <= 64) {
+      return FormatFNegativeExp<64>(mantissa, exp, state);
+    } else {
+      using Formatter = void (*)(uint128, int, const FormatState &);
+      static constexpr Formatter kFormatters[] = {
+          FormatFNegativeExp<1 << 7>,  FormatFNegativeExp<1 << 8>,
+          FormatFNegativeExp<1 << 9>,  FormatFNegativeExp<1 << 10>,
+          FormatFNegativeExp<1 << 11>, FormatFNegativeExp<1 << 12>,
+          FormatFNegativeExp<1 << 13>, FormatFNegativeExp<1 << 14>};
+      static_assert(
+          -std::numeric_limits<long double>::min_exponent <= (1 << 14), "");
+      const int log2 =
+          64 - LeadingZeros((static_cast<uint64_t>(exp) - 1) / 128);
+      assert(log2 < std::end(kFormatters) - std::begin(kFormatters));
+      kFormatters[log2](mantissa, exp, state);
+    }
+  }
+}
+
 char *CopyStringTo(string_view v, char *out) {
   std::memcpy(out, v.data(), v.size());
   return out + v.size();
@@ -95,7 +556,7 @@ template <typename Float>
 bool ConvertNonNumericFloats(char sign_char, Float v,
                              const ConversionSpec &conv, FormatSinkImpl *sink) {
   char text[4], *ptr = text;
-  if (sign_char) *ptr++ = sign_char;
+  if (sign_char != '\0') *ptr++ = sign_char;
   if (std::isnan(v)) {
     ptr = std::copy_n(conv.conv().upper() ? "NAN" : "nan", 3, ptr);
   } else if (std::isinf(v)) {
@@ -165,7 +626,12 @@ constexpr bool CanFitMantissa() {
 
 template <typename Float>
 struct Decomposed {
-  Float mantissa;
+  using MantissaType =
+      absl::conditional_t<std::is_same<long double, Float>::value, uint128,
+                          uint64_t>;
+  static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8,
+                "");
+  MantissaType mantissa;
   int exponent;
 };
 
@@ -176,7 +642,8 @@ Decomposed<Float> Decompose(Float v) {
   Float m = std::frexp(v, &exp);
   m = std::ldexp(m, std::numeric_limits<Float>::digits);
   exp -= std::numeric_limits<Float>::digits;
-  return {m, exp};
+
+  return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp};
 }
 
 // Print 'digits' as decimal.
@@ -334,7 +801,7 @@ bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out,
           static_cast<std::uint64_t>(decomposed.exponent), precision, out, exp))
     return true;
 
-#if defined(__SIZEOF_INT128__)
+#if defined(ABSL_HAVE_INTRINSIC_INT128)
   // If that is not enough, try with __uint128_t.
   return CanFitMantissa<Float, __uint128_t>() &&
          FloatToBufferImpl<__uint128_t, Float, mode>(
@@ -362,7 +829,7 @@ void WriteBufferToSink(char sign_char, string_view str,
   }
 
   sink->Append(left_spaces, ' ');
-  if (sign_char) sink->Append(1, sign_char);
+  if (sign_char != '\0') sink->Append(1, sign_char);
   sink->Append(zeros, '0');
   sink->Append(str);
   sink->Append(right_spaces, ' ');
@@ -399,12 +866,9 @@ bool FloatToSink(const Float v, const ConversionSpec &conv,
   switch (conv.conv().id()) {
     case ConversionChar::f:
     case ConversionChar::F:
-      if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer,
-                                             nullptr)) {
-        return FallbackToSnprintf(v, conv, sink);
-      }
-      if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back();
-      break;
+      FormatF(decomposed.mantissa, decomposed.exponent,
+              {sign_char, precision, conv, sink});
+      return true;
 
     case ConversionChar::e:
     case ConversionChar::E:
@@ -466,11 +930,22 @@ bool FloatToSink(const Float v, const ConversionSpec &conv,
 
 bool ConvertFloatImpl(long double v, const ConversionSpec &conv,
                       FormatSinkImpl *sink) {
+  if (std::numeric_limits<long double>::digits ==
+      2 * std::numeric_limits<double>::digits) {
+    // This is the `double-double` representation of `long double`.
+    // We do not handle it natively. Fallback to snprintf.
+    return FallbackToSnprintf(v, conv, sink);
+  }
+
   return FloatToSink(v, conv, sink);
 }
 
 bool ConvertFloatImpl(float v, const ConversionSpec &conv,
                       FormatSinkImpl *sink) {
+  // DivideBy10WithCarry is not actually used in some builds. Taking its address
+  // here silences the "unused" warning. We just need to do it in any function
+  // that is really used.
+  (void)&DivideBy10WithCarry;
   return FloatToSink(v, conv, sink);
 }