diff options
Diffstat (limited to 'absl/strings')
24 files changed, 6159 insertions, 0 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 328f52f33d26..3e50d24a322a 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -492,3 +492,142 @@ cc_test( "@com_github_google_benchmark//:benchmark_main", ], ) + +cc_library( + name = "str_format", + hdrs = [ + "str_format.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":str_format_internal", + ], +) + +cc_library( + name = "str_format_internal", + srcs = [ + "internal/str_format/arg.cc", + "internal/str_format/bind.cc", + "internal/str_format/extension.cc", + "internal/str_format/float_conversion.cc", + "internal/str_format/output.cc", + "internal/str_format/parser.cc", + ], + hdrs = [ + "internal/str_format/arg.h", + "internal/str_format/bind.h", + "internal/str_format/checker.h", + "internal/str_format/extension.h", + "internal/str_format/float_conversion.h", + "internal/str_format/output.h", + "internal/str_format/parser.h", + ], + copts = ABSL_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/container:inlined_vector", + "//absl/meta:type_traits", + "//absl/numeric:int128", + "//absl/types:span", + ], +) + +cc_test( + name = "str_format_test", + srcs = ["str_format_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + ":strings", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_extension_test", + srcs = [ + "internal/str_format/extension_test.cc", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + ":str_format_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_arg_test", + srcs = ["internal/str_format/arg_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + ":str_format_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_bind_test", + srcs = ["internal/str_format/bind_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_checker_test", + srcs = ["internal/str_format/checker_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_convert_test", + size = "small", + srcs = ["internal/str_format/convert_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + "//absl/numeric:int128", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_output_test", + srcs = ["internal/str_format/output_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_parser_test", + srcs = ["internal/str_format/parser_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index cab2c4561e48..3d30c72a0cda 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -81,6 +81,58 @@ absl_library( strings ) +# add str_format library +absl_library( + TARGET + absl_str_format + SOURCES + "str_format.h" + PUBLIC_LIBRARIES + str_format_internal + EXPORT_NAME + str_format +) + +# str_format_internal + absl_library( + TARGET + str_format_internal + SOURCES + "internal/str_format/arg.cc" + "internal/str_format/bind.cc" + "internal/str_format/extension.cc" + "internal/str_format/float_conversion.cc" + "internal/str_format/output.cc" + "internal/str_format/parser.cc" + "internal/str_format/arg.h" + "internal/str_format/bind.h" + "internal/str_format/checker.h" + "internal/str_format/extension.h" + "internal/str_format/float_conversion.h" + "internal/str_format/output.h" + "internal/str_format/parser.h" + PUBLIC_LIBRARIES + str_format_extension_internal + absl::strings + absl::base + absl::numeric + absl::container + absl::span +) + +# str_format_extension_internal +absl_library( + TARGET + str_format_extension_internal + SOURCES + "internal/str_format/extension.cc" + "internal/str_format/extension.h" + "internal/str_format/output.cc" + "internal/str_format/output.h" + PUBLIC_LIBRARIES + absl::base + absl::strings +) # ## TESTS @@ -347,3 +399,68 @@ absl_test( PUBLIC_LIBRARIES ${CHARCONV_BIGINT_TEST_PUBLIC_LIBRARIES} ) +# test str_format_test +absl_test( + TARGET + str_format_test + SOURCES + "str_format_test.cc" + PUBLIC_LIBRARIES + absl::base + absl::str_format + absl::strings +) + +# test str_format_bind_test +absl_test( + TARGET + str_format_bind_test + SOURCES + "internal/str_format/bind_test.cc" + PUBLIC_LIBRARIES + str_format_internal +) + +# test str_format_checker_test +absl_test( + TARGET + str_format_checker_test + SOURCES + "internal/str_format/checker_test.cc" + PUBLIC_LIBRARIES + absl::str_format +) + +# test str_format_convert_test +absl_test( + TARGET + str_format_convert_test + SOURCES + "internal/str_format/convert_test.cc" + PUBLIC_LIBRARIES + str_format_internal + absl::numeric +) + +# test str_format_output_test +absl_test( + TARGET + str_format_output_test + SOURCES + "internal/str_format/output_test.cc" + PUBLIC_LIBRARIES + str_format_extension_internal +) + +# test str_format_parser_test +absl_test( + TARGET + str_format_parser_test + SOURCES + "internal/str_format/parser_test.cc" + PUBLIC_LIBRARIES + str_format_internal + absl::base +) + + diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc new file mode 100644 index 000000000000..eafb068fe286 --- /dev/null +++ b/absl/strings/internal/str_format/arg.cc @@ -0,0 +1,399 @@ +// +// POSIX spec: +// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html +// +#include "absl/strings/internal/str_format/arg.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/strings/internal/str_format/float_conversion.h" + +namespace absl { +namespace str_format_internal { +namespace { + +const char kDigit[2][32] = { "0123456789abcdef", "0123456789ABCDEF" }; + +// Reduce *capacity by s.size(), clipped to a 0 minimum. +void ReducePadding(string_view s, size_t *capacity) { + *capacity = Excess(s.size(), *capacity); +} + +// Reduce *capacity by n, clipped to a 0 minimum. +void ReducePadding(size_t n, size_t *capacity) { + *capacity = Excess(n, *capacity); +} + +template <typename T> +struct MakeUnsigned : std::make_unsigned<T> {}; +template <> +struct MakeUnsigned<absl::uint128> { + using type = absl::uint128; +}; + +template <typename T> +struct IsSigned : std::is_signed<T> {}; +template <> +struct IsSigned<absl::uint128> : std::false_type {}; + +class ConvertedIntInfo { + public: + template <typename T> + ConvertedIntInfo(T v, ConversionChar conv) { + using Unsigned = typename MakeUnsigned<T>::type; + auto u = static_cast<Unsigned>(v); + if (IsNeg(v)) { + is_neg_ = true; + u = Unsigned{} - u; + } else { + is_neg_ = false; + } + UnsignedToStringRight(u, conv); + } + + string_view digits() const { + return {end() - size_, static_cast<size_t>(size_)}; + } + bool is_neg() const { return is_neg_; } + + private: + template <typename T, bool IsSigned> + struct IsNegImpl { + static bool Eval(T v) { return v < 0; } + }; + template <typename T> + struct IsNegImpl<T, false> { + static bool Eval(T) { + return false; + } + }; + + template <typename T> + bool IsNeg(T v) { + return IsNegImpl<T, IsSigned<T>::value>::Eval(v); + } + + template <typename T> + void UnsignedToStringRight(T u, ConversionChar conv) { + char *p = end(); + switch (conv.radix()) { + default: + case 10: + for (; u; u /= 10) + *--p = static_cast<char>('0' + static_cast<size_t>(u % 10)); + break; + case 8: + for (; u; u /= 8) + *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); + break; + case 16: { + const char *digits = kDigit[conv.upper() ? 1 : 0]; + for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; + break; + } + } + size_ = static_cast<int>(end() - p); + } + + const char *end() const { return storage_ + sizeof(storage_); } + char *end() { return storage_ + sizeof(storage_); } + + bool is_neg_; + int size_; + // Max size: 128 bit value as octal -> 43 digits + char storage_[128 / 3 + 1]; +}; + +// Note: 'o' conversions do not have a base indicator, it's just that +// the '#' flag is specified to modify the precision for 'o' conversions. +string_view BaseIndicator(const ConvertedIntInfo &info, + const ConversionSpec &conv) { + bool alt = conv.flags().alt; + int radix = conv.conv().radix(); + if (conv.conv().id() == ConversionChar::p) + alt = true; // always show 0x for %p. + // From the POSIX description of '#' flag: + // "For x or X conversion specifiers, a non-zero result shall have + // 0x (or 0X) prefixed to it." + if (alt && radix == 16 && !info.digits().empty()) { + if (conv.conv().upper()) return "0X"; + return "0x"; + } + return {}; +} + +string_view SignColumn(bool neg, const ConversionSpec &conv) { + if (conv.conv().is_signed()) { + if (neg) return "-"; + if (conv.flags().show_pos) return "+"; + if (conv.flags().sign_col) return " "; + } + return {}; +} + +bool ConvertCharImpl(unsigned char v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + ReducePadding(1, &fill); + if (!conv.flags().left) sink->Append(fill, ' '); + sink->Append(1, v); + if (conv.flags().left) sink->Append(fill, ' '); + return true; +} + +bool ConvertIntImplInner(const ConvertedIntInfo &info, + const ConversionSpec &conv, FormatSinkImpl *sink) { + // Print as a sequence of Substrings: + // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + + string_view formatted = info.digits(); + ReducePadding(formatted, &fill); + + string_view sign = SignColumn(info.is_neg(), conv); + ReducePadding(sign, &fill); + + string_view base_indicator = BaseIndicator(info, conv); + ReducePadding(base_indicator, &fill); + + int precision = conv.precision(); + bool precision_specified = precision >= 0; + if (!precision_specified) + precision = 1; + + if (conv.flags().alt && conv.conv().id() == ConversionChar::o) { + // From POSIX description of the '#' (alt) flag: + // "For o conversion, it increases the precision (if necessary) to + // force the first digit of the result to be zero." + if (formatted.empty() || *formatted.begin() != '0') { + int needed = static_cast<int>(formatted.size()) + 1; + precision = std::max(precision, needed); + } + } + + size_t num_zeroes = Excess(formatted.size(), precision); + ReducePadding(num_zeroes, &fill); + + size_t num_left_spaces = !conv.flags().left ? fill : 0; + size_t num_right_spaces = conv.flags().left ? fill : 0; + + // From POSIX description of the '0' (zero) flag: + // "For d, i, o, u, x, and X conversion specifiers, if a precision + // is specified, the '0' flag is ignored." + if (!precision_specified && conv.flags().zero) { + num_zeroes += num_left_spaces; + num_left_spaces = 0; + } + + sink->Append(num_left_spaces, ' '); + sink->Append(sign); + sink->Append(base_indicator); + sink->Append(num_zeroes, '0'); + sink->Append(formatted); + sink->Append(num_right_spaces, ' '); + return true; +} + +template <typename T> +bool ConvertIntImplInner(T v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + ConvertedIntInfo info(v, conv.conv()); + if (conv.flags().basic && conv.conv().id() != ConversionChar::p) { + if (info.is_neg()) sink->Append(1, '-'); + if (info.digits().empty()) { + sink->Append(1, '0'); + } else { + sink->Append(info.digits()); + } + return true; + } + return ConvertIntImplInner(info, conv, sink); +} + +template <typename T> +bool ConvertIntArg(T v, const ConversionSpec &conv, FormatSinkImpl *sink) { + if (conv.conv().is_float()) { + return FormatConvertImpl(static_cast<double>(v), conv, sink).value; + } + if (conv.conv().id() == ConversionChar::c) + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + if (!conv.conv().is_integral()) + return false; + if (!conv.conv().is_signed() && IsSigned<T>::value) { + using U = typename MakeUnsigned<T>::type; + return FormatConvertImpl(static_cast<U>(v), conv, sink).value; + } + return ConvertIntImplInner(v, conv, sink); +} + +template <typename T> +bool ConvertFloatArg(T v, const ConversionSpec &conv, FormatSinkImpl *sink) { + return conv.conv().is_float() && ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + if (conv.conv().id() != ConversionChar::s) + return false; + if (conv.flags().basic) { + sink->Append(v); + return true; + } + return sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.flags().left); +} + +} // namespace + +// ==================== Strings ==================== +ConvertResult<Conv::s> FormatConvertImpl(const std::string &v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +ConvertResult<Conv::s> FormatConvertImpl(string_view v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + if (conv.conv().id() == ConversionChar::p) + return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; + size_t len; + if (v == nullptr) { + len = 0; + } else if (conv.precision() < 0) { + len = std::strlen(v); + } else { + // If precision is set, we look for the null terminator on the valid range. + len = std::find(v, v + conv.precision(), '\0') - v; + } + return {ConvertStringArg(string_view(v, len), conv, sink)}; +} + +// ==================== Raw pointers ==================== +ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + if (conv.conv().id() != ConversionChar::p) + return {false}; + if (!v.value) { + sink->Append("(nil)"); + return {true}; + } + return {ConvertIntImplInner(v.value, conv, sink)}; +} + +// ==================== Floats ==================== +FloatingConvertResult FormatConvertImpl(float v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(double v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(long double v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} + +// ==================== Chars ==================== +IntegralConvertResult FormatConvertImpl(char v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(signed char v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned char v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +// ==================== Ints ==================== +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(int v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(absl::uint128 v, + const ConversionSpec &conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +template struct FormatArgImpl::TypedVTable<str_format_internal::VoidPtr>; + +template struct FormatArgImpl::TypedVTable<bool>; +template struct FormatArgImpl::TypedVTable<char>; +template struct FormatArgImpl::TypedVTable<signed char>; +template struct FormatArgImpl::TypedVTable<unsigned char>; +template struct FormatArgImpl::TypedVTable<short>; // NOLINT +template struct FormatArgImpl::TypedVTable<unsigned short>; // NOLINT +template struct FormatArgImpl::TypedVTable<int>; +template struct FormatArgImpl::TypedVTable<unsigned>; +template struct FormatArgImpl::TypedVTable<long>; // NOLINT +template struct FormatArgImpl::TypedVTable<unsigned long>; // NOLINT +template struct FormatArgImpl::TypedVTable<long long>; // NOLINT +template struct FormatArgImpl::TypedVTable<unsigned long long>; // NOLINT +template struct FormatArgImpl::TypedVTable<absl::uint128>; + +template struct FormatArgImpl::TypedVTable<float>; +template struct FormatArgImpl::TypedVTable<double>; +template struct FormatArgImpl::TypedVTable<long double>; + +template struct FormatArgImpl::TypedVTable<const char *>; +template struct FormatArgImpl::TypedVTable<std::string>; +template struct FormatArgImpl::TypedVTable<string_view>; + +} // namespace str_format_internal + +} // namespace absl diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h new file mode 100644 index 000000000000..a9562188ea91 --- /dev/null +++ b/absl/strings/internal/str_format/arg.h @@ -0,0 +1,434 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ + +#include <string.h> +#include <wchar.h> + +#include <cstdio> +#include <iomanip> +#include <limits> +#include <sstream> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/int128.h" +#include "absl/strings/internal/str_format/extension.h" +#include "absl/strings/string_view.h" + +class Cord; +class CordReader; + +namespace absl { + +class FormatCountCapture; +class FormatSink; + +namespace str_format_internal { + +template <typename T, typename = void> +struct HasUserDefinedConvert : std::false_type {}; + +template <typename T> +struct HasUserDefinedConvert< + T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), std::declval<const ConversionSpec&>(), + std::declval<FormatSink*>()))>> : std::true_type {}; +template <typename T> +class StreamedWrapper; + +// If 'v' can be converted (in the printf sense) according to 'conv', +// then convert it, appending to `sink` and return `true`. +// Otherwise fail and return `false`. +// Raw pointers. +struct VoidPtr { + VoidPtr() = default; + template <typename T, + decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0> + VoidPtr(T* ptr) // NOLINT + : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} + uintptr_t value; +}; +ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec& conv, + FormatSinkImpl* sink); + +// Strings. +ConvertResult<Conv::s> FormatConvertImpl(const std::string& v, + const ConversionSpec& conv, + FormatSinkImpl* sink); +ConvertResult<Conv::s> FormatConvertImpl(string_view v, + const ConversionSpec& conv, + FormatSinkImpl* sink); +ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char* v, + const ConversionSpec& conv, + FormatSinkImpl* sink); +template <class AbslCord, + typename std::enable_if< + std::is_same<AbslCord, ::Cord>::value>::type* = nullptr, + class AbslCordReader = ::CordReader> +ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, + const ConversionSpec& conv, + FormatSinkImpl* sink) { + if (conv.conv().id() != ConversionChar::s) return {false}; + + bool is_left = conv.flags().left; + size_t space_remaining = 0; + + int width = conv.width(); + if (width >= 0) space_remaining = width; + + size_t to_write = value.size(); + + int precision = conv.precision(); + if (precision >= 0) + to_write = std::min(to_write, static_cast<size_t>(precision)); + + space_remaining = Excess(to_write, space_remaining); + + if (space_remaining > 0 && !is_left) sink->Append(space_remaining, ' '); + + string_view piece; + for (AbslCordReader reader(value); + to_write > 0 && reader.ReadFragment(&piece); to_write -= piece.size()) { + if (piece.size() > to_write) piece.remove_suffix(piece.size() - to_write); + sink->Append(piece); + } + + if (space_remaining > 0 && is_left) sink->Append(space_remaining, ' '); + return {true}; +} + +using IntegralConvertResult = + ConvertResult<Conv::c | Conv::numeric | Conv::star>; +using FloatingConvertResult = ConvertResult<Conv::floating>; + +// Floats. +FloatingConvertResult FormatConvertImpl(float v, const ConversionSpec& conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(double v, const ConversionSpec& conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(long double v, + const ConversionSpec& conv, + FormatSinkImpl* sink); + +// Chars. +IntegralConvertResult FormatConvertImpl(char v, const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(signed char v, + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned char v, + const ConversionSpec& conv, + FormatSinkImpl* sink); + +// Ints. +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int v, const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned v, const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + const ConversionSpec& conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(uint128 v, const ConversionSpec& conv, + FormatSinkImpl* sink); +template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> +IntegralConvertResult FormatConvertImpl(T v, const ConversionSpec& conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(static_cast<int>(v), conv, sink); +} + +// We provide this function to help the checker, but it is never defined. +// FormatArgImpl will use the underlying Convert functions instead. +template <typename T> +typename std::enable_if<std::is_enum<T>::value && + !HasUserDefinedConvert<T>::value, + IntegralConvertResult>::type +FormatConvertImpl(T v, const ConversionSpec& conv, FormatSinkImpl* sink); + +template <typename T> +ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, + const ConversionSpec& conv, + FormatSinkImpl* out) { + std::ostringstream oss; + oss << v.v_; + if (!oss) return {false}; + return str_format_internal::FormatConvertImpl(oss.str(), conv, out); +} + +// Use templates and dependent types to delay evaluation of the function +// until after FormatCountCapture is fully defined. +struct FormatCountCaptureHelper { + template <class T = int> + static ConvertResult<Conv::n> ConvertHelper(const FormatCountCapture& v, + const ConversionSpec& conv, + FormatSinkImpl* sink) { + const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; + + if (conv.conv().id() != str_format_internal::ConversionChar::n) + return {false}; + *v2.p_ = static_cast<int>(sink->size()); + return {true}; + } +}; + +template <class T = int> +ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, + const ConversionSpec& conv, + FormatSinkImpl* sink) { + return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); +} + +// Helper friend struct to hide implementation details from the public API of +// FormatArgImpl. +struct FormatArgImplFriend { + template <typename Arg> + static bool ToInt(Arg arg, int* out) { + if (!arg.vtbl_->to_int) return false; + *out = arg.vtbl_->to_int(arg.data_); + return true; + } + + template <typename Arg> + static bool Convert(Arg arg, const str_format_internal::ConversionSpec& conv, + FormatSinkImpl* out) { + return arg.vtbl_->convert(arg.data_, conv, out); + } + + template <typename Arg> + static const void* GetVTablePtrForTest(Arg arg) { + return arg.vtbl_; + } +}; + +// A type-erased handle to a format argument. +class FormatArgImpl { + private: + enum { kInlinedSpace = 8 }; + + using VoidPtr = str_format_internal::VoidPtr; + + union Data { + const void* ptr; + const volatile void* volatile_ptr; + char buf[kInlinedSpace]; + }; + + struct VTable { + bool (*convert)(Data, const str_format_internal::ConversionSpec& conv, + FormatSinkImpl* out); + int (*to_int)(Data); + }; + + template <typename T> + struct store_by_value + : std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) && + (std::is_integral<T>::value || + std::is_floating_point<T>::value || + std::is_pointer<T>::value || + std::is_same<VoidPtr, T>::value)> {}; + + enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue }; + template <typename T> + struct storage_policy + : std::integral_constant<StoragePolicy, + (std::is_volatile<T>::value + ? ByVolatilePointer + : (store_by_value<T>::value ? ByValue + : ByPointer))> { + }; + + // An instance of an FormatArgImpl::VTable suitable for 'T'. + template <typename T> + struct TypedVTable; + + // To reduce the number of vtables we will decay values before hand. + // Anything with a user-defined Convert will get its own vtable. + // For everything else: + // - Decay char* and char arrays into `const char*` + // - Decay any other pointer to `const void*` + // - Decay all enums to their underlying type. + // - Decay function pointers to void*. + template <typename T, typename = void> + struct DecayType { + static constexpr bool kHasUserDefined = + str_format_internal::HasUserDefinedConvert<T>::value; + using type = typename std::conditional< + !kHasUserDefined && std::is_convertible<T, const char*>::value, + const char*, + typename std::conditional<!kHasUserDefined && + std::is_convertible<T, VoidPtr>::value, + VoidPtr, const T&>::type>::type; + }; + template <typename T> + struct DecayType<T, + typename std::enable_if< + !str_format_internal::HasUserDefinedConvert<T>::value && + std::is_enum<T>::value>::type> { + using type = typename std::underlying_type<T>::type; + }; + + public: + template <typename T> + explicit FormatArgImpl(const T& value) { + using D = typename DecayType<T>::type; + static_assert( + std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue, + "Decayed types must be stored by value"); + Init(static_cast<D>(value)); + } + + private: + friend struct str_format_internal::FormatArgImplFriend; + template <typename T, StoragePolicy = storage_policy<T>::value> + struct Manager; + + template <typename T> + struct Manager<T, ByPointer> { + static Data SetValue(const T& value) { + Data data; + data.ptr = &value; + return data; + } + + static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); } + }; + + template <typename T> + struct Manager<T, ByVolatilePointer> { + static Data SetValue(const T& value) { + Data data; + data.volatile_ptr = &value; + return data; + } + + static const T& Value(Data arg) { + return *static_cast<const T*>(arg.volatile_ptr); + } + }; + + template <typename T> + struct Manager<T, ByValue> { + static Data SetValue(const T& value) { + Data data; + memcpy(data.buf, &value, sizeof(value)); + return data; + } + + static T Value(Data arg) { + T value; + memcpy(&value, arg.buf, sizeof(T)); + return value; + } + }; + + template <typename T> + void Init(const T& value); + + template <typename T> + static int ToIntVal(const T& val) { + using CommonType = typename std::conditional<std::is_signed<T>::value, + int64_t, uint64_t>::type; + if (static_cast<CommonType>(val) > + static_cast<CommonType>(std::numeric_limits<int>::max())) { + return std::numeric_limits<int>::max(); + } else if (std::is_signed<T>::value && + static_cast<CommonType>(val) < + static_cast<CommonType>(std::numeric_limits<int>::min())) { + return std::numeric_limits<int>::min(); + } + return static_cast<int>(val); + } + + Data data_; + const VTable* vtbl_; +}; + +template <typename T> +struct FormatArgImpl::TypedVTable { + private: + static bool ConvertImpl(Data arg, + const str_format_internal::ConversionSpec& conv, + FormatSinkImpl* out) { + return str_format_internal::FormatConvertImpl(Manager<T>::Value(arg), conv, + out) + .value; + } + + template <typename U = T, typename = void> + struct ToIntImpl { + static constexpr int (*value)(Data) = nullptr; + }; + + template <typename U> + struct ToIntImpl<U, + typename std::enable_if<std::is_integral<U>::value>::type> { + static int Invoke(Data arg) { return ToIntVal(Manager<T>::Value(arg)); } + static constexpr int (*value)(Data) = &Invoke; + }; + + template <typename U> + struct ToIntImpl<U, typename std::enable_if<std::is_enum<U>::value>::type> { + static int Invoke(Data arg) { + return ToIntVal(static_cast<typename std::underlying_type<T>::type>( + Manager<T>::Value(arg))); + } + static constexpr int (*value)(Data) = &Invoke; + }; + + public: + static constexpr VTable value{&ConvertImpl, ToIntImpl<>::value}; +}; + +template <typename T> +constexpr FormatArgImpl::VTable FormatArgImpl::TypedVTable<T>::value; + +template <typename T> +void FormatArgImpl::Init(const T& value) { + data_ = Manager<T>::SetValue(value); + vtbl_ = &TypedVTable<T>::value; +} + +extern template struct FormatArgImpl::TypedVTable<str_format_internal::VoidPtr>; + +extern template struct FormatArgImpl::TypedVTable<bool>; +extern template struct FormatArgImpl::TypedVTable<char>; +extern template struct FormatArgImpl::TypedVTable<signed char>; +extern template struct FormatArgImpl::TypedVTable<unsigned char>; +extern template struct FormatArgImpl::TypedVTable<short>; // NOLINT +extern template struct FormatArgImpl::TypedVTable<unsigned short>; // NOLINT +extern template struct FormatArgImpl::TypedVTable<int>; +extern template struct FormatArgImpl::TypedVTable<unsigned>; +extern template struct FormatArgImpl::TypedVTable<long>; // NOLINT +extern template struct FormatArgImpl::TypedVTable<unsigned long>; // NOLINT +extern template struct FormatArgImpl::TypedVTable<long long>; // NOLINT +extern template struct FormatArgImpl::TypedVTable< + unsigned long long>; // NOLINT +extern template struct FormatArgImpl::TypedVTable<uint128>; + +extern template struct FormatArgImpl::TypedVTable<float>; +extern template struct FormatArgImpl::TypedVTable<double>; +extern template struct FormatArgImpl::TypedVTable<long double>; + +extern template struct FormatArgImpl::TypedVTable<const char*>; +extern template struct FormatArgImpl::TypedVTable<std::string>; +extern template struct FormatArgImpl::TypedVTable<string_view>; +} // namespace str_format_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc new file mode 100644 index 000000000000..83d59048ea27 --- /dev/null +++ b/absl/strings/internal/str_format/arg_test.cc @@ -0,0 +1,111 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +#include "absl/strings/internal/str_format/arg.h" + +#include <ostream> +#include <string> +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +namespace str_format_internal { +namespace { + +class FormatArgImplTest : public ::testing::Test { + public: + enum Color { kRed, kGreen, kBlue }; + + static const char *hi() { return "hi"; } +}; + +TEST_F(FormatArgImplTest, ToInt) { + int out = 0; + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out)); + EXPECT_EQ(1, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out)); + EXPECT_EQ(-1, out); + EXPECT_TRUE( + FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out)); + EXPECT_EQ(64, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<unsigned long long>(123456)), &out)); // NOLINT + EXPECT_EQ(123456, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<unsigned long long>( // NOLINT + std::numeric_limits<int>::max()) + + 1), + &out)); + EXPECT_EQ(std::numeric_limits<int>::max(), out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<long long>( // NOLINT + std::numeric_limits<int>::min()) - + 10), + &out)); + EXPECT_EQ(std::numeric_limits<int>::min(), out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out)); + EXPECT_EQ(0, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out)); + EXPECT_EQ(1, out); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<int *>(nullptr)), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out)); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out)); + EXPECT_EQ(2, out); +} + +extern const char kMyArray[]; + +TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) { + const char* a = ""; + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(""))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray))); +} + +TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { + auto expected = FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<void *>(nullptr))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<int *>(nullptr))), + expected); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<volatile int *>(nullptr))), + expected); + + auto p = static_cast<void (*)()>([] {}); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)), + expected); +} + +TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { + std::string s; + FormatSinkImpl sink(&s); + ConversionSpec conv; + conv.set_conv(ConversionChar::FromChar('s')); + conv.set_flags(Flags()); + conv.set_width(-1); + conv.set_precision(-1); + EXPECT_TRUE( + FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink)); + sink.Flush(); + EXPECT_EQ("ABCDE", s); +} +const char kMyArray[] = "ABCDE"; + +} // namespace +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc new file mode 100644 index 000000000000..33e8641558ab --- /dev/null +++ b/absl/strings/internal/str_format/bind.cc @@ -0,0 +1,232 @@ +#include "absl/strings/internal/str_format/bind.h" + +#include <cerrno> +#include <limits> +#include <sstream> +#include <string> + +namespace absl { +namespace str_format_internal { + +namespace { + +inline bool BindFromPosition(int position, int* value, + absl::Span<const FormatArgImpl> pack) { + assert(position > 0); + if (static_cast<size_t>(position) > pack.size()) { + return false; + } + // -1 because positions are 1-based + return FormatArgImplFriend::ToInt(pack[position - 1], value); +} + +class ArgContext { + public: + explicit ArgContext(absl::Span<const FormatArgImpl> pack) : pack_(pack) {} + + // Fill 'bound' with the results of applying the context's argument pack + // to the specified 'props'. We synthesize a BoundConversion by + // lining up a UnboundConversion with a user argument. We also + // resolve any '*' specifiers for width and precision, so after + // this call, 'bound' has all the information it needs to be formatted. + // Returns false on failure. + bool Bind(const UnboundConversion *props, BoundConversion *bound); + + private: + absl::Span<const FormatArgImpl> pack_; +}; + +inline bool ArgContext::Bind(const UnboundConversion* unbound, + BoundConversion* bound) { + const FormatArgImpl* arg = nullptr; + int arg_position = unbound->arg_position; + if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; + arg = &pack_[arg_position - 1]; // 1-based + + if (!unbound->flags.basic) { + int width = unbound->width.value(); + bool force_left = false; + if (unbound->width.is_from_arg()) { + if (!BindFromPosition(unbound->width.get_from_arg(), &width, pack_)) + return false; + if (width < 0) { + // "A negative field width is taken as a '-' flag followed by a + // positive field width." + force_left = true; + width = -width; + } + } + + int precision = unbound->precision.value(); + if (unbound->precision.is_from_arg()) { + if (!BindFromPosition(unbound->precision.get_from_arg(), &precision, + pack_)) + return false; + } + + bound->set_width(width); + bound->set_precision(precision); + bound->set_flags(unbound->flags); + if (force_left) + bound->set_left(true); + } else { + bound->set_flags(unbound->flags); + bound->set_width(-1); + bound->set_precision(-1); + } + + bound->set_length_mod(unbound->length_mod); + bound->set_conv(unbound->conv); + bound->set_arg(arg); + return true; +} + +template <typename Converter> +class ConverterConsumer { + public: + ConverterConsumer(Converter converter, absl::Span<const FormatArgImpl> pack) + : converter_(converter), arg_context_(pack) {} + + bool Append(string_view s) { + converter_.Append(s); + return true; + } + bool ConvertOne(const UnboundConversion& conv, string_view conv_string) { + BoundConversion bound; + if (!arg_context_.Bind(&conv, &bound)) return false; + return converter_.ConvertOne(bound, conv_string); + } + + private: + Converter converter_; + ArgContext arg_context_; +}; + +template <typename Converter> +bool ConvertAll(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args, + const Converter& converter) { + const ParsedFormatBase* pc = format.parsed_conversion(); + if (pc) + return pc->ProcessFormat(ConverterConsumer<Converter>(converter, args)); + + return ParseFormatString(format.str(), + ConverterConsumer<Converter>(converter, args)); +} + +class DefaultConverter { + public: + explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_); + } + + private: + FormatSinkImpl* sink_; +}; + +class SummarizingConverter { + public: + explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + UntypedFormatSpecImpl spec("%d"); + + std::ostringstream ss; + ss << "{" << Streamable(spec, {*bound.arg()}) << ":" << bound.flags(); + if (bound.width() >= 0) ss << bound.width(); + if (bound.precision() >= 0) ss << "." << bound.precision(); + ss << bound.length_mod() << bound.conv() << "}"; + Append(ss.str()); + return true; + } + + private: + FormatSinkImpl* sink_; +}; + +} // namespace + +bool BindWithPack(const UnboundConversion* props, + absl::Span<const FormatArgImpl> pack, + BoundConversion* bound) { + return ArgContext(pack).Bind(props, bound); +} + +std::string Summarize(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) { + typedef SummarizingConverter Converter; + std::string out; + { + // inner block to destroy sink before returning out. It ensures a last + // flush. + FormatSinkImpl sink(&out); + if (!ConvertAll(format, args, Converter(&sink))) { + sink.Flush(); + out.clear(); + } + } + return out; +} + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) { + FormatSinkImpl sink(raw_sink); + using Converter = DefaultConverter; + if (!ConvertAll(format, args, Converter(&sink))) { + sink.Flush(); + return false; + } + return true; +} + +std::ostream& Streamable::Print(std::ostream& os) const { + if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit); + return os; +} + +std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) { + size_t orig = out->size(); + if (!FormatUntyped(out, format, args)) out->resize(orig); + return *out; +} + +int FprintF(std::FILE* output, const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) { + FILERawSink sink(output); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + if (sink.error()) { + errno = sink.error(); + return -1; + } + if (sink.count() > std::numeric_limits<int>::max()) { + errno = EFBIG; + return -1; + } + return static_cast<int>(sink.count()); +} + +int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) { + BufferRawSink sink(output, size ? size - 1 : 0); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + size_t total = sink.total_written(); + if (size) output[std::min(total, size - 1)] = 0; + return static_cast<int>(total); +} + +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h new file mode 100644 index 000000000000..4008611211cf --- /dev/null +++ b/absl/strings/internal/str_format/bind.h @@ -0,0 +1,189 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ + +#include <array> +#include <cstdio> +#include <sstream> +#include <string> + +#include "absl/base/port.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/parser.h" +#include "absl/types/span.h" + +namespace absl { + +class UntypedFormatSpec; + +namespace str_format_internal { + +class BoundConversion : public ConversionSpec { + public: + const FormatArgImpl* arg() const { return arg_; } + void set_arg(const FormatArgImpl* a) { arg_ = a; } + + private: + const FormatArgImpl* arg_; +}; + +// This is the type-erased class that the implementation uses. +class UntypedFormatSpecImpl { + public: + UntypedFormatSpecImpl() = delete; + + explicit UntypedFormatSpecImpl(string_view s) : str_(s), pc_() {} + explicit UntypedFormatSpecImpl( + const str_format_internal::ParsedFormatBase* pc) + : pc_(pc) {} + string_view str() const { return str_; } + const str_format_internal::ParsedFormatBase* parsed_conversion() const { + return pc_; + } + + template <typename T> + static const UntypedFormatSpecImpl& Extract(const T& s) { + return s.spec_; + } + + private: + string_view str_; + const str_format_internal::ParsedFormatBase* pc_; +}; + +template <typename T, typename...> +struct MakeDependent { + using type = T; +}; + +// Implicitly convertible from `const char*`, `string_view`, and the +// `ExtendedParsedFormat` type. This abstraction allows all format functions to +// operate on any without providing too many overloads. +template <typename... Args> +class FormatSpecTemplate + : public MakeDependent<UntypedFormatSpec, Args...>::type { + using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; + + public: +#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + // Honeypot overload for when the std::string is not constexpr. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + FormatSpecTemplate(...) // NOLINT + __attribute__((unavailable("Format std::string is not constexpr."))); + + // Honeypot overload for when the format is constexpr and invalid. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + // To avoid checking the format twice, we just check that the format is + // constexpr. If is it valid, then the overload below will kick in. + // We add the template here to make this overload have lower priority. + template <typename = void> + FormatSpecTemplate(const char* s) // NOLINT + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"), + unavailable( + "Format specified does not match the arguments passed."))); + + template <typename T = void> + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(str_format_internal::EnsureConstexpr(s), + "constexpr trap"))) { + static_assert(sizeof(T*) == 0, + "Format specified does not match the arguments passed."); + } + + // Good format overload. + FormatSpecTemplate(const char* s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), + "bad format trap"))) + : Base(s) {} + + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), + "bad format trap"))) + : Base(s) {} + +#else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + FormatSpecTemplate(const char* s) : Base(s) {} // NOLINT + FormatSpecTemplate(string_view s) : Base(s) {} // NOLINT + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + template <Conv... C, typename = typename std::enable_if< + sizeof...(C) == sizeof...(Args) && + AllOf(Contains(ArgumentToConv<Args>(), + C)...)>::type> + FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT + : Base(&pc) {} +}; + +template <typename... Args> +struct FormatSpecDeductionBarrier { + using type = FormatSpecTemplate<Args...>; +}; + +class Streamable { + public: + Streamable(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) + : format_(format), args_(args.begin(), args.end()) {} + + std::ostream& Print(std::ostream& os) const; + + friend std::ostream& operator<<(std::ostream& os, const Streamable& l) { + return l.Print(os); + } + + private: + const UntypedFormatSpecImpl& format_; + absl::InlinedVector<FormatArgImpl, 4> args_; +}; + +// for testing +std::string Summarize(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args); +bool BindWithPack(const UnboundConversion* props, + absl::Span<const FormatArgImpl> pack, BoundConversion* bound); + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args); + +std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args); + +inline std::string FormatPack(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) { + std::string out; + AppendPack(&out, format, args); + return out; +} + +int FprintF(std::FILE* output, const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args); +int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args); + +// Returned by Streamed(v). Converts via '%s' to the std::string created +// by std::ostream << v. +template <typename T> +class StreamedWrapper { + public: + explicit StreamedWrapper(const T& v) : v_(v) { } + + private: + template <typename S> + friend ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<S>& v, + const ConversionSpec& conv, + FormatSinkImpl* out); + const T& v_; +}; + +} // namespace str_format_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ diff --git a/absl/strings/internal/str_format/bind_test.cc b/absl/strings/internal/str_format/bind_test.cc new file mode 100644 index 000000000000..47575739ba13 --- /dev/null +++ b/absl/strings/internal/str_format/bind_test.cc @@ -0,0 +1,131 @@ +#include "absl/strings/internal/str_format/bind.h" + +#include <string.h> + +#include "gtest/gtest.h" + +namespace absl { +namespace str_format_internal { +namespace { + +template <typename T, size_t N> +size_t ArraySize(T (&)[N]) { + return N; +} + +class FormatBindTest : public ::testing::Test { + public: + bool Extract(const char *s, UnboundConversion *props, int *next) const { + absl::string_view src = s; + return ConsumeUnboundConversion(&src, props, next) && src.empty(); + } +}; + +TEST_F(FormatBindTest, BindSingle) { + struct Expectation { + int line; + const char *fmt; + int ok_phases; + const FormatArgImpl *arg; + int width; + int precision; + int next_arg; + }; + const int no = -1; + const int ia[] = { 10, 20, 30, 40}; + const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]), + FormatArgImpl(ia[2]), FormatArgImpl(ia[3])}; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + const Expectation kExpect[] = { + {__LINE__, "d", 2, &args[0], no, no, 2}, + {__LINE__, "4d", 2, &args[0], 4, no, 2}, + {__LINE__, ".5d", 2, &args[0], no, 5, 2}, + {__LINE__, "4.5d", 2, &args[0], 4, 5, 2}, + {__LINE__, "*d", 2, &args[1], 10, no, 3}, + {__LINE__, ".*d", 2, &args[1], no, 10, 3}, + {__LINE__, "*.*d", 2, &args[2], 10, 20, 4}, + {__LINE__, "1$d", 2, &args[0], no, no, 0}, + {__LINE__, "2$d", 2, &args[1], no, no, 0}, + {__LINE__, "3$d", 2, &args[2], no, no, 0}, + {__LINE__, "4$d", 2, &args[3], no, no, 0}, + {__LINE__, "2$*1$d", 2, &args[1], 10, no, 0}, + {__LINE__, "2$*2$d", 2, &args[1], 20, no, 0}, + {__LINE__, "2$*3$d", 2, &args[1], 30, no, 0}, + {__LINE__, "2$.*1$d", 2, &args[1], no, 10, 0}, + {__LINE__, "2$.*2$d", 2, &args[1], no, 20, 0}, + {__LINE__, "2$.*3$d", 2, &args[1], no, 30, 0}, + {__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0}, + {__LINE__, "2$*2$.*2$d", 2, &args[1], 20, 20, 0}, + {__LINE__, "2$*1$.*3$d", 2, &args[1], 10, 30, 0}, + {__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0}, + {__LINE__, "1$*d", 0}, // indexed, then positional + {__LINE__, "*2$d", 0}, // positional, then indexed + {__LINE__, "6$d", 1}, // arg position out of bounds + {__LINE__, "1$6$d", 0}, // width position incorrectly specified + {__LINE__, "1$.6$d", 0}, // precision position incorrectly specified + {__LINE__, "1$*6$d", 1}, // width position out of bounds + {__LINE__, "1$.*6$d", 1}, // precision position out of bounds + }; +#pragma GCC diagnostic pop + for (const Expectation &e : kExpect) { + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UnboundConversion props; + BoundConversion bound; + int ok_phases = 0; + int next = 0; + if (Extract(e.fmt, &props, &next)) { + ++ok_phases; + if (BindWithPack(&props, args, &bound)) { + ++ok_phases; + } + } + EXPECT_EQ(e.ok_phases, ok_phases); + if (e.ok_phases < 2) continue; + if (e.arg != nullptr) { + EXPECT_EQ(e.arg, bound.arg()); + } + EXPECT_EQ(e.width, bound.width()); + EXPECT_EQ(e.precision, bound.precision()); + } +} + +TEST_F(FormatBindTest, FormatPack) { + struct Expectation { + int line; + const char *fmt; + const char *summary; + }; + const int ia[] = { 10, 20, 30, 40, -10 }; + const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]), + FormatArgImpl(ia[2]), FormatArgImpl(ia[3]), + FormatArgImpl(ia[4])}; + const Expectation kExpect[] = { + {__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"}, + {__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"}, + {__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"}, + {__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"}, + {__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"}, + {__LINE__, "a%.*fb", "a{20:.10f}b"}, + {__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"}, + {__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"}, + {__LINE__, "a%04ldb", "a{10:04ld}b"}, + {__LINE__, "a%-#04lldb", "a{10:-#04lld}b"}, + {__LINE__, "a%1$*5$db", "a{10:-10d}b"}, + {__LINE__, "a%1$.*5$db", "a{10:d}b"}, + }; + for (const Expectation &e : kExpect) { + absl::string_view fmt = e.fmt; + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UntypedFormatSpecImpl format(fmt); + EXPECT_EQ(e.summary, + str_format_internal::Summarize(format, absl::MakeSpan(args))) + << "line:" << e.line; + } +} + +} // namespace +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h new file mode 100644 index 000000000000..8b594f2d5cc6 --- /dev/null +++ b/absl/strings/internal/str_format/checker.h @@ -0,0 +1,325 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ + +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/extension.h" + +// Compile time check support for entry points. + +#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#if defined(__clang__) && !defined(__native_client__) +#if __has_attribute(enable_if) +#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1 +#endif // __has_attribute(enable_if) +#endif // defined(__clang__) && !defined(__native_client__) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +namespace absl { +namespace str_format_internal { + +constexpr bool AllOf() { return true; } + +template <typename... T> +constexpr bool AllOf(bool b, T... t) { + return b && AllOf(t...); +} + +template <typename Arg> +constexpr Conv ArgumentToConv() { + return decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), std::declval<const ConversionSpec&>(), + std::declval<FormatSinkImpl*>()))::kConv; +} + +#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +constexpr bool ContainsChar(const char* chars, char c) { + return *chars == c || (*chars && ContainsChar(chars + 1, c)); +} + +// A constexpr compatible list of Convs. +struct ConvList { + const Conv* array; + int count; + + // We do the bound check here to avoid having to do it on the callers. + // Returning an empty Conv has the same effect as short circuiting because it + // will never match any conversion. + constexpr Conv operator[](int i) const { + return i < count ? array[i] : Conv{}; + } + + constexpr ConvList without_front() const { + return count != 0 ? ConvList{array + 1, count - 1} : *this; + } +}; + +template <size_t count> +struct ConvListT { + // Make sure the array has size > 0. + Conv list[count ? count : 1]; +}; + +constexpr char GetChar(string_view str, size_t index) { + return index < str.size() ? str[index] : char{}; +} + +constexpr string_view ConsumeFront(string_view str, size_t len = 1) { + return len <= str.size() ? string_view(str.data() + len, str.size() - len) + : string_view(); +} + +constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { + return ContainsChar(chars, GetChar(format, 0)) + ? ConsumeAnyOf(ConsumeFront(format), chars) + : format; +} + +constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } + +// Helper class for the ParseDigits function. +// It encapsulates the two return values we need there. +struct Integer { + string_view format; + int value; + + // If the next character is a '$', consume it. + // Otherwise, make `this` an invalid positional argument. + constexpr Integer ConsumePositionalDollar() const { + return GetChar(format, 0) == '$' ? Integer{ConsumeFront(format), value} + : Integer{format, 0}; + } +}; + +constexpr Integer ParseDigits(string_view format, int value = 0) { + return IsDigit(GetChar(format, 0)) + ? ParseDigits(ConsumeFront(format), + 10 * value + GetChar(format, 0) - '0') + : Integer{format, value}; +} + +// Parse digits for a positional argument. +// The parsing also consumes the '$'. +constexpr Integer ParsePositional(string_view format) { + return ParseDigits(format).ConsumePositionalDollar(); +} + +// Parses a single conversion specifier. +// See ConvParser::Run() for post conditions. +class ConvParser { + constexpr ConvParser SetFormat(string_view format) const { + return ConvParser(format, args_, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetArgs(ConvList args) const { + return ConvParser(format_, args, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetError(bool error) const { + return ConvParser(format_, args_, error_ || error, arg_position_, + is_positional_); + } + + constexpr ConvParser SetArgPosition(int arg_position) const { + return ConvParser(format_, args_, error_, arg_position, is_positional_); + } + + // Consumes the next arg and verifies that it matches `conv`. + // `error_` is set if there is no next arg or if it doesn't match `conv`. + constexpr ConvParser ConsumeNextArg(char conv) const { + return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); + } + + // Verify that positional argument `i.value` matches `conv`. + // `error_` is set if `i.value` is not a valid argument or if it doesn't + // match. + constexpr ConvParser VerifyPositional(Integer i, char conv) const { + return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); + } + + // Parse the position of the arg and store it in `arg_position_`. + constexpr ConvParser ParseArgPosition(Integer arg) const { + return SetFormat(arg.format).SetArgPosition(arg.value); + } + + // Consume the flags. + constexpr ConvParser ParseFlags() const { + return SetFormat(ConsumeAnyOf(format_, "-+ #0")); + } + + // Consume the width. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParseWidth() const { + return IsDigit(GetChar(format_, 0)) + ? SetFormat(ParseDigits(format_).format) + : GetChar(format_, 0) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_)), '*') + : SetFormat(ConsumeFront(format_)) + .ConsumeNextArg('*') + : *this; + } + + // Consume the precision. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParsePrecision() const { + return GetChar(format_, 0) != '.' + ? *this + : GetChar(format_, 1) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_, 2)), '*') + : SetFormat(ConsumeFront(format_, 2)) + .ConsumeNextArg('*') + : SetFormat(ParseDigits(ConsumeFront(format_)).format); + } + + // Consume the length characters. + constexpr ConvParser ParseLength() const { + return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); + } + + // Consume the conversion character and verify that it matches `args_`. + // `error_` is set if it doesn't match. + constexpr ConvParser ParseConversion() const { + return is_positional_ + ? VerifyPositional({ConsumeFront(format_), arg_position_}, + GetChar(format_, 0)) + : ConsumeNextArg(GetChar(format_, 0)) + .SetFormat(ConsumeFront(format_)); + } + + constexpr ConvParser(string_view format, ConvList args, bool error, + int arg_position, bool is_positional) + : format_(format), + args_(args), + error_(error), + arg_position_(arg_position), + is_positional_(is_positional) {} + + public: + constexpr ConvParser(string_view format, ConvList args, bool is_positional) + : format_(format), + args_(args), + error_(false), + arg_position_(0), + is_positional_(is_positional) {} + + // Consume the whole conversion specifier. + // `format()` will be set to the character after the conversion character. + // `error()` will be set if any of the arguments do not match. + constexpr ConvParser Run() const { + return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this) + .ParseFlags() + .ParseWidth() + .ParsePrecision() + .ParseLength() + .ParseConversion(); + } + + constexpr string_view format() const { return format_; } + constexpr ConvList args() const { return args_; } + constexpr bool error() const { return error_; } + constexpr bool is_positional() const { return is_positional_; } + + private: + string_view format_; + // Current list of arguments. If we are not in positional mode we will consume + // from the front. + ConvList args_; + bool error_; + // Holds the argument position of the conversion character, if we are in + // positional mode. Otherwise, it is unspecified. + int arg_position_; + // Whether we are in positional mode. + // It changes the behavior of '*' and where to find the converted argument. + bool is_positional_; +}; + +// Parses a whole format expression. +// See FormatParser::Run(). +class FormatParser { + static constexpr bool FoundPercent(string_view format) { + return format.empty() || + (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); + } + + // We use an inner function to increase the recursion limit. + // The inner function consumes up to `limit` characters on every run. + // This increases the limit from 512 to ~512*limit. + static constexpr string_view ConsumeNonPercentInner(string_view format, + int limit = 20) { + return FoundPercent(format) || !limit + ? format + : ConsumeNonPercentInner( + ConsumeFront(format, GetChar(format, 0) == '%' && + GetChar(format, 1) == '%' + ? 2 + : 1), + limit - 1); + } + + // Consume characters until the next conversion spec %. + // It skips %%. + static constexpr string_view ConsumeNonPercent(string_view format) { + return FoundPercent(format) + ? format + : ConsumeNonPercent(ConsumeNonPercentInner(format)); + } + + static constexpr bool IsPositional(string_view format) { + return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format)) + : GetChar(format, 0) == '$'; + } + + constexpr bool RunImpl(bool is_positional) const { + // In non-positional mode we require all arguments to be consumed. + // In positional mode just reaching the end of the format without errors is + // enough. + return (format_.empty() && (is_positional || args_.count == 0)) || + (!format_.empty() && + ValidateArg( + ConvParser(ConsumeFront(format_), args_, is_positional).Run())); + } + + constexpr bool ValidateArg(ConvParser conv) const { + return !conv.error() && FormatParser(conv.format(), conv.args()) + .RunImpl(conv.is_positional()); + } + + public: + constexpr FormatParser(string_view format, ConvList args) + : format_(ConsumeNonPercent(format)), args_(args) {} + + // Runs the parser for `format` and `args`. + // It verifies that the format is valid and that all conversion specifiers + // match the arguments passed. + // In non-positional mode it also verfies that all arguments are consumed. + constexpr bool Run() const { + return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); + } + + private: + string_view format_; + // Current list of arguments. + // If we are not in positional mode we will consume from the front and will + // have to be empty in the end. + ConvList args_; +}; + +template <Conv... C> +constexpr bool ValidFormatImpl(string_view format) { + return FormatParser(format, + {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) + .Run(); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace str_format_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc new file mode 100644 index 000000000000..14d11ea8bd30 --- /dev/null +++ b/absl/strings/internal/str_format/checker_test.cc @@ -0,0 +1,150 @@ +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +namespace str_format_internal { +namespace { + +std::string ConvToString(Conv conv) { + std::string out; +#define CONV_SET_CASE(c) \ + if (Contains(conv, Conv::c)) out += #c; + ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) +#undef CONV_SET_CASE + if (Contains(conv, Conv::star)) out += "*"; + return out; +} + +TEST(StrFormatChecker, ArgumentToConv) { + Conv conv = ArgumentToConv<std::string>(); + EXPECT_EQ(ConvToString(conv), "s"); + + conv = ArgumentToConv<const char*>(); + EXPECT_EQ(ConvToString(conv), "sp"); + + conv = ArgumentToConv<double>(); + EXPECT_EQ(ConvToString(conv), "fFeEgGaA"); + + conv = ArgumentToConv<int>(); + EXPECT_EQ(ConvToString(conv), "cdiouxXfFeEgGaA*"); + + conv = ArgumentToConv<std::string*>(); + EXPECT_EQ(ConvToString(conv), "p"); +} + +#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +struct Case { + bool result; + const char* format; +}; + +template <typename... Args> +constexpr Case ValidFormat(const char* format) { + return {ValidFormatImpl<ArgumentToConv<Args>()...>(format), format}; +} + +TEST(StrFormatChecker, ValidFormat) { + // We want to make sure these expressions are constexpr and they have the + // expected value. + // If they are not constexpr the attribute will just ignore them and not give + // a compile time error. + enum e {}; + enum class e2 {}; + constexpr Case trues[] = { + ValidFormat<>("abc"), // + + ValidFormat<e>("%d"), // + ValidFormat<e2>("%d"), // + ValidFormat<int>("%% %d"), // + ValidFormat<int>("%ld"), // + ValidFormat<int>("%lld"), // + ValidFormat<std::string>("%s"), // + ValidFormat<std::string>("%10s"), // + ValidFormat<int>("%.10x"), // + ValidFormat<int, int>("%*.3x"), // + ValidFormat<int>("%1.d"), // + ValidFormat<int>("%.d"), // + ValidFormat<int, double>("%d %g"), // + ValidFormat<int, std::string>("%*s"), // + ValidFormat<int, double>("%.*f"), // + ValidFormat<void (*)(), volatile int*>("%p %p"), // + ValidFormat<string_view, const char*, double, void*>( + "string_view=%s const char*=%s double=%f void*=%p)"), + + ValidFormat<int>("%% %1$d"), // + ValidFormat<int>("%1$ld"), // + ValidFormat<int>("%1$lld"), // + ValidFormat<std::string>("%1$s"), // + ValidFormat<std::string>("%1$10s"), // + ValidFormat<int>("%1$.10x"), // + ValidFormat<int>("%1$*1$.*1$d"), // + ValidFormat<int, int>("%1$*2$.3x"), // + ValidFormat<int>("%1$1.d"), // + ValidFormat<int>("%1$.d"), // + ValidFormat<double, int>("%2$d %1$g"), // + ValidFormat<int, std::string>("%2$*1$s"), // + ValidFormat<int, double>("%2$.*1$f"), // + ValidFormat<void*, string_view, const char*, double>( + "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p " + "repeat=%3$s)")}; + + for (Case c : trues) { + EXPECT_TRUE(c.result) << c.format; + } + + constexpr Case falses[] = { + ValidFormat<int>(""), // + + ValidFormat<e>("%s"), // + ValidFormat<e2>("%s"), // + ValidFormat<>("%s"), // + ValidFormat<>("%r"), // + ValidFormat<int>("%s"), // + ValidFormat<int>("%.1.d"), // + ValidFormat<int>("%*1d"), // + ValidFormat<int>("%1-d"), // + ValidFormat<std::string, int>("%*s"), // + ValidFormat<int>("%*d"), // + ValidFormat<std::string>("%p"), // + ValidFormat<int (*)(int)>("%d"), // + + ValidFormat<>("%3$d"), // + ValidFormat<>("%1$r"), // + ValidFormat<int>("%1$s"), // + ValidFormat<int>("%1$.1.d"), // + ValidFormat<int>("%1$*2$1d"), // + ValidFormat<int>("%1$1-d"), // + ValidFormat<std::string, int>("%2$*1$s"), // + ValidFormat<std::string>("%1$p"), + + ValidFormat<int, int>("%d %2$d"), // + }; + + for (Case c : falses) { + EXPECT_FALSE(c.result) << c.format; + } +} + +TEST(StrFormatChecker, LongFormat) { +#define CHARS_X_40 "1234567890123456789012345678901234567890" +#define CHARS_X_400 \ + CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 \ + CHARS_X_40 CHARS_X_40 CHARS_X_40 +#define CHARS_X_4000 \ + CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 \ + CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 + constexpr char long_format[] = + CHARS_X_4000 "%d" CHARS_X_4000 "%s" CHARS_X_4000; + constexpr bool is_valid = ValidFormat<int, std::string>(long_format).result; + EXPECT_TRUE(is_valid); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc new file mode 100644 index 000000000000..32f8a0f9ad1a --- /dev/null +++ b/absl/strings/internal/str_format/convert_test.cc @@ -0,0 +1,575 @@ +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <cmath> +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/internal/str_format/bind.h" + +namespace absl { +namespace str_format_internal { +namespace { + +template <typename T, size_t N> +size_t ArraySize(T (&)[N]) { + return N; +} + +std::string LengthModFor(float) { return ""; } +std::string LengthModFor(double) { return ""; } +std::string LengthModFor(long double) { return "L"; } +std::string LengthModFor(char) { return "hh"; } +std::string LengthModFor(signed char) { return "hh"; } +std::string LengthModFor(unsigned char) { return "hh"; } +std::string LengthModFor(short) { return "h"; } // NOLINT +std::string LengthModFor(unsigned short) { return "h"; } // NOLINT +std::string LengthModFor(int) { return ""; } +std::string LengthModFor(unsigned) { return ""; } +std::string LengthModFor(long) { return "l"; } // NOLINT +std::string LengthModFor(unsigned long) { return "l"; } // NOLINT +std::string LengthModFor(long long) { return "ll"; } // NOLINT +std::string LengthModFor(unsigned long long) { return "ll"; } // NOLINT + +std::string EscCharImpl(int v) { + if (isprint(v)) return std::string(1, static_cast<char>(v)); + char buf[64]; + int n = snprintf(buf, sizeof(buf), "\\%#.2x", + static_cast<unsigned>(v & 0xff)); + assert(n > 0 && n < sizeof(buf)); + return std::string(buf, n); +} + +std::string Esc(char v) { return EscCharImpl(v); } +std::string Esc(signed char v) { return EscCharImpl(v); } +std::string Esc(unsigned char v) { return EscCharImpl(v); } + +template <typename T> +std::string Esc(const T &v) { + std::ostringstream oss; + oss << v; + return oss.str(); +} + +void StrAppend(std::string *dst, const char *format, va_list ap) { + // First try with a small fixed size buffer + static const int kSpaceLength = 1024; + char space[kSpaceLength]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, kSpaceLength, format, backup_ap); + va_end(backup_ap); + if (result < kSpaceLength) { + if (result >= 0) { + // Normal case -- everything fit. + dst->append(space, result); + return; + } + if (result < 0) { + // Just an error. + return; + } + } + + // Increase the buffer size to the size requested by vsnprintf, + // plus one for the closing \0. + int length = result + 1; + char *buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + result = vsnprintf(buf, length, format, backup_ap); + va_end(backup_ap); + + if (result >= 0 && result < length) { + // It fit + dst->append(buf, result); + } + delete[] buf; +} + +std::string StrPrint(const char *format, ...) { + va_list ap; + va_start(ap, format); + std::string result; + StrAppend(&result, format, ap); + va_end(ap); + return result; +} + +class FormatConvertTest : public ::testing::Test { }; + +template <typename T> +void TestStringConvert(const T& str) { + const FormatArgImpl args[] = {FormatArgImpl(str)}; + struct Expectation { + const char *out; + const char *fmt; + }; + const Expectation kExpect[] = { + {"hello", "%1$s" }, + {"", "%1$.s" }, + {"", "%1$.0s" }, + {"h", "%1$.1s" }, + {"he", "%1$.2s" }, + {"hello", "%1$.10s" }, + {" hello", "%1$6s" }, + {" he", "%1$5.2s" }, + {"he ", "%1$-5.2s" }, + {"hello ", "%1$-6.10s" }, + }; + for (const Expectation &e : kExpect) { + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, BasicString) { + TestStringConvert("hello"); // As char array. + TestStringConvert(static_cast<const char*>("hello")); + TestStringConvert(std::string("hello")); + TestStringConvert(string_view("hello")); +} + +TEST_F(FormatConvertTest, NullString) { + const char* p = nullptr; + UntypedFormatSpecImpl format("%s"); + EXPECT_EQ("", FormatPack(format, {FormatArgImpl(p)})); +} + +TEST_F(FormatConvertTest, StringPrecision) { + // We cap at the precision. + char c = 'a'; + const char* p = &c; + UntypedFormatSpecImpl format("%.1s"); + EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)})); + + // We cap at the nul terminator. + p = "ABC"; + UntypedFormatSpecImpl format2("%.10s"); + EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)})); +} + +TEST_F(FormatConvertTest, Pointer) { +#if _MSC_VER + // MSVC's printf implementation prints pointers differently. We can't easily + // compare our implementation to theirs. + return; +#endif + static int x = 0; + const int *xp = &x; + char c = 'h'; + char *mcp = &c; + const char *cp = "hi"; + const char *cnil = nullptr; + const int *inil = nullptr; + using VoidF = void (*)(); + VoidF fp = [] {}, fnil = nullptr; + volatile char vc; + volatile char* vcp = &vc; + volatile char* vcnil = nullptr; + const FormatArgImpl args[] = { + FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(inil), + FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp), + FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil), + }; + struct Expectation { + std::string out; + const char *fmt; + }; + const Expectation kExpect[] = { + {StrPrint("%p", &x), "%p"}, + {StrPrint("%20p", &x), "%20p"}, + {StrPrint("%.1p", &x), "%.1p"}, + {StrPrint("%.20p", &x), "%.20p"}, + {StrPrint("%30.20p", &x), "%30.20p"}, + + {StrPrint("%-p", &x), "%-p"}, + {StrPrint("%-20p", &x), "%-20p"}, + {StrPrint("%-.1p", &x), "%-.1p"}, + {StrPrint("%.20p", &x), "%.20p"}, + {StrPrint("%-30.20p", &x), "%-30.20p"}, + + {StrPrint("%p", cp), "%2$p"}, // const char* + {"(nil)", "%3$p"}, // null const char * + {"(nil)", "%4$p"}, // null const int * + {StrPrint("%p", mcp), "%5$p"}, // nonconst char* + + {StrPrint("%p", fp), "%6$p"}, // function pointer + {StrPrint("%p", vcp), "%8$p"}, // function pointer + +#ifndef __APPLE__ + // Apple's printf differs here (0x0 vs. nil) + {StrPrint("%p", fnil), "%7$p"}, // null function pointer + {StrPrint("%p", vcnil), "%9$p"}, // null function pointer +#endif + }; + for (const Expectation &e : kExpect) { + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))) << e.fmt; + } +} + +struct Cardinal { + enum Pos { k1 = 1, k2 = 2, k3 = 3 }; + enum Neg { kM1 = -1, kM2 = -2, kM3 = -3 }; +}; + +TEST_F(FormatConvertTest, Enum) { + const Cardinal::Pos k3 = Cardinal::k3; + const Cardinal::Neg km3 = Cardinal::kM3; + const FormatArgImpl args[] = {FormatArgImpl(k3), FormatArgImpl(km3)}; + UntypedFormatSpecImpl format("%1$d"); + UntypedFormatSpecImpl format2("%2$d"); + EXPECT_EQ("3", FormatPack(format, absl::MakeSpan(args))); + EXPECT_EQ("-3", FormatPack(format2, absl::MakeSpan(args))); +} + +template <typename T> +class TypedFormatConvertTest : public FormatConvertTest { }; + +TYPED_TEST_CASE_P(TypedFormatConvertTest); + +std::vector<std::string> AllFlagCombinations() { + const char kFlags[] = {'-', '#', '0', '+', ' '}; + std::vector<std::string> result; + for (size_t fsi = 0; fsi < (1ull << ArraySize(kFlags)); ++fsi) { + std::string flag_set; + for (size_t fi = 0; fi < ArraySize(kFlags); ++fi) + if (fsi & (1ull << fi)) + flag_set += kFlags[fi]; + result.push_back(flag_set); + } + return result; +} + +TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) { + typedef TypeParam T; + typedef typename std::make_unsigned<T>::type UnsignedT; + using remove_volatile_t = typename std::remove_volatile<T>::type; + const T kMin = std::numeric_limits<remove_volatile_t>::min(); + const T kMax = std::numeric_limits<remove_volatile_t>::max(); + const T kVals[] = { + remove_volatile_t(1), + remove_volatile_t(2), + remove_volatile_t(3), + remove_volatile_t(123), + remove_volatile_t(-1), + remove_volatile_t(-2), + remove_volatile_t(-3), + remove_volatile_t(-123), + remove_volatile_t(0), + kMax - remove_volatile_t(1), + kMax, + kMin + remove_volatile_t(1), + kMin, + }; + const char kConvChars[] = {'d', 'i', 'u', 'o', 'x', 'X'}; + const std::string kWid[] = {"", "4", "10"}; + const std::string kPrec[] = {"", ".", ".0", ".4", ".10"}; + + const std::vector<std::string> flag_sets = AllFlagCombinations(); + + for (size_t vi = 0; vi < ArraySize(kVals); ++vi) { + const T val = kVals[vi]; + SCOPED_TRACE(Esc(val)); + const FormatArgImpl args[] = {FormatArgImpl(val)}; + for (size_t ci = 0; ci < ArraySize(kConvChars); ++ci) { + const char conv_char = kConvChars[ci]; + for (size_t fsi = 0; fsi < flag_sets.size(); ++fsi) { + const std::string &flag_set = flag_sets[fsi]; + for (size_t wi = 0; wi < ArraySize(kWid); ++wi) { + const std::string &wid = kWid[wi]; + for (size_t pi = 0; pi < ArraySize(kPrec); ++pi) { + const std::string &prec = kPrec[pi]; + + const bool is_signed_conv = (conv_char == 'd' || conv_char == 'i'); + const bool is_unsigned_to_signed = + !std::is_signed<T>::value && is_signed_conv; + // Don't consider sign-related flags '+' and ' ' when doing + // unsigned to signed conversions. + if (is_unsigned_to_signed && + flag_set.find_first_of("+ ") != std::string::npos) { + continue; + } + + std::string new_fmt("%"); + new_fmt += flag_set; + new_fmt += wid; + new_fmt += prec; + // old and new always agree up to here. + std::string old_fmt = new_fmt; + new_fmt += conv_char; + std::string old_result; + if (is_unsigned_to_signed) { + // don't expect agreement on unsigned formatted as signed, + // as printf can't do that conversion properly. For those + // cases, we do expect agreement with printf with a "%u" + // and the unsigned equivalent of 'val'. + UnsignedT uval = val; + old_fmt += LengthModFor(uval); + old_fmt += "u"; + old_result = StrPrint(old_fmt.c_str(), uval); + } else { + old_fmt += LengthModFor(val); + old_fmt += conv_char; + old_result = StrPrint(old_fmt.c_str(), val); + } + + SCOPED_TRACE(std::string() + " old_fmt: \"" + old_fmt + + "\"'" + " new_fmt: \"" + + new_fmt + "\""); + UntypedFormatSpecImpl format(new_fmt); + EXPECT_EQ(old_result, FormatPack(format, absl::MakeSpan(args))); + } + } + } + } + } +} + +TYPED_TEST_P(TypedFormatConvertTest, Char) { + typedef TypeParam T; + using remove_volatile_t = typename std::remove_volatile<T>::type; + static const T kMin = std::numeric_limits<remove_volatile_t>::min(); + static const T kMax = std::numeric_limits<remove_volatile_t>::max(); + T kVals[] = { + remove_volatile_t(1), remove_volatile_t(2), remove_volatile_t(10), + remove_volatile_t(-1), remove_volatile_t(-2), remove_volatile_t(-10), + remove_volatile_t(0), + kMin + remove_volatile_t(1), kMin, + kMax - remove_volatile_t(1), kMax + }; + for (const T &c : kVals) { + const FormatArgImpl args[] = {FormatArgImpl(c)}; + UntypedFormatSpecImpl format("%c"); + EXPECT_EQ(StrPrint("%c", c), FormatPack(format, absl::MakeSpan(args))); + } +} + +REGISTER_TYPED_TEST_CASE_P(TypedFormatConvertTest, AllIntsWithFlags, Char); + +typedef ::testing::Types< + int, unsigned, volatile int, + short, unsigned short, + long, unsigned long, + long long, unsigned long long, + signed char, unsigned char, char> + AllIntTypes; +INSTANTIATE_TYPED_TEST_CASE_P(TypedFormatConvertTestWithAllIntTypes, + TypedFormatConvertTest, AllIntTypes); +TEST_F(FormatConvertTest, Uint128) { + absl::uint128 v = static_cast<absl::uint128>(0x1234567890abcdef) * 1979; + absl::uint128 max = absl::Uint128Max(); + const FormatArgImpl args[] = {FormatArgImpl(v), FormatArgImpl(max)}; + + struct Case { + const char* format; + const char* expected; + } cases[] = { + {"%1$d", "2595989796776606496405"}, + {"%1$30d", " 2595989796776606496405"}, + {"%1$-30d", "2595989796776606496405 "}, + {"%1$u", "2595989796776606496405"}, + {"%1$x", "8cba9876066020f695"}, + {"%2$d", "340282366920938463463374607431768211455"}, + {"%2$u", "340282366920938463463374607431768211455"}, + {"%2$x", "ffffffffffffffffffffffffffffffff"}, + }; + + for (auto c : cases) { + UntypedFormatSpecImpl format(c.format); + EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, Float) { +#if _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + + const char *const kFormats[] = { + "%", "%.3", "%8.5", "%9", "%.60", "%.30", "%03", "%+", + "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + + std::vector<double> doubles = {0.0, + -0.0, + .99999999999999, + 99999999999999., + std::numeric_limits<double>::max(), + -std::numeric_limits<double>::max(), + std::numeric_limits<double>::min(), + -std::numeric_limits<double>::min(), + std::numeric_limits<double>::lowest(), + -std::numeric_limits<double>::lowest(), + std::numeric_limits<double>::epsilon(), + std::numeric_limits<double>::epsilon() + 1, + std::numeric_limits<double>::infinity(), + -std::numeric_limits<double>::infinity()}; + +#ifndef __APPLE__ + // Apple formats NaN differently (+nan) vs. (nan) + doubles.push_back(std::nan("")); +#endif + + // Some regression tests. + doubles.push_back(0.99999999999999989); + + if (std::numeric_limits<double>::has_denorm != std::denorm_absent) { + doubles.push_back(std::numeric_limits<double>::denorm_min()); + doubles.push_back(-std::numeric_limits<double>::denorm_min()); + } + + for (double base : + {1., 12., 123., 1234., 12345., 123456., 1234567., 12345678., 123456789., + 1234567890., 12345678901., 123456789012., 1234567890123.}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + } + } + } + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + f; + for (double d : doubles) { + int i = -10; + FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + UntypedFormatSpecImpl format(fmt_str); + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test to + // time out. + ASSERT_EQ(StrPrint(fmt_str.c_str(), d, i), + FormatPack(format, absl::MakeSpan(args))) + << fmt_str << " " << StrPrint("%.18g", d) << " " + << StrPrint("%.999f", d); + } + } + } +} + +TEST_F(FormatConvertTest, LongDouble) { + const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", + "%.60", "%+", "% ", "%-10"}; + + // This value is not representable in double, but it is in long double that + // uses the extended format. + // This is to verify that we are not truncating the value mistakenly through a + // double. + long double very_precise = 10000000000000000.25L; + + std::vector<long double> doubles = { + 0.0, + -0.0, + very_precise, + 1 / very_precise, + std::numeric_limits<long double>::max(), + -std::numeric_limits<long double>::max(), + std::numeric_limits<long double>::min(), + -std::numeric_limits<long double>::min(), + std::numeric_limits<long double>::infinity(), + -std::numeric_limits<long double>::infinity()}; + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + 'L' + f; + for (auto d : doubles) { + FormatArgImpl arg(d); + UntypedFormatSpecImpl format(fmt_str); + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test to + // time out. + ASSERT_EQ(StrPrint(fmt_str.c_str(), d), + FormatPack(format, {&arg, 1})) + << fmt_str << " " << StrPrint("%.18Lg", d) << " " + << StrPrint("%.999Lf", d); + } + } + } +} + +TEST_F(FormatConvertTest, IntAsFloat) { + const int kMin = std::numeric_limits<int>::min(); + const int kMax = std::numeric_limits<int>::max(); + const int ia[] = { + 1, 2, 3, 123, + -1, -2, -3, -123, + 0, kMax - 1, kMax, kMin + 1, kMin }; + for (const int fx : ia) { + SCOPED_TRACE(fx); + const FormatArgImpl args[] = {FormatArgImpl(fx)}; + struct Expectation { + int line; + std::string out; + const char *fmt; + }; + const double dx = static_cast<double>(fx); + const Expectation kExpect[] = { + { __LINE__, StrPrint("%f", dx), "%f" }, + { __LINE__, StrPrint("%12f", dx), "%12f" }, + { __LINE__, StrPrint("%.12f", dx), "%.12f" }, + { __LINE__, StrPrint("%12a", dx), "%12a" }, + { __LINE__, StrPrint("%.12a", dx), "%.12a" }, + }; + for (const Expectation &e : kExpect) { + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))); + } + } +} + +template <typename T> +bool FormatFails(const char* test_format, T value) { + std::string format_string = std::string("<<") + test_format + ">>"; + UntypedFormatSpecImpl format(format_string); + + int one = 1; + const FormatArgImpl args[] = {FormatArgImpl(value), FormatArgImpl(one)}; + EXPECT_EQ(FormatPack(format, absl::MakeSpan(args)), "") + << "format=" << test_format << " value=" << value; + return FormatPack(format, absl::MakeSpan(args)).empty(); +} + +TEST_F(FormatConvertTest, ExpectedFailures) { + // Int input + EXPECT_TRUE(FormatFails("%p", 1)); + EXPECT_TRUE(FormatFails("%s", 1)); + EXPECT_TRUE(FormatFails("%n", 1)); + + // Double input + EXPECT_TRUE(FormatFails("%p", 1.)); + EXPECT_TRUE(FormatFails("%s", 1.)); + EXPECT_TRUE(FormatFails("%n", 1.)); + EXPECT_TRUE(FormatFails("%c", 1.)); + EXPECT_TRUE(FormatFails("%d", 1.)); + EXPECT_TRUE(FormatFails("%x", 1.)); + EXPECT_TRUE(FormatFails("%*d", 1.)); + + // String input + EXPECT_TRUE(FormatFails("%n", "")); + EXPECT_TRUE(FormatFails("%c", "")); + EXPECT_TRUE(FormatFails("%d", "")); + EXPECT_TRUE(FormatFails("%x", "")); + EXPECT_TRUE(FormatFails("%f", "")); + EXPECT_TRUE(FormatFails("%*d", "")); +} + +} // namespace +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/extension.cc b/absl/strings/internal/str_format/extension.cc new file mode 100644 index 000000000000..c2174703c3e8 --- /dev/null +++ b/absl/strings/internal/str_format/extension.cc @@ -0,0 +1,84 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/extension.h" + +#include <errno.h> +#include <algorithm> +#include <string> + +namespace absl { +namespace str_format_internal { +namespace { +// clang-format off +#define ABSL_LENGTH_MODS_EXPAND_ \ + X_VAL(h) X_SEP \ + X_VAL(hh) X_SEP \ + X_VAL(l) X_SEP \ + X_VAL(ll) X_SEP \ + X_VAL(L) X_SEP \ + X_VAL(j) X_SEP \ + X_VAL(z) X_SEP \ + X_VAL(t) X_SEP \ + X_VAL(q) +// clang-format on +} // namespace + +const LengthMod::Spec LengthMod::kSpecs[] = { +#define X_VAL(id) { LengthMod::id, #id, strlen(#id) } +#define X_SEP , + ABSL_LENGTH_MODS_EXPAND_, {LengthMod::none, "", 0} +#undef X_VAL +#undef X_SEP +}; + +const ConversionChar::Spec ConversionChar::kSpecs[] = { +#define X_VAL(id) { ConversionChar::id, #id[0] } +#define X_SEP , + ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP), + {ConversionChar::none, '\0'}, +#undef X_VAL +#undef X_SEP +}; + +std::string Flags::ToString() const { + std::string s; + s.append(left ? "-" : ""); + s.append(show_pos ? "+" : ""); + s.append(sign_col ? " " : ""); + s.append(alt ? "#" : ""); + s.append(zero ? "0" : ""); + return s; +} + +const size_t LengthMod::kNumValues; + +const size_t ConversionChar::kNumValues; + +bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { + size_t space_remaining = 0; + if (w >= 0) space_remaining = w; + size_t n = v.size(); + if (p >= 0) n = std::min(n, static_cast<size_t>(p)); + string_view shown(v.data(), n); + space_remaining = Excess(shown.size(), space_remaining); + if (!l) Append(space_remaining, ' '); + Append(shown); + if (l) Append(space_remaining, ' '); + return true; +} + +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h new file mode 100644 index 000000000000..810330b9d71b --- /dev/null +++ b/absl/strings/internal/str_format/extension.h @@ -0,0 +1,406 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ + +#include <limits.h> +#include <cstring> +#include <ostream> + +#include "absl/base/port.h" +#include "absl/strings/internal/str_format/output.h" +#include "absl/strings/string_view.h" + +class Cord; + +namespace absl { + +namespace str_format_internal { + +class FormatRawSinkImpl { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, decltype(str_format_internal::InvokeFlush( + std::declval<T*>(), string_view()))* = nullptr> + FormatRawSinkImpl(T* raw) // NOLINT + : sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {} + + void Write(string_view s) { write_(sink_, s); } + + template <typename T> + static FormatRawSinkImpl Extract(T s) { + return s.sink_; + } + + private: + template <typename T> + static void Flush(void* r, string_view s) { + str_format_internal::InvokeFlush(static_cast<T*>(r), s); + } + + void* sink_; + void (*write_)(void*, string_view); +}; + +// An abstraction to which conversions write their std::string data. +class FormatSinkImpl { + public: + explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {} + + ~FormatSinkImpl() { Flush(); } + + void Flush() { + raw_.Write(string_view(buf_, pos_ - buf_)); + pos_ = buf_; + } + + void Append(size_t n, char c) { + if (n == 0) return; + size_ += n; + auto raw_append = [&](size_t count) { + memset(pos_, c, count); + pos_ += count; + }; + while (n > Avail()) { + n -= Avail(); + if (Avail() > 0) { + raw_append(Avail()); + } + Flush(); + } + raw_append(n); + } + + void Append(string_view v) { + size_t n = v.size(); + if (n == 0) return; + size_ += n; + if (n >= Avail()) { + Flush(); + raw_.Write(v); + return; + } + memcpy(pos_, v.data(), n); + pos_ += n; + } + + size_t size() const { return size_; } + + // Put 'v' to 'sink' with specified width, precision, and left flag. + bool PutPaddedString(string_view v, int w, int p, bool l); + + template <typename T> + T Wrap() { + return T(this); + } + + template <typename T> + static FormatSinkImpl* Extract(T* s) { + return s->sink_; + } + + private: + size_t Avail() const { return buf_ + sizeof(buf_) - pos_; } + + FormatRawSinkImpl raw_; + size_t size_ = 0; + char* pos_ = buf_; + char buf_[1024]; +}; + +struct Flags { + bool basic : 1; // fastest conversion: no flags, width, or precision + bool left : 1; // "-" + bool show_pos : 1; // "+" + bool sign_col : 1; // " " + bool alt : 1; // "#" + bool zero : 1; // "0" + std::string ToString() const; + friend std::ostream& operator<<(std::ostream& os, const Flags& v) { + return os << v.ToString(); + } +}; + +struct LengthMod { + public: + enum Id : uint8_t { + h, hh, l, ll, L, j, z, t, q, none + }; + static const size_t kNumValues = none + 1; + + LengthMod() : id_(none) {} + + // Index into the opaque array of LengthMod enums. + // Requires: i < kNumValues + static LengthMod FromIndex(size_t i) { + return LengthMod(kSpecs[i].value); + } + + static LengthMod FromId(Id id) { return LengthMod(id); } + + // The length modifier std::string associated with a specified LengthMod. + string_view name() const { + const Spec& spec = kSpecs[id_]; + return {spec.name, spec.name_length}; + } + + Id id() const { return id_; } + + friend bool operator==(const LengthMod& a, const LengthMod& b) { + return a.id() == b.id(); + } + friend bool operator!=(const LengthMod& a, const LengthMod& b) { + return !(a == b); + } + friend std::ostream& operator<<(std::ostream& os, const LengthMod& v) { + return os << v.name(); + } + + private: + struct Spec { + Id value; + const char *name; + size_t name_length; + }; + static const Spec kSpecs[]; + + explicit LengthMod(Id id) : id_(id) {} + + Id id_; +}; + +// clang-format off +#define ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ + /* text */ \ + X_VAL(c) X_SEP X_VAL(C) X_SEP X_VAL(s) X_SEP X_VAL(S) X_SEP \ + /* ints */ \ + X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ + X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ + /* floats */ \ + X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \ + X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ + /* misc */ \ + X_VAL(n) X_SEP X_VAL(p) +// clang-format on + +struct ConversionChar { + public: + enum Id : uint8_t { + c, C, s, S, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p, // misc + none + }; + static const size_t kNumValues = none + 1; + + ConversionChar() : id_(none) {} + + public: + // Index into the opaque array of ConversionChar enums. + // Requires: i < kNumValues + static ConversionChar FromIndex(size_t i) { + return ConversionChar(kSpecs[i].value); + } + + static ConversionChar FromChar(char c) { + ConversionChar::Id out_id = ConversionChar::none; + switch (c) { +#define X_VAL(id) \ + case #id[0]: \ + out_id = ConversionChar::id; \ + break; + ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, ) +#undef X_VAL + default: + break; + } + return ConversionChar(out_id); + } + + static ConversionChar FromId(Id id) { return ConversionChar(id); } + Id id() const { return id_; } + + int radix() const { + switch (id()) { + case x: case X: case a: case A: case p: return 16; + case o: return 8; + default: return 10; + } + } + + bool upper() const { + switch (id()) { + case X: case F: case E: case G: case A: return true; + default: return false; + } + } + + bool is_signed() const { + switch (id()) { + case d: case i: return true; + default: return false; + } + } + + bool is_integral() const { + switch (id()) { + case d: case i: case u: case o: case x: case X: + return true; + default: return false; + } + } + + bool is_float() const { + switch (id()) { + case a: case e: case f: case g: case A: case E: case F: case G: + return true; + default: return false; + } + } + + bool IsValid() const { return id() != none; } + + // The associated char. + char Char() const { return kSpecs[id_].name; } + + friend bool operator==(const ConversionChar& a, const ConversionChar& b) { + return a.id() == b.id(); + } + friend bool operator!=(const ConversionChar& a, const ConversionChar& b) { + return !(a == b); + } + friend std::ostream& operator<<(std::ostream& os, const ConversionChar& v) { + char c = v.Char(); + if (!c) c = '?'; + return os << c; + } + + private: + struct Spec { + Id value; + char name; + }; + static const Spec kSpecs[]; + + explicit ConversionChar(Id id) : id_(id) {} + + Id id_; +}; + +class ConversionSpec { + public: + Flags flags() const { return flags_; } + LengthMod length_mod() const { return length_mod_; } + ConversionChar conv() const { return conv_; } + + // Returns the specified width. If width is unspecfied, it returns a negative + // value. + int width() const { return width_; } + // Returns the specified precision. If precision is unspecfied, it returns a + // negative value. + int precision() const { return precision_; } + + void set_flags(Flags f) { flags_ = f; } + void set_length_mod(LengthMod lm) { length_mod_ = lm; } + void set_conv(ConversionChar c) { conv_ = c; } + void set_width(int w) { width_ = w; } + void set_precision(int p) { precision_ = p; } + void set_left(bool b) { flags_.left = b; } + + private: + Flags flags_; + LengthMod length_mod_; + ConversionChar conv_; + int width_; + int precision_; +}; + +constexpr uint64_t ConversionCharToConvValue(char conv) { + return +#define CONV_SET_CASE(c) \ + conv == #c[0] ? (uint64_t{1} << (1 + ConversionChar::Id::c)): + ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) +#undef CONV_SET_CASE + conv == '*' + ? 1 + : 0; +} + +enum class Conv : uint64_t { +#define CONV_SET_CASE(c) c = ConversionCharToConvValue(#c[0]), + ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) +#undef CONV_SET_CASE + + // Used for width/precision '*' specification. + star = ConversionCharToConvValue('*'), + + // Some predefined values: + integral = d | i | u | o | x | X, + floating = a | e | f | g | A | E | F | G, + numeric = integral | floating, + string = s, // absl:ignore(std::string) + pointer = p +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr Conv operator|(Conv a, Conv b) { + return Conv(static_cast<uint64_t>(a) | static_cast<uint64_t>(b)); +} + +// Get a conversion with a single character in it. +constexpr Conv ConversionCharToConv(char c) { + return Conv(ConversionCharToConvValue(c)); +} + +// Checks whether `c` exists in `set`. +constexpr bool Contains(Conv set, char c) { + return (static_cast<uint64_t>(set) & ConversionCharToConvValue(c)) != 0; +} + +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(Conv set, Conv c) { + return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) == + static_cast<uint64_t>(c); +} + +// Return type of the AbslFormatConvert() functions. +// The Conv template parameter is used to inform the framework of what +// conversion characters are supported by that AbslFormatConvert routine. +template <Conv C> +struct ConvertResult { + static constexpr Conv kConv = C; + bool value; +}; +template <Conv C> +constexpr Conv ConvertResult<C>::kConv; + +// Return capacity - used, clipped to a minimum of 0. +inline size_t Excess(size_t used, size_t capacity) { + return used < capacity ? capacity - used : 0; +} + +} // namespace str_format_internal + +} // namespace absl + +#endif // ABSL_STRINGS_STR_FORMAT_EXTENSION_H_ diff --git a/absl/strings/internal/str_format/extension_test.cc b/absl/strings/internal/str_format/extension_test.cc new file mode 100644 index 000000000000..224fc923d3e3 --- /dev/null +++ b/absl/strings/internal/str_format/extension_test.cc @@ -0,0 +1,65 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "absl/strings/internal/str_format/extension.h" + +#include <random> +#include <string> +#include "absl/strings/str_format.h" + +#include "gtest/gtest.h" + +namespace { + +std::string MakeRandomString(size_t len) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis('a', 'z'); + std::string s(len, '0'); + for (char& c : s) { + c = dis(gen); + } + return s; +} + +TEST(FormatExtensionTest, SinkAppendSubstring) { + for (size_t chunk_size : {1, 10, 100, 1000, 10000}) { + std::string expected, actual; + absl::str_format_internal::FormatSinkImpl sink(&actual); + for (size_t chunks = 0; chunks < 10; ++chunks) { + std::string rand = MakeRandomString(chunk_size); + expected += rand; + sink.Append(rand); + } + sink.Flush(); + EXPECT_EQ(actual, expected); + } +} + +TEST(FormatExtensionTest, SinkAppendChars) { + for (size_t chunk_size : {1, 10, 100, 1000, 10000}) { + std::string expected, actual; + absl::str_format_internal::FormatSinkImpl sink(&actual); + for (size_t chunks = 0; chunks < 10; ++chunks) { + std::string rand = MakeRandomString(1); + expected.append(chunk_size, rand[0]); + sink.Append(chunk_size, rand[0]); + } + sink.Flush(); + EXPECT_EQ(actual, expected); + } +} +} // namespace diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc new file mode 100644 index 000000000000..37952b4699e9 --- /dev/null +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -0,0 +1,476 @@ +#include "absl/strings/internal/str_format/float_conversion.h" + +#include <string.h> +#include <algorithm> +#include <cassert> +#include <cmath> +#include <string> + +namespace absl { +namespace str_format_internal { + +namespace { + +char *CopyStringTo(string_view v, char *out) { + std::memcpy(out, v.data(), v.size()); + return out + v.size(); +} + +template <typename Float> +bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + int w = conv.width() >= 0 ? conv.width() : 0; + int p = conv.precision() >= 0 ? conv.precision() : -1; + char fmt[32]; + { + char *fp = fmt; + *fp++ = '%'; + fp = CopyStringTo(conv.flags().ToString(), fp); + fp = CopyStringTo("*.*", fp); + if (std::is_same<long double, Float>()) { + *fp++ = 'L'; + } + *fp++ = conv.conv().Char(); + *fp = 0; + assert(fp < fmt + sizeof(fmt)); + } + std::string space(512, '\0'); + string_view result; + while (true) { + int n = snprintf(&space[0], space.size(), fmt, w, p, v); + if (n < 0) return false; + if (static_cast<size_t>(n) < space.size()) { + result = string_view(space.data(), n); + break; + } + space.resize(n + 1); + } + sink->Append(result); + return true; +} + +// 128-bits in decimal: ceil(128*log(2)/log(10)) +// or std::numeric_limits<__uint128_t>::digits10 +constexpr int kMaxFixedPrecision = 39; + +constexpr int kBufferLength = /*sign*/ 1 + + /*integer*/ kMaxFixedPrecision + + /*point*/ 1 + + /*fraction*/ kMaxFixedPrecision + + /*exponent e+123*/ 5; + +struct Buffer { + void push_front(char c) { + assert(begin > data); + *--begin = c; + } + void push_back(char c) { + assert(end < data + sizeof(data)); + *end++ = c; + } + void pop_back() { + assert(begin < end); + --end; + } + + char &back() { + assert(begin < end); + return end[-1]; + } + + char last_digit() const { return end[-1] == '.' ? end[-2] : end[-1]; } + + int size() const { return static_cast<int>(end - begin); } + + char data[kBufferLength]; + char *begin; + char *end; +}; + +enum class FormatStyle { Fixed, Precision }; + +// If the value is Inf or Nan, print it and return true. +// Otherwise, return false. +template <typename Float> +bool ConvertNonNumericFloats(char sign_char, Float v, + const ConversionSpec &conv, FormatSinkImpl *sink) { + char text[4], *ptr = text; + if (sign_char) *ptr++ = sign_char; + if (std::isnan(v)) { + ptr = std::copy_n(conv.conv().upper() ? "NAN" : "nan", 3, ptr); + } else if (std::isinf(v)) { + ptr = std::copy_n(conv.conv().upper() ? "INF" : "inf", 3, ptr); + } else { + return false; + } + + return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, + conv.flags().left); +} + +// Round up the last digit of the value. +// It will carry over and potentially overflow. 'exp' will be adjusted in that +// case. +template <FormatStyle mode> +void RoundUp(Buffer *buffer, int *exp) { + char *p = &buffer->back(); + while (p >= buffer->begin && (*p == '9' || *p == '.')) { + if (*p == '9') *p = '0'; + --p; + } + + if (p < buffer->begin) { + *p = '1'; + buffer->begin = p; + if (mode == FormatStyle::Precision) { + std::swap(p[1], p[2]); // move the . + ++*exp; + buffer->pop_back(); + } + } else { + ++*p; + } +} + +void PrintExponent(int exp, char e, Buffer *out) { + out->push_back(e); + if (exp < 0) { + out->push_back('-'); + exp = -exp; + } else { + out->push_back('+'); + } + // Exponent digits. + if (exp > 99) { + out->push_back(exp / 100 + '0'); + out->push_back(exp / 10 % 10 + '0'); + out->push_back(exp % 10 + '0'); + } else { + out->push_back(exp / 10 + '0'); + out->push_back(exp % 10 + '0'); + } +} + +template <typename Float, typename Int> +constexpr bool CanFitMantissa() { + return std::numeric_limits<Float>::digits <= std::numeric_limits<Int>::digits; +} + +template <typename Float> +struct Decomposed { + Float mantissa; + int exponent; +}; + +// Decompose the double into an integer mantissa and an exponent. +template <typename Float> +Decomposed<Float> Decompose(Float v) { + int exp; + Float m = std::frexp(v, &exp); + m = std::ldexp(m, std::numeric_limits<Float>::digits); + exp -= std::numeric_limits<Float>::digits; + return {m, exp}; +} + +// Print 'digits' as decimal. +// In Fixed mode, we add a '.' at the end. +// In Precision mode, we add a '.' after the first digit. +template <FormatStyle mode, typename Int> +int PrintIntegralDigits(Int digits, Buffer *out) { + int printed = 0; + if (digits) { + for (; digits; digits /= 10) out->push_front(digits % 10 + '0'); + printed = out->size(); + if (mode == FormatStyle::Precision) { + out->push_front(*out->begin); + out->begin[1] = '.'; + } else { + out->push_back('.'); + } + } else if (mode == FormatStyle::Fixed) { + out->push_front('0'); + out->push_back('.'); + printed = 1; + } + return printed; +} + +// Back out 'extra_digits' digits and round up if necessary. +bool RemoveExtraPrecision(int extra_digits, bool has_leftover_value, + Buffer *out, int *exp_out) { + if (extra_digits <= 0) return false; + + // Back out the extra digits + out->end -= extra_digits; + + bool needs_to_round_up = [&] { + // We look at the digit just past the end. + // There must be 'extra_digits' extra valid digits after end. + if (*out->end > '5') return true; + if (*out->end < '5') return false; + if (has_leftover_value || std::any_of(out->end + 1, out->end + extra_digits, + [](char c) { return c != '0'; })) + return true; + + // Ends in ...50*, round to even. + return out->last_digit() % 2 == 1; + }(); + + if (needs_to_round_up) { + RoundUp<FormatStyle::Precision>(out, exp_out); + } + return true; +} + +// Print the value into the buffer. +// This will not include the exponent, which will be returned in 'exp_out' for +// Precision mode. +template <typename Int, typename Float, FormatStyle mode> +bool FloatToBufferImpl(Int int_mantissa, int exp, int precision, Buffer *out, + int *exp_out) { + assert((CanFitMantissa<Float, Int>())); + + const int int_bits = std::numeric_limits<Int>::digits; + + // In precision mode, we start printing one char to the right because it will + // also include the '.' + // In fixed mode we put the dot afterwards on the right. + out->begin = out->end = + out->data + 1 + kMaxFixedPrecision + (mode == FormatStyle::Precision); + + if (exp >= 0) { + if (std::numeric_limits<Float>::digits + exp > int_bits) { + // The value will overflow the Int + return false; + } + int digits_printed = PrintIntegralDigits<mode>(int_mantissa << exp, out); + int digits_to_zero_pad = precision; + if (mode == FormatStyle::Precision) { + *exp_out = digits_printed - 1; + digits_to_zero_pad -= digits_printed - 1; + if (RemoveExtraPrecision(-digits_to_zero_pad, false, out, exp_out)) { + return true; + } + } + for (; digits_to_zero_pad-- > 0;) out->push_back('0'); + return true; + } + + exp = -exp; + // We need at least 4 empty bits for the next decimal digit. + // We will multiply by 10. + if (exp > int_bits - 4) return false; + + const Int mask = (Int{1} << exp) - 1; + + // Print the integral part first. + int digits_printed = PrintIntegralDigits<mode>(int_mantissa >> exp, out); + int_mantissa &= mask; + + int fractional_count = precision; + if (mode == FormatStyle::Precision) { + if (digits_printed == 0) { + // Find the first non-zero digit, when in Precision mode. + *exp_out = 0; + if (int_mantissa) { + while (int_mantissa <= mask) { + int_mantissa *= 10; + --*exp_out; + } + } + out->push_front(static_cast<char>(int_mantissa >> exp) + '0'); + out->push_back('.'); + int_mantissa &= mask; + } else { + // We already have a digit, and a '.' + *exp_out = digits_printed - 1; + fractional_count -= *exp_out; + if (RemoveExtraPrecision(-fractional_count, int_mantissa != 0, out, + exp_out)) { + // If we had enough digits, return right away. + // The code below will try to round again otherwise. + return true; + } + } + } + + auto get_next_digit = [&] { + int_mantissa *= 10; + int digit = static_cast<int>(int_mantissa >> exp); + int_mantissa &= mask; + return digit; + }; + + // Print fractional_count more digits, if available. + for (; fractional_count > 0; --fractional_count) { + out->push_back(get_next_digit() + '0'); + } + + int next_digit = get_next_digit(); + if (next_digit > 5 || + (next_digit == 5 && (int_mantissa || out->last_digit() % 2 == 1))) { + RoundUp<mode>(out, exp_out); + } + + return true; +} + +template <FormatStyle mode, typename Float> +bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, + int *exp) { + if (precision > kMaxFixedPrecision) return false; + + // Try with uint64_t. + if (CanFitMantissa<Float, std::uint64_t>() && + FloatToBufferImpl<std::uint64_t, Float, mode>( + static_cast<std::uint64_t>(decomposed.mantissa), + static_cast<std::uint64_t>(decomposed.exponent), precision, out, exp)) + return true; + +#if defined(__SIZEOF_INT128__) + // If that is not enough, try with __uint128_t. + return CanFitMantissa<Float, __uint128_t>() && + FloatToBufferImpl<__uint128_t, Float, mode>( + static_cast<__uint128_t>(decomposed.mantissa), + static_cast<__uint128_t>(decomposed.exponent), precision, out, + exp); +#endif + return false; +} + +void WriteBufferToSink(char sign_char, string_view str, + const ConversionSpec &conv, FormatSinkImpl *sink) { + int left_spaces = 0, zeros = 0, right_spaces = 0; + int missing_chars = + conv.width() >= 0 ? std::max(conv.width() - static_cast<int>(str.size()) - + static_cast<int>(sign_char != 0), + 0) + : 0; + if (conv.flags().left) { + right_spaces = missing_chars; + } else if (conv.flags().zero) { + zeros = missing_chars; + } else { + left_spaces = missing_chars; + } + + sink->Append(left_spaces, ' '); + if (sign_char) sink->Append(1, sign_char); + sink->Append(zeros, '0'); + sink->Append(str); + sink->Append(right_spaces, ' '); +} + +template <typename Float> +bool FloatToSink(const Float v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + // Print the sign or the sign column. + Float abs_v = v; + char sign_char = 0; + if (std::signbit(abs_v)) { + sign_char = '-'; + abs_v = -abs_v; + } else if (conv.flags().show_pos) { + sign_char = '+'; + } else if (conv.flags().sign_col) { + sign_char = ' '; + } + + // Print nan/inf. + if (ConvertNonNumericFloats(sign_char, abs_v, conv, sink)) { + return true; + } + + int precision = conv.precision() < 0 ? 6 : conv.precision(); + + int exp = 0; + + auto decomposed = Decompose(abs_v); + + Buffer buffer; + + switch (conv.conv().id()) { + case ConversionChar::f: + case ConversionChar::F: + if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, + nullptr)) { + return FallbackToSnprintf(v, conv, sink); + } + if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); + break; + + case ConversionChar::e: + case ConversionChar::E: + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); + PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + break; + + case ConversionChar::g: + case ConversionChar::G: + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. + std::rotate(buffer.begin + 1, buffer.begin + 2, + buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.flags().alt) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + break; + + case ConversionChar::a: + case ConversionChar::A: + return FallbackToSnprintf(v, conv, sink); + + default: + return false; + } + + WriteBufferToSink(sign_char, + string_view(buffer.begin, buffer.end - buffer.begin), conv, + sink); + + return true; +} + +} // namespace + +bool ConvertFloatImpl(long double v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +bool ConvertFloatImpl(float v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +bool ConvertFloatImpl(double v, const ConversionSpec &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/float_conversion.h b/absl/strings/internal/str_format/float_conversion.h new file mode 100644 index 000000000000..8ba5566d3eef --- /dev/null +++ b/absl/strings/internal/str_format/float_conversion.h @@ -0,0 +1,21 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ + +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +namespace str_format_internal { + +bool ConvertFloatImpl(float v, const ConversionSpec &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(double v, const ConversionSpec &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(long double v, const ConversionSpec &conv, + FormatSinkImpl *sink); + +} // namespace str_format_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ diff --git a/absl/strings/internal/str_format/output.cc b/absl/strings/internal/str_format/output.cc new file mode 100644 index 000000000000..5c3795b737ca --- /dev/null +++ b/absl/strings/internal/str_format/output.cc @@ -0,0 +1,47 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/output.h" + +#include <errno.h> +#include <cstring> + +namespace absl { +namespace str_format_internal { + +void BufferRawSink::Write(string_view v) { + size_t to_write = std::min(v.size(), size_); + std::memcpy(buffer_, v.data(), to_write); + buffer_ += to_write; + size_ -= to_write; + total_written_ += v.size(); +} + +void FILERawSink::Write(string_view v) { + while (!v.empty() && !error_) { + if (size_t result = std::fwrite(v.data(), 1, v.size(), output_)) { + // Some progress was made. + count_ += result; + v.remove_prefix(result); + } else { + // Some error occurred. + if (errno != EINTR) { + error_ = errno; + } + } + } +} + +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/output.h b/absl/strings/internal/str_format/output.h new file mode 100644 index 000000000000..3b0aa5e7157e --- /dev/null +++ b/absl/strings/internal/str_format/output.h @@ -0,0 +1,101 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Output extension hooks for the Format library. +// `internal::InvokeFlush` calls the appropriate flush function for the +// specified output argument. +// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF. +// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ + +#include <cstdio> +#include <ostream> +#include <string> + +#include "absl/base/port.h" +#include "absl/strings/string_view.h" + +class Cord; + +namespace absl { +namespace str_format_internal { + +// RawSink implementation that writes into a char* buffer. +// It will not overflow the buffer, but will keep the total count of chars +// that would have been written. +class BufferRawSink { + public: + BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {} + + size_t total_written() const { return total_written_; } + void Write(string_view v); + + private: + char* buffer_; + size_t size_; + size_t total_written_ = 0; +}; + +// RawSink implementation that writes into a FILE*. +// It keeps track of the total number of bytes written and any error encountered +// during the writes. +class FILERawSink { + public: + explicit FILERawSink(std::FILE* output) : output_(output) {} + + void Write(string_view v); + + size_t count() const { return count_; } + int error() const { return error_; } + + private: + std::FILE* output_; + int error_ = 0; + size_t count_ = 0; +}; + +// Provide RawSink integration with common types from the STL. +inline void AbslFormatFlush(std::string* out, string_view s) { + out->append(s.begin(), s.size()); +} +inline void AbslFormatFlush(std::ostream* out, string_view s) { + out->write(s.begin(), s.size()); +} + +template <class AbslCord, typename = typename std::enable_if< + std::is_same<AbslCord, ::Cord>::value>::type> +inline void AbslFormatFlush(AbslCord* out, string_view s) { + out->Append(s); +} + +inline void AbslFormatFlush(FILERawSink* sink, string_view v) { + sink->Write(v); +} + +inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { + sink->Write(v); +} + +template <typename T> +auto InvokeFlush(T* out, string_view s) + -> decltype(str_format_internal::AbslFormatFlush(out, s)) { + str_format_internal::AbslFormatFlush(out, s); +} + +} // namespace str_format_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ diff --git a/absl/strings/internal/str_format/output_test.cc b/absl/strings/internal/str_format/output_test.cc new file mode 100644 index 000000000000..cc3c615557fe --- /dev/null +++ b/absl/strings/internal/str_format/output_test.cc @@ -0,0 +1,78 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/output.h" + +#include <sstream> +#include <string> + + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace absl { +namespace { + +TEST(InvokeFlush, String) { + std::string str = "ABC"; + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); + +#if UTIL_FORMAT_HAS_GLOBAL_STRING + std::string str2 = "ABC"; + str_format_internal::InvokeFlush(&str2, "DEF"); + EXPECT_EQ(str2, "ABCDEF"); +#endif // UTIL_FORMAT_HAS_GLOBAL_STRING +} + +TEST(InvokeFlush, Stream) { + std::stringstream str; + str << "ABC"; + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str.str(), "ABCDEF"); +} + +TEST(BufferRawSink, Limits) { + char buf[16]; + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World237237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World"); + str_format_internal::InvokeFlush(&bufsink, "237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World"); + str_format_internal::InvokeFlush(&bufsink, "237237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x"); + } +} + +} // namespace +} // namespace absl + diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc new file mode 100644 index 000000000000..10114f489c01 --- /dev/null +++ b/absl/strings/internal/str_format/parser.cc @@ -0,0 +1,294 @@ +#include "absl/strings/internal/str_format/parser.h" + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <cctype> +#include <cstdint> + +#include <algorithm> +#include <initializer_list> +#include <limits> +#include <ostream> +#include <string> +#include <unordered_set> + +namespace absl { +namespace str_format_internal { +namespace { + +bool CheckFastPathSetting(const UnboundConversion& conv) { + bool should_be_basic = !conv.flags.left && // + !conv.flags.show_pos && // + !conv.flags.sign_col && // + !conv.flags.alt && // + !conv.flags.zero && // + (conv.width.value() == -1) && + (conv.precision.value() == -1); + if (should_be_basic != conv.flags.basic) { + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags.basic, conv.flags.left, conv.flags.show_pos, + conv.flags.sign_col, conv.flags.alt, conv.flags.zero, + conv.width.value(), conv.precision.value()); + } + return should_be_basic == conv.flags.basic; +} + +// Keep a single table for all the conversion chars and length modifiers. +// We invert the length modifiers to make them negative so that we can easily +// test for them. +// Everything else is `none`, which is a negative constant. +using CC = ConversionChar::Id; +using LM = LengthMod::Id; +static constexpr std::int8_t none = -128; +static constexpr std::int8_t kIds[] = { + none, none, none, none, none, none, none, none, // 00-07 + none, none, none, none, none, none, none, none, // 08-0f + none, none, none, none, none, none, none, none, // 10-17 + none, none, none, none, none, none, none, none, // 18-1f + none, none, none, none, none, none, none, none, // 20-27 + none, none, none, none, none, none, none, none, // 28-2f + none, none, none, none, none, none, none, none, // 30-37 + none, none, none, none, none, none, none, none, // 38-3f + none, CC::A, none, CC::C, none, CC::E, CC::F, CC::G, // @ABCDEFG + none, none, none, none, ~LM::L, none, none, none, // HIJKLMNO + none, none, none, CC::S, none, none, none, none, // PQRSTUVW + CC::X, none, none, none, none, none, none, none, // XYZ[\]^_ + none, CC::a, none, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + ~LM::h, CC::i, ~LM::j, none, ~LM::l, none, CC::n, CC::o, // hijklmno + CC::p, ~LM::q, none, CC::s, ~LM::t, CC::u, none, none, // pqrstuvw + CC::x, none, ~LM::z, none, none, none, none, none, // xyz{|}~! + none, none, none, none, none, none, none, none, // 80-87 + none, none, none, none, none, none, none, none, // 88-8f + none, none, none, none, none, none, none, none, // 90-97 + none, none, none, none, none, none, none, none, // 98-9f + none, none, none, none, none, none, none, none, // a0-a7 + none, none, none, none, none, none, none, none, // a8-af + none, none, none, none, none, none, none, none, // b0-b7 + none, none, none, none, none, none, none, none, // b8-bf + none, none, none, none, none, none, none, none, // c0-c7 + none, none, none, none, none, none, none, none, // c8-cf + none, none, none, none, none, none, none, none, // d0-d7 + none, none, none, none, none, none, none, none, // d8-df + none, none, none, none, none, none, none, none, // e0-e7 + none, none, none, none, none, none, none, none, // e8-ef + none, none, none, none, none, none, none, none, // f0-f7 + none, none, none, none, none, none, none, none, // f8-ff +}; + +template <bool is_positional> +bool ConsumeConversion(string_view *src, UnboundConversion *conv, + int *next_arg) { + const char *pos = src->begin(); + const char *const end = src->end(); + char c; + // Read the next char into `c` and update `pos`. Reads '\0' if at end. + const auto get_char = [&] { c = pos == end ? '\0' : *pos++; }; + + const auto parse_digits = [&] { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10-1 + // digits. If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10 - 2; + for (get_char(); num_digits && std::isdigit(c); get_char()) { + --num_digits; + digits = 10 * digits + c - '0'; + } + return digits; + }; + + if (is_positional) { + get_char(); + if (c < '1' || c > '9') return false; + conv->arg_position = parse_digits(); + assert(conv->arg_position > 0); + if (c != '$') return false; + } + + get_char(); + + // We should start with the basic flag on. + assert(conv->flags.basic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. + if (c < 'A') { + conv->flags.basic = false; + + for (; c <= '0'; get_char()) { + switch (c) { + case '-': + conv->flags.left = true; + continue; + case '+': + conv->flags.show_pos = true; + continue; + case ' ': + conv->flags.sign_col = true; + continue; + case '#': + conv->flags.alt = true; + continue; + case '0': + conv->flags.zero = true; + continue; + } + break; + } + + if (c <= '9') { + if (c >= '0') { + int maybe_width = parse_digits(); + if (!is_positional && c == '$') { + if (*next_arg != 0) return false; + // Positional conversion. + *next_arg = -1; + conv->flags = Flags(); + conv->flags.basic = true; + return ConsumeConversion<true>(src, conv, next_arg); + } + conv->width.set_value(maybe_width); + } else if (c == '*') { + get_char(); + if (is_positional) { + if (c < '1' || c > '9') return false; + conv->width.set_from_arg(parse_digits()); + if (c != '$') return false; + get_char(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + get_char(); + if (std::isdigit(c)) { + conv->precision.set_value(parse_digits()); + } else if (c == '*') { + get_char(); + if (is_positional) { + if (c < '1' || c > '9') return false; + conv->precision.set_from_arg(parse_digits()); + if (c != '$') return false; + get_char(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + std::int8_t id = kIds[static_cast<unsigned char>(c)]; + + if (id < 0) { + if (id == none) return false; + + // It is a length modifier. + using str_format_internal::LengthMod; + LengthMod length_mod = LengthMod::FromId(static_cast<LM>(~id)); + get_char(); + if (c == 'h' && length_mod.id() == LengthMod::h) { + conv->length_mod = LengthMod::FromId(LengthMod::hh); + get_char(); + } else if (c == 'l' && length_mod.id() == LengthMod::l) { + conv->length_mod = LengthMod::FromId(LengthMod::ll); + get_char(); + } else { + conv->length_mod = length_mod; + } + id = kIds[static_cast<unsigned char>(c)]; + if (id < 0) return false; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = ConversionChar::FromId(static_cast<CC>(id)); + if (!is_positional) conv->arg_position = ++*next_arg; + *src = string_view(pos, end - pos); + return true; +} + +} // namespace + +bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv, + int *next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(src, conv, next_arg); + return ConsumeConversion<false>(src, conv, next_arg); +} + +struct ParsedFormatBase::ParsedFormatConsumer { + explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) + : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} + + bool Append(string_view s) { + if (s.empty()) return true; + + size_t text_end = AppendText(s); + + if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) { + // Let's extend the existing text run. + parsed->items_.back().text_end = text_end; + } else { + // Let's make a new text run. + parsed->items_.push_back({false, text_end, {}}); + } + return true; + } + + bool ConvertOne(const UnboundConversion &conv, string_view s) { + size_t text_end = AppendText(s); + parsed->items_.push_back({true, text_end, conv}); + return true; + } + + size_t AppendText(string_view s) { + memcpy(data_pos, s.data(), s.size()); + data_pos += s.size(); + return static_cast<size_t>(data_pos - parsed->data_.get()); + } + + ParsedFormatBase *parsed; + char* data_pos; +}; + +ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored, + std::initializer_list<Conv> convs) + : data_(format.empty() ? nullptr : new char[format.size()]) { + has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || + !MatchesConversions(allow_ignored, convs); +} + +bool ParsedFormatBase::MatchesConversions( + bool allow_ignored, std::initializer_list<Conv> convs) const { + std::unordered_set<int> used; + auto add_if_valid_conv = [&](int pos, char c) { + if (static_cast<size_t>(pos) > convs.size() || + !Contains(convs.begin()[pos - 1], c)) + return false; + used.insert(pos); + return true; + }; + for (const ConversionItem &item : items_) { + if (!item.is_conversion) continue; + auto &conv = item.conv; + if (conv.precision.is_from_arg() && + !add_if_valid_conv(conv.precision.get_from_arg(), '*')) + return false; + if (conv.width.is_from_arg() && + !add_if_valid_conv(conv.width.get_from_arg(), '*')) + return false; + if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false; + } + return used.size() == convs.size() || allow_ignored; +} + +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h new file mode 100644 index 000000000000..5bebc95540e6 --- /dev/null +++ b/absl/strings/internal/str_format/parser.h @@ -0,0 +1,291 @@ +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ + +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> + +#include <cassert> +#include <initializer_list> +#include <iosfwd> +#include <iterator> +#include <memory> +#include <vector> + +#include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +namespace str_format_internal { + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + UnboundConversion() + : flags() /* This is required to zero all the fields of flags. */ { + flags.basic = true; + } + + class InputValue { + public: + void set_value(int value) { + assert(value >= 0); + value_ = value; + } + int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() return true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecfied in this state. + void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + bool is_from_arg() const { return value_ < -1; } + int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags; + LengthMod length_mod; + ConversionChar conv; +}; + +// Consume conversion spec prefix (not including '%') of '*src' if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, the front of src is advanced such that src becomes the +// part following the conversion spec, and the spec part is broken down and +// returned in 'conv'. +// If invalid, returns false and leaves 'src' unmodified. +// For example: +// Given "d9", returns "d", and leaves src="9", +// Given "!", returns "" and leaves src="!". +bool ConsumeUnboundConversion(string_view* src, UnboundConversion* conv, + int* next_arg); + +// Parse the format std::string provided in 'src' and pass the identified items into +// 'consumer'. +// Text runs will be passed by calling +// Consumer::Append(string_view); +// ConversionItems will be passed by calling +// Consumer::ConvertOne(UnboundConversion, string_view); +// In the case of ConvertOne, the string_view that is passed is the +// portion of the format std::string corresponding to the conversion, not including +// the leading %. On success, it returns true. On failure, it stops and returns +// false. +template <typename Consumer> +bool ParseFormatString(string_view src, Consumer consumer) { + int next_arg = 0; + while (!src.empty()) { + const char* percent = + static_cast<const char*>(memchr(src.begin(), '%', src.size())); + if (!percent) { + // We found the last substring. + return consumer.Append(src); + } + // We found a percent, so push the text run then process the percent. + size_t percent_loc = percent - src.data(); + if (!consumer.Append(string_view(src.data(), percent_loc))) return false; + if (percent + 1 >= src.end()) return false; + + UnboundConversion conv; + + switch (percent[1]) { + case '%': + if (!consumer.Append("%")) return false; + src.remove_prefix(percent_loc + 2); + continue; + +#define PARSER_CASE(ch) \ + case #ch[0]: \ + src.remove_prefix(percent_loc + 2); \ + conv.conv = ConversionChar::FromId(ConversionChar::ch); \ + conv.arg_position = ++next_arg; \ + break; + ABSL_CONVERSION_CHARS_EXPAND_(PARSER_CASE, ); +#undef PARSER_CASE + + default: + src.remove_prefix(percent_loc + 1); + if (!ConsumeUnboundConversion(&src, &conv, &next_arg)) return false; + break; + } + if (next_arg == 0) { + // This indicates an error in the format std::string. + // The only way to get next_arg == 0 is to have a positional argument + // first which sets next_arg to -1 and then a non-positional argument + // which does ++next_arg. + // Checking here seems to be the cheapeast place to do it. + return false; + } + if (!consumer.ConvertOne( + conv, string_view(percent + 1, src.data() - (percent + 1)))) { + return false; + } + } + return true; +} + +// Always returns true, or fails to compile in a constexpr context if s does not +// point to a constexpr char array. +constexpr bool EnsureConstexpr(string_view s) { + return s.empty() || s[0] == s[0]; +} + +class ParsedFormatBase { + public: + explicit ParsedFormatBase(string_view format, bool allow_ignored, + std::initializer_list<Conv> convs); + + ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } + + ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } + + ParsedFormatBase& operator=(const ParsedFormatBase& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + items_ = other.items_; + size_t text_size = items_.empty() ? 0 : items_.back().text_end; + data_.reset(new char[text_size]); + memcpy(data_.get(), other.data_.get(), text_size); + return *this; + } + + ParsedFormatBase& operator=(ParsedFormatBase&& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + data_ = std::move(other.data_); + items_ = std::move(other.items_); + // Reset the vector to make sure the invariants hold. + other.items_.clear(); + return *this; + } + + template <typename Consumer> + bool ProcessFormat(Consumer consumer) const { + const char* const base = data_.get(); + string_view text(base, 0); + for (const auto& item : items_) { + text = string_view(text.end(), (base + item.text_end) - text.end()); + if (item.is_conversion) { + if (!consumer.ConvertOne(item.conv, text)) return false; + } else { + if (!consumer.Append(text)) return false; + } + } + return !has_error_; + } + + bool has_error() const { return has_error_; } + + private: + // Returns whether the conversions match and if !allow_ignored it verifies + // that all conversions are used by the format. + bool MatchesConversions(bool allow_ignored, + std::initializer_list<Conv> convs) const; + + struct ParsedFormatConsumer; + + struct ConversionItem { + bool is_conversion; + // Points to the past-the-end location of this element in the data_ array. + size_t text_end; + UnboundConversion conv; + }; + + bool has_error_; + std::unique_ptr<char[]> data_; + std::vector<ConversionItem> items_; +}; + + +// A value type representing a preparsed format. These can be created, copied +// around, and reused to speed up formatting loops. +// The user must specify through the template arguments the conversion +// characters used in the format. This will be checked at compile time. +// +// This class uses Conv enum values to specify each argument. +// This allows for more flexibility as you can specify multiple possible +// conversion characters for each argument. +// ParsedFormat<char...> is a simplified alias for when the user only +// needs to specify a single conversion character for each argument. +// +// Example: +// // Extended format supports multiple characters per argument: +// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // 'format' can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +// +// This class also supports runtime format checking with the ::New() and +// ::NewAllowIgnored() factory functions. +// This is the only API that allows the user to pass a runtime specified format +// std::string. These factory functions will return NULL if the format does not match +// the conversions requested by the user. +template <str_format_internal::Conv... C> +class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { + public: + explicit ExtendedParsedFormat(string_view format) +#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(format), + "Format std::string is not constexpr."), + enable_if(str_format_internal::ValidFormatImpl<C...>(format), + "Format specified does not match the template arguments."))) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + : ExtendedParsedFormat(format, false) { + } + + // ExtendedParsedFormat factory function. + // The user still has to specify the conversion characters, but they will not + // be checked at compile time. Instead, it will be checked at runtime. + // This delays the checking to runtime, but allows the user to pass + // dynamically sourced formats. + // It returns NULL if the format does not match the conversion characters. + // The user is responsible for checking the return value before using it. + // + // The 'New' variant will check that all the specified arguments are being + // consumed by the format and return NULL if any argument is being ignored. + // The 'NewAllowIgnored' variant will not verify this and will allow formats + // that ignore arguments. + static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { + return New(format, false); + } + static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( + string_view format) { + return New(format, true); + } + + private: + static std::unique_ptr<ExtendedParsedFormat> New(string_view format, + bool allow_ignored) { + std::unique_ptr<ExtendedParsedFormat> conv( + new ExtendedParsedFormat(format, allow_ignored)); + if (conv->has_error()) return nullptr; + return conv; + } + + ExtendedParsedFormat(string_view s, bool allow_ignored) + : ParsedFormatBase(s, allow_ignored, {C...}) {} +}; +} // namespace str_format_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc new file mode 100644 index 000000000000..e698020b1aba --- /dev/null +++ b/absl/strings/internal/str_format/parser_test.cc @@ -0,0 +1,379 @@ +#include "absl/strings/internal/str_format/parser.h" + +#include <string.h> +#include "gtest/gtest.h" +#include "absl/base/macros.h" + +namespace absl { +namespace str_format_internal { + +namespace { + +TEST(LengthModTest, Names) { + struct Expectation { + int line; + LengthMod::Id id; + const char *name; + }; + const Expectation kExpect[] = { + {__LINE__, LengthMod::none, "" }, + {__LINE__, LengthMod::h, "h" }, + {__LINE__, LengthMod::hh, "hh"}, + {__LINE__, LengthMod::l, "l" }, + {__LINE__, LengthMod::ll, "ll"}, + {__LINE__, LengthMod::L, "L" }, + {__LINE__, LengthMod::j, "j" }, + {__LINE__, LengthMod::z, "z" }, + {__LINE__, LengthMod::t, "t" }, + {__LINE__, LengthMod::q, "q" }, + }; + EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), LengthMod::kNumValues); + for (auto e : kExpect) { + SCOPED_TRACE(e.line); + LengthMod mod = LengthMod::FromId(e.id); + EXPECT_EQ(e.id, mod.id()); + EXPECT_EQ(e.name, mod.name()); + } +} + +TEST(ConversionCharTest, Names) { + struct Expectation { + ConversionChar::Id id; + char name; + }; + // clang-format off + const Expectation kExpect[] = { +#define X(c) {ConversionChar::c, #c[0]} + X(c), X(C), X(s), X(S), // text + X(d), X(i), X(o), X(u), X(x), X(X), // int + X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float + X(n), X(p), // misc +#undef X + {ConversionChar::none, '\0'}, + }; + // clang-format on + EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), ConversionChar::kNumValues); + for (auto e : kExpect) { + SCOPED_TRACE(e.name); + ConversionChar v = ConversionChar::FromId(e.id); + EXPECT_EQ(e.id, v.id()); + EXPECT_EQ(e.name, v.Char()); + } +} + +class ConsumeUnboundConversionTest : public ::testing::Test { + public: + typedef UnboundConversion Props; + string_view Consume(string_view* src) { + int next = 0; + const char* prev_begin = src->begin(); + o = UnboundConversion(); // refresh + ConsumeUnboundConversion(src, &o, &next); + return {prev_begin, static_cast<size_t>(src->begin() - prev_begin)}; + } + + bool Run(const char *fmt, bool force_positional = false) { + string_view src = fmt; + int next = force_positional ? -1 : 0; + o = UnboundConversion(); // refresh + return ConsumeUnboundConversion(&src, &o, &next) && src.empty(); + } + UnboundConversion o; +}; + +TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { + struct Expectation { + int line; + const char *src; + const char *out; + const char *src_post; + }; + const Expectation kExpect[] = { + {__LINE__, "", "", "" }, + {__LINE__, "b", "", "b" }, // 'b' is invalid + {__LINE__, "ba", "", "ba"}, // 'b' is invalid + {__LINE__, "l", "", "l" }, // just length mod isn't okay + {__LINE__, "d", "d", "" }, // basic + {__LINE__, "d ", "d", " " }, // leave suffix + {__LINE__, "dd", "d", "d" }, // don't be greedy + {__LINE__, "d9", "d", "9" }, // leave non-space suffix + {__LINE__, "dzz", "d", "zz"}, // length mod as suffix + {__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed. + {__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width + {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.line); + string_view src = e.src; + EXPECT_EQ(e.src, src); + string_view out = Consume(&src); + EXPECT_EQ(e.out, out); + EXPECT_EQ(e.src_post, src); + } +} + +TEST_F(ConsumeUnboundConversionTest, BasicConversion) { + EXPECT_FALSE(Run("")); + EXPECT_FALSE(Run("z")); + + EXPECT_FALSE(Run("dd")); // no excess allowed + + EXPECT_TRUE(Run("d")); + EXPECT_EQ('d', o.conv.Char()); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + EXPECT_EQ(1, o.arg_position); + EXPECT_EQ(LengthMod::none, o.length_mod.id()); +} + +TEST_F(ConsumeUnboundConversionTest, ArgPosition) { + EXPECT_TRUE(Run("d")); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("3$d")); + EXPECT_EQ(3, o.arg_position); + EXPECT_TRUE(Run("1$d")); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("1$d", true)); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("123$d")); + EXPECT_EQ(123, o.arg_position); + EXPECT_TRUE(Run("123$d", true)); + EXPECT_EQ(123, o.arg_position); + EXPECT_TRUE(Run("10$d")); + EXPECT_EQ(10, o.arg_position); + EXPECT_TRUE(Run("10$d", true)); + EXPECT_EQ(10, o.arg_position); + + // Position can't be zero. + EXPECT_FALSE(Run("0$d")); + EXPECT_FALSE(Run("0$d", true)); + EXPECT_FALSE(Run("1$*0$d")); + EXPECT_FALSE(Run("1$.*0$d")); + + // Position can't start with a zero digit at all. That is not a 'decimal'. + EXPECT_FALSE(Run("01$p")); + EXPECT_FALSE(Run("01$p", true)); + EXPECT_FALSE(Run("1$*01$p")); + EXPECT_FALSE(Run("1$.*01$p")); +} + +TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) { + EXPECT_TRUE(Run("14d")); + EXPECT_EQ('d', o.conv.Char()); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + + EXPECT_TRUE(Run("14.d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run(".d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run(".5d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(5, o.precision.value()); + + EXPECT_TRUE(Run(".0d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run("14.5d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_EQ(5, o.precision.value()); + + EXPECT_TRUE(Run("*.*d")); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(1, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(2, o.precision.get_from_arg()); + EXPECT_EQ(3, o.arg_position); + + EXPECT_TRUE(Run("*d")); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(1, o.width.get_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + EXPECT_EQ(2, o.arg_position); + + EXPECT_TRUE(Run(".*d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(1, o.precision.get_from_arg()); + EXPECT_EQ(2, o.arg_position); + + // mixed implicit and explicit: didn't specify arg position. + EXPECT_FALSE(Run("*23$.*34$d")); + + EXPECT_TRUE(Run("12$*23$.*34$d")); + EXPECT_EQ(12, o.arg_position); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(23, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(34, o.precision.get_from_arg()); + + EXPECT_TRUE(Run("2$*5$.*9$d")); + EXPECT_EQ(2, o.arg_position); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(5, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(9, o.precision.get_from_arg()); + + EXPECT_FALSE(Run(".*0$d")) << "no arg 0"; +} + +TEST_F(ConsumeUnboundConversionTest, Flags) { + static const char kAllFlags[] = "-+ #0"; + static const int kNumFlags = ABSL_ARRAYSIZE(kAllFlags) - 1; + for (int rev = 0; rev < 2; ++rev) { + for (int i = 0; i < 1 << kNumFlags; ++i) { + std::string fmt; + for (int k = 0; k < kNumFlags; ++k) + if ((i >> k) & 1) fmt += kAllFlags[k]; + // flag order shouldn't matter + if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); } + fmt += 'd'; + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt.c_str())); + EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left); + EXPECT_EQ(fmt.find('+') == std::string::npos, !o.flags.show_pos); + EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col); + EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt); + EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero); + } + } +} + +TEST_F(ConsumeUnboundConversionTest, BasicFlag) { + // Flag is on + for (const char* fmt : {"d", "llx", "G", "1$X"}) { + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt)); + EXPECT_TRUE(o.flags.basic); + } + + // Flag is off + for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt)); + EXPECT_FALSE(o.flags.basic); + } +} + +struct SummarizeConsumer { + std::string* out; + explicit SummarizeConsumer(std::string* out) : out(out) {} + + bool Append(string_view s) { + *out += "[" + std::string(s) + "]"; + return true; + } + + bool ConvertOne(const UnboundConversion& conv, string_view s) { + *out += "{"; + *out += std::string(s); + *out += ":"; + *out += std::to_string(conv.arg_position) + "$"; + if (conv.width.is_from_arg()) { + *out += std::to_string(conv.width.get_from_arg()) + "$*"; + } + if (conv.precision.is_from_arg()) { + *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; + } + *out += conv.conv.Char(); + *out += "}"; + return true; + } +}; + +std::string SummarizeParsedFormat(const ParsedFormatBase& pc) { + std::string out; + if (!pc.ProcessFormat(SummarizeConsumer(&out))) out += "!"; + return out; +} + +class ParsedFormatTest : public testing::Test {}; + +TEST_F(ParsedFormatTest, ValueSemantics) { + ParsedFormatBase p1({}, true, {}); // empty format + EXPECT_EQ("", SummarizeParsedFormat(p1)); + + ParsedFormatBase p2 = p1; // copy construct (empty) + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); + + p1 = ParsedFormatBase("hello%s", true, {Conv::s}); // move assign + EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1)); + + ParsedFormatBase p3 = p1; // copy construct (nonempty) + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p3)); + + using std::swap; + swap(p1, p2); + EXPECT_EQ("", SummarizeParsedFormat(p1)); + EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p2)); + swap(p1, p2); // undo + + p2 = p1; // copy assign + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); +} + +struct ExpectParse { + const char* in; + std::initializer_list<Conv> conv_set; + const char* out; +}; + +TEST_F(ParsedFormatTest, Parsing) { + // Parse should be equivalent to that obtained by ConversionParseIterator. + // No need to retest the parsing edge cases here. + const ExpectParse kExpect[] = { + {"", {}, ""}, + {"ab", {}, "[ab]"}, + {"a%d", {Conv::d}, "[a]{d:1$d}"}, + {"a%+d", {Conv::d}, "[a]{+d:1$d}"}, + {"a% d", {Conv::d}, "[a]{ d:1$d}"}, + {"a%b %d", {}, "[a]!"}, // stop after error + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.in); + EXPECT_EQ(e.out, + SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set))); + } +} + +TEST_F(ParsedFormatTest, ParsingFlagOrder) { + const ExpectParse kExpect[] = { + {"a%+ 0d", {Conv::d}, "[a]{+ 0d:1$d}"}, + {"a%+0 d", {Conv::d}, "[a]{+0 d:1$d}"}, + {"a%0+ d", {Conv::d}, "[a]{0+ d:1$d}"}, + {"a% +0d", {Conv::d}, "[a]{ +0d:1$d}"}, + {"a%0 +d", {Conv::d}, "[a]{0 +d:1$d}"}, + {"a% 0+d", {Conv::d}, "[a]{ 0+d:1$d}"}, + {"a%+ 0+d", {Conv::d}, "[a]{+ 0+d:1$d}"}, + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.in); + EXPECT_EQ(e.out, + SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set))); + } +} + +} // namespace +} // namespace str_format_internal +} // namespace absl diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h new file mode 100644 index 000000000000..98e0fef4906b --- /dev/null +++ b/absl/strings/str_format.h @@ -0,0 +1,512 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_format.h +// ----------------------------------------------------------------------------- +// +// The `str_format` library is a typesafe replacement for the family of +// `printf()` std::string formatting routines within the `<cstdio>` standard library +// header. Like the `printf` family, the `str_format` uses a "format string" to +// perform argument substitutions based on types. +// +// Example: +// +// std::string s = absl::StrFormat("%s %s You have $%d!", "Hello", name, dollars); +// +// The library consists of the following basic utilities: +// +// * `absl::StrFormat()`, a type-safe replacement for `std::sprintf()`, to +// write a format std::string to a `string` value. +// * `absl::StrAppendFormat()` to append a format std::string to a `string` +// * `absl::StreamFormat()` to more efficiently write a format std::string to a +// stream, such as`std::cout`. +// * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as +// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`. +// +// Note: a version of `std::sprintf()` is not supported as it is +// generally unsafe due to buffer overflows. +// +// Additionally, you can provide a format std::string (and its associated arguments) +// using one of the following abstractions: +// +// * A `FormatSpec` class template fully encapsulates a format std::string and its +// type arguments and is usually provided to `str_format` functions as a +// variadic argument of type `FormatSpec<Arg...>`. The `FormatSpec<Args...>` +// template is evaluated at compile-time, providing type safety. +// * A `ParsedFormat` instance, which encapsulates a specific, pre-compiled +// format std::string for a specific set of type(s), and which can be passed +// between API boundaries. (The `FormatSpec` type should not be used +// directly.) +// +// The `str_format` library provides the ability to output its format strings to +// arbitrary sink types: +// +// * A generic `Format()` function to write outputs to arbitrary sink types, +// which must implement a `RawSinkFormat` interface. (See +// `str_format_sink.h` for more information.) +// +// * A `FormatUntyped()` function that is similar to `Format()` except it is +// loosely typed. `FormatUntyped()` is not a template and does not perform +// any compile-time checking of the format std::string; instead, it returns a +// boolean from a runtime check. +// +// In addition, the `str_format` library provides extension points for +// augmenting formatting to new types. These extensions are fully documented +// within the `str_format_extension.h` header file. +#ifndef ABSL_STRINGS_STR_FORMAT_H_ +#define ABSL_STRINGS_STR_FORMAT_H_ + +#include <cstdio> +#include <string> + +#include "absl/strings/internal/str_format/arg.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/bind.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/checker.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/extension.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/parser.h" // IWYU pragma: export + +namespace absl { + +// UntypedFormatSpec +// +// A type-erased class that can be used directly within untyped API entry +// points. An `UntypedFormatSpec` is specifically used as an argument to +// `FormatUntyped()`. +// +// Example: +// +// absl::UntypedFormatSpec format("%d"); +// std::string out; +// CHECK(absl::FormatUntyped(&out, format, {absl::FormatArg(1)})); +class UntypedFormatSpec { + public: + UntypedFormatSpec() = delete; + UntypedFormatSpec(const UntypedFormatSpec&) = delete; + UntypedFormatSpec& operator=(const UntypedFormatSpec&) = delete; + + explicit UntypedFormatSpec(string_view s) : spec_(s) {} + + protected: + explicit UntypedFormatSpec(const str_format_internal::ParsedFormatBase* pc) + : spec_(pc) {} + + private: + friend str_format_internal::UntypedFormatSpecImpl; + str_format_internal::UntypedFormatSpecImpl spec_; +}; + +// FormatStreamed() +// +// Takes a streamable argument and returns an object that can print it +// with '%s'. Allows printing of types that have an `operator<<` but no +// intrinsic type support within `StrFormat()` itself. +// +// Example: +// +// absl::StrFormat("%s", absl::FormatStreamed(obj)); +template <typename T> +str_format_internal::StreamedWrapper<T> FormatStreamed(const T& v) { + return str_format_internal::StreamedWrapper<T>(v); +} + +// FormatCountCapture +// +// This class provides a way to safely wrap `StrFormat()` captures of `%n` +// conversions, which denote the number of characters written by a formatting +// operation to this point, into an integer value. +// +// This wrapper is designed to allow safe usage of `%n` within `StrFormat(); in +// the `printf()` family of functions, `%n` is not safe to use, as the `int *` +// buffer can be used to capture arbitrary data. +// +// Example: +// +// int n = 0; +// std::string s = absl::StrFormat("%s%d%n", "hello", 123, +// absl::FormatCountCapture(&n)); +// EXPECT_EQ(8, n); +class FormatCountCapture { + public: + explicit FormatCountCapture(int* p) : p_(p) {} + + private: + // FormatCountCaptureHelper is used to define FormatConvertImpl() for this + // class. + friend struct str_format_internal::FormatCountCaptureHelper; + // Unused() is here because of the false positive from -Wunused-private-field + // p_ is used in the templated function of the friend FormatCountCaptureHelper + // class. + int* Unused() { return p_; } + int* p_; +}; + +// FormatSpec +// +// The `FormatSpec` type defines the makeup of a format std::string within the +// `str_format` library. You should not need to use or manipulate this type +// directly. A `FormatSpec` is a variadic class template that is evaluated at +// compile-time, according to the format std::string and arguments that are passed +// to it. +// +// For a `FormatSpec` to be valid at compile-time, it must be provided as +// either: +// +// * A `constexpr` literal or `absl::string_view`, which is how it most often +// used. +// * A `ParsedFormat` instantiation, which ensures the format std::string is +// valid before use. (See below.) +// +// Example: +// +// // Provided as a std::string literal. +// absl::StrFormat("Welcome to %s, Number %d!", "The Village", 6); +// +// // Provided as a constexpr absl::string_view. +// constexpr absl::string_view formatString = "Welcome to %s, Number %d!"; +// absl::StrFormat(formatString, "The Village", 6); +// +// // Provided as a pre-compiled ParsedFormat object. +// // Note that this example is useful only for illustration purposes. +// absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!"); +// absl::StrFormat(formatString, "TheVillage", 6); +// +// A format std::string generally follows the POSIX syntax as used within the POSIX +// `printf` specification. +// +// (See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/printf.html.) +// +// In specific, the `FormatSpec` supports the following type specifiers: +// * `c` for characters +// * `s` for strings +// * `d` or `i` for integers +// * `o` for unsigned integer conversions into octal +// * `x` or `X` for unsigned integer conversions into hex +// * `u` for unsigned integers +// * `f` or `F` for floating point values into decimal notation +// * `e` or `E` for floating point values into exponential notation +// * `a` or `A` for floating point values into hex exponential notation +// * `g` or `G` for floating point values into decimal or exponential +// notation based on their precision +// * `p` for pointer address values +// * `n` for the special case of writing out the number of characters +// written to this point. The resulting value must be captured within an +// `absl::FormatCountCapture` type. +// +// NOTE: `o`, `x\X` and `u` will convert signed values to their unsigned +// counterpart before formatting. +// +// Examples: +// "%c", 'a' -> "a" +// "%c", 32 -> " " +// "%s", "C" -> "C" +// "%s", std::string("C++") -> "C++" +// "%d", -10 -> "-10" +// "%o", 10 -> "12" +// "%x", 16 -> "10" +// "%f", 123456789 -> "123456789.000000" +// "%e", .01 -> "1.00000e-2" +// "%a", -3.0 -> "-0x1.8p+1" +// "%g", .01 -> "1e-2" +// "%p", *int -> "0x7ffdeb6ad2a4" +// +// int n = 0; +// std::string s = absl::StrFormat( +// "%s%d%n", "hello", 123, absl::FormatCountCapture(&n)); +// EXPECT_EQ(8, n); +// +// The `FormatSpec` intrinsically supports all of these fundamental C++ types: +// +// * Characters: `char`, `signed char`, `unsigned char` +// * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`, +// `unsigned long`, `long long`, `unsigned long long` +// * Floating-point: `float`, `double`, `long double` +// +// However, in the `str_format` library, a format conversion specifies a broader +// C++ conceptual category instead of an exact type. For example, `%s` binds to +// any std::string-like argument, so `std::string`, `absl::string_view`, and +// `const char*` are all accepted. Likewise, `%d` accepts any integer-like +// argument, etc. + +template <typename... Args> +using FormatSpec = + typename str_format_internal::FormatSpecDeductionBarrier<Args...>::type; + +using absl::str_format_internal::ExtendedParsedFormat; + +// ParsedFormat +// +// A `ParsedFormat` is a class template representing a preparsed `FormatSpec`, +// with template arguments specifying the conversion characters used within the +// format std::string. Such characters must be valid format type specifiers, and +// these type specifiers are checked at compile-time. +// +// Instances of `ParsedFormat` can be created, copied, and reused to speed up +// formatting loops. A `ParsedFormat` may either be constructed statically, or +// dynamically through its `New()` factory function, which only constructs a +// runtime object if the format is valid at that time. +// +// Example: +// +// // Verified at compile time. +// absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!"); +// absl::StrFormat(formatString, "TheVillage", 6); +// +// // Verified at runtime. +// auto format_runtime = absl::ParsedFormat<'d'>::New(format_string); +// if (format_runtime) { +// value = absl::StrFormat(*format_runtime, i); +// } else { +// ... error case ... +// } +template <char... Conv> +using ParsedFormat = str_format_internal::ExtendedParsedFormat< + str_format_internal::ConversionCharToConv(Conv)...>; + +// StrFormat() +// +// Returns a `string` given a `printf()`-style format std::string and zero or more +// additional arguments. Use it as you would `sprintf()`. `StrFormat()` is the +// primary formatting function within the `str_format` library, and should be +// used in most cases where you need type-safe conversion of types into +// formatted strings. +// +// The format std::string generally consists of ordinary character data along with +// one or more format conversion specifiers (denoted by the `%` character). +// Ordinary character data is returned unchanged into the result std::string, while +// each conversion specification performs a type substitution from +// `StrFormat()`'s other arguments. See the comments for `FormatSpec` for full +// information on the makeup of this format std::string. +// +// Example: +// +// std::string s = absl::StrFormat( +// "Welcome to %s, Number %d!", "The Village", 6); +// EXPECT_EQ("Welcome to The Village, Number 6!", s); +// +// Returns an empty std::string in case of error. +template <typename... Args> +ABSL_MUST_USE_RESULT std::string StrFormat(const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// StrAppendFormat() +// +// Appends to a `dst` std::string given a format std::string, and zero or more additional +// arguments, returning `*dst` as a convenience for chaining purposes. Appends +// nothing in case of error (but possibly alters its capacity). +// +// Example: +// +// std::string orig("For example PI is approximately "); +// std::cout << StrAppendFormat(&orig, "%12.6f", 3.14); +template <typename... Args> +std::string& StrAppendFormat(std::string* dst, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::AppendPack( + dst, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// StreamFormat() +// +// Writes to an output stream given a format std::string and zero or more arguments, +// generally in a manner that is more efficient than streaming the result of +// `absl:: StrFormat()`. The returned object must be streamed before the full +// expression ends. +// +// Example: +// +// std::cout << StreamFormat("%12.6f", 3.14); +template <typename... Args> +ABSL_MUST_USE_RESULT str_format_internal::Streamable StreamFormat( + const FormatSpec<Args...>& format, const Args&... args) { + return str_format_internal::Streamable( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// PrintF() +// +// Writes to stdout given a format std::string and zero or more arguments. This +// function is functionally equivalent to `std::print()` (and type-safe); prefer +// `absl::PrintF()` over `std::printf()`. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// absl::PrintF("The capital of Mongolia is: %s \n", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int PrintF(const FormatSpec<Args...>& format, const Args&... args) { + return str_format_internal::FprintF( + stdout, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// FPrintF() +// +// Writes to a file given a format std::string and zero or more arguments. This +// function is functionally equivalent to `std::fprint()` (and type-safe); +// prefer `absl::FPrintF()` over `std::fprintf()`. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// absl::FPrintF("The capital of Mongolia is: %s \n", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int FPrintF(std::FILE* output, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FprintF( + output, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// SNPrintF() +// +// Writes to a sized buffer given a format std::string and zero or more arguments. +// This function is functionally equivalent to `std::snprint()` (and type-safe); +// prefer `absl::SNPrintF()` over `std::snprintf()`. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// absl::FPrintF("The capital of Mongolia is: %s \n", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int SNPrintF(char* output, std::size_t size, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::SnprintF( + output, size, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// ----------------------------------------------------------------------------- +// Custom Output Formatting Functions +// ----------------------------------------------------------------------------- + +// FormatRawSink +// +// FormatRawSink is a type erased wrapper around arbitrary sink objects +// specifically used as an argument to `Format()`. +// FormatRawSink does not own the passed sink object. The passed object must +// outlive the FormatRawSink. +class FormatRawSink { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, + typename = typename std::enable_if<std::is_constructible< + str_format_internal::FormatRawSinkImpl, T*>::value>::type> + FormatRawSink(T* raw) // NOLINT + : sink_(raw) {} + + private: + friend str_format_internal::FormatRawSinkImpl; + str_format_internal::FormatRawSinkImpl sink_; +}; + +// Format() +// +// Writes a formatted std::string to an arbitrary sink object (implementing the +// `absl::FormatRawSink` interface), using a format std::string and zero or more +// additional arguments. +// +// By default, `string` and `std::ostream` are supported as destination objects. +// +// `absl::Format()` is a generic version of `absl::StrFormat(), for custom +// sinks. The format std::string, like format strings for `StrFormat()`, is checked +// at compile-time. +// +// On failure, this function returns `false` and the state of the sink is +// unspecified. +template <typename... Args> +bool Format(FormatRawSink raw_sink, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FormatUntyped( + str_format_internal::FormatRawSinkImpl::Extract(raw_sink), + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// FormatArg +// +// A type-erased handle to a format argument specifically used as an argument to +// `FormatUntyped()`. You may construct `FormatArg` by passing +// reference-to-const of any printable type. `FormatArg` is both copyable and +// assignable. The source data must outlive the `FormatArg` instance. See +// example below. +// +using FormatArg = str_format_internal::FormatArgImpl; + +// FormatUntyped() +// +// Writes a formatted std::string to an arbitrary sink object (implementing the +// `absl::FormatRawSink` interface), using an `UntypedFormatSpec` and zero or +// more additional arguments. +// +// This function acts as the most generic formatting function in the +// `str_format` library. The caller provides a raw sink, an unchecked format +// std::string, and (usually) a runtime specified list of arguments; no compile-time +// checking of formatting is performed within this function. As a result, a +// caller should check the return value to verify that no error occurred. +// On failure, this function returns `false` and the state of the sink is +// unspecified. +// +// The arguments are provided in an `absl::Span<const absl::FormatArg>`. +// Each `absl::FormatArg` object binds to a single argument and keeps a +// reference to it. The values used to create the `FormatArg` objects must +// outlive this function call. (See `str_format_arg.h` for information on +// the `FormatArg` class.)_ +// +// Example: +// +// std::optional<std::string> FormatDynamic(const std::string& in_format, +// const vector<std::string>& in_args) { +// std::string out; +// std::vector<absl::FormatArg> args; +// for (const auto& v : in_args) { +// // It is important that 'v' is a reference to the objects in in_args. +// // The values we pass to FormatArg must outlive the call to +// // FormatUntyped. +// args.emplace_back(v); +// } +// absl::UntypedFormatSpec format(in_format); +// if (!absl::FormatUntyped(&out, format, args)) { +// return std::nullopt; +// } +// return std::move(out); +// } +// +ABSL_MUST_USE_RESULT inline bool FormatUntyped( + FormatRawSink raw_sink, const UntypedFormatSpec& format, + absl::Span<const FormatArg> args) { + return str_format_internal::FormatUntyped( + str_format_internal::FormatRawSinkImpl::Extract(raw_sink), + str_format_internal::UntypedFormatSpecImpl::Extract(format), args); +} + +} // namespace absl +#endif // ABSL_STRINGS_STR_FORMAT_H_ diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc new file mode 100644 index 000000000000..fe742bf99533 --- /dev/null +++ b/absl/strings/str_format_test.cc @@ -0,0 +1,603 @@ + +#include <cstdarg> +#include <cstdint> +#include <cstdio> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace { +using str_format_internal::FormatArgImpl; + +class FormatEntryPointTest : public ::testing::Test { }; + +TEST_F(FormatEntryPointTest, Format) { + std::string sink; + EXPECT_TRUE(Format(&sink, "A format %d", 123)); + EXPECT_EQ("A format 123", sink); + sink.clear(); + + ParsedFormat<'d'> pc("A format %d"); + EXPECT_TRUE(Format(&sink, pc, 123)); + EXPECT_EQ("A format 123", sink); +} +TEST_F(FormatEntryPointTest, UntypedFormat) { + constexpr const char* formats[] = { + "", + "a", + "%80d", +#if !defined(_MSC_VER) && !defined(__ANDROID__) + // MSVC and Android don't support positional syntax. + "complicated multipart %% %1$d format %1$0999d", +#endif // _MSC_VER + }; + for (const char* fmt : formats) { + std::string actual; + int i = 123; + FormatArgImpl arg_123(i); + absl::Span<const FormatArgImpl> args(&arg_123, 1); + UntypedFormatSpec format(fmt); + + EXPECT_TRUE(FormatUntyped(&actual, format, args)); + char buf[4096]{}; + snprintf(buf, sizeof(buf), fmt, 123); + EXPECT_EQ( + str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), args), + buf); + EXPECT_EQ(actual, buf); + } + // The internal version works with a preparsed format. + ParsedFormat<'d'> pc("A format %d"); + int i = 345; + FormatArg arg(i); + std::string out; + EXPECT_TRUE(str_format_internal::FormatUntyped( + &out, str_format_internal::UntypedFormatSpecImpl(&pc), {&arg, 1})); + EXPECT_EQ("A format 345", out); +} + +TEST_F(FormatEntryPointTest, StringFormat) { + EXPECT_EQ("123", StrFormat("%d", 123)); + constexpr absl::string_view view("=%d=", 4); + EXPECT_EQ("=123=", StrFormat(view, 123)); +} + +TEST_F(FormatEntryPointTest, AppendFormat) { + std::string s; + std::string& r = StrAppendFormat(&s, "%d", 123); + EXPECT_EQ(&s, &r); // should be same object + EXPECT_EQ("123", r); +} + +TEST_F(FormatEntryPointTest, AppendFormatFail) { + std::string s = "orig"; + + UntypedFormatSpec format(" more %d"); + FormatArgImpl arg("not an int"); + + EXPECT_EQ("orig", + str_format_internal::AppendPack( + &s, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {&arg, 1})); +} + + +TEST_F(FormatEntryPointTest, ManyArgs) { + EXPECT_EQ("24", StrFormat("%24$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); + EXPECT_EQ("60", StrFormat("%60$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60)); +} + +TEST_F(FormatEntryPointTest, Preparsed) { + ParsedFormat<'d'> pc("%d"); + EXPECT_EQ("123", StrFormat(pc, 123)); + // rvalue ok? + EXPECT_EQ("123", StrFormat(ParsedFormat<'d'>("%d"), 123)); + constexpr absl::string_view view("=%d=", 4); + EXPECT_EQ("=123=", StrFormat(ParsedFormat<'d'>(view), 123)); +} + +TEST_F(FormatEntryPointTest, FormatCountCapture) { + int n = 0; + EXPECT_EQ("", StrFormat("%n", FormatCountCapture(&n))); + EXPECT_EQ(0, n); + EXPECT_EQ("123", StrFormat("%d%n", 123, FormatCountCapture(&n))); + EXPECT_EQ(3, n); +} + +TEST_F(FormatEntryPointTest, FormatCountCaptureWrongType) { + // Should reject int*. + int n = 0; + UntypedFormatSpec format("%d%n"); + int i = 123, *ip = &n; + FormatArgImpl args[2] = {FormatArgImpl(i), FormatArgImpl(ip)}; + + EXPECT_EQ("", str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + absl::MakeSpan(args))); +} + +TEST_F(FormatEntryPointTest, FormatCountCaptureMultiple) { + int n1 = 0; + int n2 = 0; + EXPECT_EQ(" 1 2", + StrFormat("%5d%n%10d%n", 1, FormatCountCapture(&n1), 2, + FormatCountCapture(&n2))); + EXPECT_EQ(5, n1); + EXPECT_EQ(15, n2); +} + +TEST_F(FormatEntryPointTest, FormatCountCaptureExample) { + int n; + std::string s; + StrAppendFormat(&s, "%s: %n%s\n", "(1,1)", FormatCountCapture(&n), "(1,2)"); + StrAppendFormat(&s, "%*s%s\n", n, "", "(2,2)"); + EXPECT_EQ(7, n); + EXPECT_EQ( + "(1,1): (1,2)\n" + " (2,2)\n", + s); +} + +TEST_F(FormatEntryPointTest, Stream) { + const std::string formats[] = { + "", + "a", + "%80d", +#if !defined(_MSC_VER) && !defined(__ANDROID__) + // MSVC doesn't support positional syntax. + "complicated multipart %% %1$d format %1$080d", +#endif // _MSC_VER + }; + std::string buf(4096, '\0'); + for (const auto& fmt : formats) { + const auto parsed = ParsedFormat<'d'>::NewAllowIgnored(fmt); + std::ostringstream oss; + oss << StreamFormat(*parsed, 123); + int fmt_result = snprintf(&*buf.begin(), buf.size(), fmt.c_str(), 123); + ASSERT_TRUE(oss) << fmt; + ASSERT_TRUE(fmt_result >= 0 && static_cast<size_t>(fmt_result) < buf.size()) + << fmt_result; + EXPECT_EQ(buf.c_str(), oss.str()); + } +} + +TEST_F(FormatEntryPointTest, StreamOk) { + std::ostringstream oss; + oss << StreamFormat("hello %d", 123); + EXPECT_EQ("hello 123", oss.str()); + EXPECT_TRUE(oss.good()); +} + +TEST_F(FormatEntryPointTest, StreamFail) { + std::ostringstream oss; + UntypedFormatSpec format("hello %d"); + FormatArgImpl arg("non-numeric"); + oss << str_format_internal::Streamable( + str_format_internal::UntypedFormatSpecImpl::Extract(format), {&arg, 1}); + EXPECT_EQ("hello ", oss.str()); // partial write + EXPECT_TRUE(oss.fail()); +} + +std::string WithSnprintf(const char* fmt, ...) { + std::string buf; + buf.resize(128); + va_list va; + va_start(va, fmt); + int r = vsnprintf(&*buf.begin(), buf.size(), fmt, va); + va_end(va); + EXPECT_GE(r, 0); + EXPECT_LT(r, buf.size()); + buf.resize(r); + return buf; +} + +TEST_F(FormatEntryPointTest, FloatPrecisionArg) { + // Test that positional parameters for width and precision + // are indexed to precede the value. + // Also sanity check the same formats against snprintf. + EXPECT_EQ("0.1", StrFormat("%.1f", 0.1)); + EXPECT_EQ("0.1", WithSnprintf("%.1f", 0.1)); + EXPECT_EQ(" 0.1", StrFormat("%*.1f", 5, 0.1)); + EXPECT_EQ(" 0.1", WithSnprintf("%*.1f", 5, 0.1)); + EXPECT_EQ("0.1", StrFormat("%.*f", 1, 0.1)); + EXPECT_EQ("0.1", WithSnprintf("%.*f", 1, 0.1)); + EXPECT_EQ(" 0.1", StrFormat("%*.*f", 5, 1, 0.1)); + EXPECT_EQ(" 0.1", WithSnprintf("%*.*f", 5, 1, 0.1)); +} +namespace streamed_test { +struct X {}; +std::ostream& operator<<(std::ostream& os, const X&) { + return os << "X"; +} +} // streamed_test + +TEST_F(FormatEntryPointTest, FormatStreamed) { + EXPECT_EQ("123", StrFormat("%s", FormatStreamed(123))); + EXPECT_EQ(" 123", StrFormat("%5s", FormatStreamed(123))); + EXPECT_EQ("123 ", StrFormat("%-5s", FormatStreamed(123))); + EXPECT_EQ("X", StrFormat("%s", FormatStreamed(streamed_test::X()))); + EXPECT_EQ("123", StrFormat("%s", FormatStreamed(StreamFormat("%d", 123)))); +} + +// Helper class that creates a temporary file and exposes a FILE* to it. +// It will close the file on destruction. +class TempFile { + public: + TempFile() : file_(std::tmpfile()) {} + ~TempFile() { std::fclose(file_); } + + std::FILE* file() const { return file_; } + + // Read the file into a std::string. + std::string ReadFile() { + std::fseek(file_, 0, SEEK_END); + int size = std::ftell(file_); + std::rewind(file_); + std::string str(2 * size, ' '); + int read_bytes = std::fread(&str[0], 1, str.size(), file_); + EXPECT_EQ(read_bytes, size); + str.resize(read_bytes); + EXPECT_TRUE(std::feof(file_)); + return str; + } + + private: + std::FILE* file_; +}; + +TEST_F(FormatEntryPointTest, FPrintF) { + TempFile tmp; + int result = + FPrintF(tmp.file(), "STRING: %s NUMBER: %010d", std::string("ABC"), -19); + EXPECT_EQ(result, 30); + EXPECT_EQ(tmp.ReadFile(), "STRING: ABC NUMBER: -000000019"); +} + +TEST_F(FormatEntryPointTest, FPrintFError) { + errno = 0; + int result = FPrintF(stdin, "ABC"); + EXPECT_LT(result, 0); + EXPECT_EQ(errno, EBADF); +} + +#if __GNUC__ +TEST_F(FormatEntryPointTest, FprintfTooLarge) { + std::FILE* f = std::fopen("/dev/null", "w"); + int width = 2000000000; + errno = 0; + int result = FPrintF(f, "%*d %*d", width, 0, width, 0); + EXPECT_LT(result, 0); + EXPECT_EQ(errno, EFBIG); + std::fclose(f); +} + +TEST_F(FormatEntryPointTest, PrintF) { + int stdout_tmp = dup(STDOUT_FILENO); + + TempFile tmp; + std::fflush(stdout); + dup2(fileno(tmp.file()), STDOUT_FILENO); + + int result = PrintF("STRING: %s NUMBER: %010d", std::string("ABC"), -19); + + std::fflush(stdout); + dup2(stdout_tmp, STDOUT_FILENO); + close(stdout_tmp); + + EXPECT_EQ(result, 30); + EXPECT_EQ(tmp.ReadFile(), "STRING: ABC NUMBER: -000000019"); +} +#endif // __GNUC__ + +TEST_F(FormatEntryPointTest, SNPrintF) { + char buffer[16]; + int result = + SNPrintF(buffer, sizeof(buffer), "STRING: %s", std::string("ABC")); + EXPECT_EQ(result, 11); + EXPECT_EQ(std::string(buffer), "STRING: ABC"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 123456); + EXPECT_EQ(result, 14); + EXPECT_EQ(std::string(buffer), "NUMBER: 123456"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 1234567); + EXPECT_EQ(result, 15); + EXPECT_EQ(std::string(buffer), "NUMBER: 1234567"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 12345678); + EXPECT_EQ(result, 16); + EXPECT_EQ(std::string(buffer), "NUMBER: 1234567"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 123456789); + EXPECT_EQ(result, 17); + EXPECT_EQ(std::string(buffer), "NUMBER: 1234567"); + + result = SNPrintF(nullptr, 0, "Just checking the %s of the output.", "size"); + EXPECT_EQ(result, 37); +} + +TEST(StrFormat, BehavesAsDocumented) { + std::string s = absl::StrFormat("%s, %d!", "Hello", 123); + EXPECT_EQ("Hello, 123!", s); + // The format of a replacement is + // '%'[position][flags][width['.'precision]][length_modifier][format] + EXPECT_EQ(absl::StrFormat("%1$+3.2Lf", 1.1), "+1.10"); + // Text conversion: + // "c" - Character. Eg: 'a' -> "A", 20 -> " " + EXPECT_EQ(StrFormat("%c", 'a'), "a"); + EXPECT_EQ(StrFormat("%c", 0x20), " "); + // Formats char and integral types: int, long, uint64_t, etc. + EXPECT_EQ(StrFormat("%c", int{'a'}), "a"); + EXPECT_EQ(StrFormat("%c", long{'a'}), "a"); // NOLINT + EXPECT_EQ(StrFormat("%c", uint64_t{'a'}), "a"); + // "s" - std::string Eg: "C" -> "C", std::string("C++") -> "C++" + // Formats std::string, char*, string_view, and Cord. + EXPECT_EQ(StrFormat("%s", "C"), "C"); + EXPECT_EQ(StrFormat("%s", std::string("C++")), "C++"); + EXPECT_EQ(StrFormat("%s", string_view("view")), "view"); + // Integral Conversion + // These format integral types: char, int, long, uint64_t, etc. + EXPECT_EQ(StrFormat("%d", char{10}), "10"); + EXPECT_EQ(StrFormat("%d", int{10}), "10"); + EXPECT_EQ(StrFormat("%d", long{10}), "10"); // NOLINT + EXPECT_EQ(StrFormat("%d", uint64_t{10}), "10"); + // d,i - signed decimal Eg: -10 -> "-10" + EXPECT_EQ(StrFormat("%d", -10), "-10"); + EXPECT_EQ(StrFormat("%i", -10), "-10"); + // o - octal Eg: 10 -> "12" + EXPECT_EQ(StrFormat("%o", 10), "12"); + // u - unsigned decimal Eg: 10 -> "10" + EXPECT_EQ(StrFormat("%u", 10), "10"); + // x/X - lower,upper case hex Eg: 10 -> "a"/"A" + EXPECT_EQ(StrFormat("%x", 10), "a"); + EXPECT_EQ(StrFormat("%X", 10), "A"); + // Floating-point, with upper/lower-case output. + // These format floating points types: float, double, long double, etc. + EXPECT_EQ(StrFormat("%.1f", float{1}), "1.0"); + EXPECT_EQ(StrFormat("%.1f", double{1}), "1.0"); + const long double long_double = 1.0; + EXPECT_EQ(StrFormat("%.1f", long_double), "1.0"); + // These also format integral types: char, int, long, uint64_t, etc.: + EXPECT_EQ(StrFormat("%.1f", char{1}), "1.0"); + EXPECT_EQ(StrFormat("%.1f", int{1}), "1.0"); + EXPECT_EQ(StrFormat("%.1f", long{1}), "1.0"); // NOLINT + EXPECT_EQ(StrFormat("%.1f", uint64_t{1}), "1.0"); + // f/F - decimal. Eg: 123456789 -> "123456789.000000" + EXPECT_EQ(StrFormat("%f", 123456789), "123456789.000000"); + EXPECT_EQ(StrFormat("%F", 123456789), "123456789.000000"); + // e/E - exponentiated Eg: .01 -> "1.00000e-2"/"1.00000E-2" + EXPECT_EQ(StrFormat("%e", .01), "1.000000e-02"); + EXPECT_EQ(StrFormat("%E", .01), "1.000000E-02"); + // g/G - exponentiate to fit Eg: .01 -> "0.01", 1e10 ->"1e+10"/"1E+10" + EXPECT_EQ(StrFormat("%g", .01), "0.01"); + EXPECT_EQ(StrFormat("%g", 1e10), "1e+10"); + EXPECT_EQ(StrFormat("%G", 1e10), "1E+10"); + // a/A - lower,upper case hex Eg: -3.0 -> "-0x1.8p+1"/"-0X1.8P+1" + +// On NDK r16, there is a regression in hexfloat formatting. +#if !defined(__NDK_MAJOR__) || __NDK_MAJOR__ != 16 + EXPECT_EQ(StrFormat("%.1a", -3.0), "-0x1.8p+1"); // .1 to fix MSVC output + EXPECT_EQ(StrFormat("%.1A", -3.0), "-0X1.8P+1"); // .1 to fix MSVC output +#endif + + // Other conversion + int64_t value = 0x7ffdeb6; + auto ptr_value = static_cast<uintptr_t>(value); + const int& something = *reinterpret_cast<const int*>(ptr_value); + EXPECT_EQ(StrFormat("%p", &something), StrFormat("0x%x", ptr_value)); + + // Output widths are supported, with optional flags. + EXPECT_EQ(StrFormat("%3d", 1), " 1"); + EXPECT_EQ(StrFormat("%3d", 123456), "123456"); + EXPECT_EQ(StrFormat("%06.2f", 1.234), "001.23"); + EXPECT_EQ(StrFormat("%+d", 1), "+1"); + EXPECT_EQ(StrFormat("% d", 1), " 1"); + EXPECT_EQ(StrFormat("%-4d", -1), "-1 "); + EXPECT_EQ(StrFormat("%#o", 10), "012"); + EXPECT_EQ(StrFormat("%#x", 15), "0xf"); + EXPECT_EQ(StrFormat("%04d", 8), "0008"); + // Posix positional substitution. + EXPECT_EQ(absl::StrFormat("%2$s, %3$s, %1$s!", "vici", "veni", "vidi"), + "veni, vidi, vici!"); + // Length modifiers are ignored. + EXPECT_EQ(StrFormat("%hhd", int{1}), "1"); + EXPECT_EQ(StrFormat("%hd", int{1}), "1"); + EXPECT_EQ(StrFormat("%ld", int{1}), "1"); + EXPECT_EQ(StrFormat("%lld", int{1}), "1"); + EXPECT_EQ(StrFormat("%Ld", int{1}), "1"); + EXPECT_EQ(StrFormat("%jd", int{1}), "1"); + EXPECT_EQ(StrFormat("%zd", int{1}), "1"); + EXPECT_EQ(StrFormat("%td", int{1}), "1"); + EXPECT_EQ(StrFormat("%qd", int{1}), "1"); +} + +using str_format_internal::ExtendedParsedFormat; +using str_format_internal::ParsedFormatBase; + +struct SummarizeConsumer { + std::string* out; + explicit SummarizeConsumer(std::string* out) : out(out) {} + + bool Append(string_view s) { + *out += "[" + std::string(s) + "]"; + return true; + } + + bool ConvertOne(const str_format_internal::UnboundConversion& conv, + string_view s) { + *out += "{"; + *out += std::string(s); + *out += ":"; + *out += std::to_string(conv.arg_position) + "$"; + if (conv.width.is_from_arg()) { + *out += std::to_string(conv.width.get_from_arg()) + "$*"; + } + if (conv.precision.is_from_arg()) { + *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; + } + *out += conv.conv.Char(); + *out += "}"; + return true; + } +}; + +std::string SummarizeParsedFormat(const ParsedFormatBase& pc) { + std::string out; + if (!pc.ProcessFormat(SummarizeConsumer(&out))) out += "!"; + return out; +} + +class ParsedFormatTest : public testing::Test {}; + +TEST_F(ParsedFormatTest, SimpleChecked) { + EXPECT_EQ("[ABC]{d:1$d}[DEF]", + SummarizeParsedFormat(ParsedFormat<'d'>("ABC%dDEF"))); + EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", + SummarizeParsedFormat(ParsedFormat<'s', 'd', 'f'>("%sFFF%dZZZ%f"))); + EXPECT_EQ("{s:1$s}[ ]{.*d:3$.2$*d}", + SummarizeParsedFormat(ParsedFormat<'s', '*', 'd'>("%s %.*d"))); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedCorrect) { + auto f = ParsedFormat<'d'>::New("ABC%dDEF"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*f)); + + std::string format = "%sFFF%dZZZ%f"; + auto f2 = ParsedFormat<'s', 'd', 'f'>::New(format); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*f2)); + + f2 = ParsedFormat<'s', 'd', 'f'>::New("%s %d %f"); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*f2)); + + auto star = ParsedFormat<'*', 'd'>::New("%*d"); + ASSERT_TRUE(star); + EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star)); + + auto dollar = ParsedFormat<'d', 's'>::New("%2$s %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); + // with reuse + dollar = ParsedFormat<'d', 's'>::New("%2$s %1$d %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}", + SummarizeParsedFormat(*dollar)); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedIgnoredArgs) { + EXPECT_FALSE((ParsedFormat<'d', 's'>::New("ABC"))); + EXPECT_FALSE((ParsedFormat<'d', 's'>::New("%dABC"))); + EXPECT_FALSE((ParsedFormat<'d', 's'>::New("ABC%2$s"))); + auto f = ParsedFormat<'d', 's'>::NewAllowIgnored("ABC"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]", SummarizeParsedFormat(*f)); + f = ParsedFormat<'d', 's'>::NewAllowIgnored("%dABC"); + ASSERT_TRUE(f); + EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*f)); + f = ParsedFormat<'d', 's'>::NewAllowIgnored("ABC%2$s"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*f)); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedUnsupported) { + EXPECT_FALSE(ParsedFormat<'d'>::New("%1$d %1$x")); + EXPECT_FALSE(ParsedFormat<'x'>::New("%1$d %1$x")); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedIncorrect) { + EXPECT_FALSE(ParsedFormat<'d'>::New("")); + + EXPECT_FALSE(ParsedFormat<'d'>::New("ABC%dDEF%d")); + + std::string format = "%sFFF%dZZZ%f"; + EXPECT_FALSE((ParsedFormat<'s', 'd', 'g'>::New(format))); +} + +using str_format_internal::Conv; + +TEST_F(ParsedFormatTest, UncheckedCorrect) { + auto f = ExtendedParsedFormat<Conv::d>::New("ABC%dDEF"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*f)); + + std::string format = "%sFFF%dZZZ%f"; + auto f2 = + ExtendedParsedFormat<Conv::string, Conv::d, Conv::floating>::New(format); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*f2)); + + f2 = ExtendedParsedFormat<Conv::string, Conv::d, Conv::floating>::New( + "%s %d %f"); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*f2)); + + auto star = ExtendedParsedFormat<Conv::star, Conv::d>::New("%*d"); + ASSERT_TRUE(star); + EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star)); + + auto dollar = ExtendedParsedFormat<Conv::d, Conv::s>::New("%2$s %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); + // with reuse + dollar = ExtendedParsedFormat<Conv::d, Conv::s>::New("%2$s %1$d %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}", + SummarizeParsedFormat(*dollar)); +} + +TEST_F(ParsedFormatTest, UncheckedIgnoredArgs) { + EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("ABC"))); + EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("%dABC"))); + EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("ABC%2$s"))); + auto f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("ABC"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]", SummarizeParsedFormat(*f)); + f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("%dABC"); + ASSERT_TRUE(f); + EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*f)); + f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("ABC%2$s"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*f)); +} + +TEST_F(ParsedFormatTest, UncheckedMultipleTypes) { + auto dx = ExtendedParsedFormat<Conv::d | Conv::x>::New("%1$d %1$x"); + EXPECT_TRUE(dx); + EXPECT_EQ("{1$d:1$d}[ ]{1$x:1$x}", SummarizeParsedFormat(*dx)); + + dx = ExtendedParsedFormat<Conv::d | Conv::x>::New("%1$d"); + EXPECT_TRUE(dx); + EXPECT_EQ("{1$d:1$d}", SummarizeParsedFormat(*dx)); +} + +TEST_F(ParsedFormatTest, UncheckedIncorrect) { + EXPECT_FALSE(ExtendedParsedFormat<Conv::d>::New("")); + + EXPECT_FALSE(ExtendedParsedFormat<Conv::d>::New("ABC%dDEF%d")); + + std::string format = "%sFFF%dZZZ%f"; + EXPECT_FALSE((ExtendedParsedFormat<Conv::s, Conv::d, Conv::g>::New(format))); +} + +TEST_F(ParsedFormatTest, RegressionMixPositional) { + EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::o>::New("%1$d %o"))); +} + +} // namespace +} // namespace absl |