diff options
Diffstat (limited to 'third_party/abseil_cpp/absl/strings')
106 files changed, 37920 insertions, 0 deletions
diff --git a/third_party/abseil_cpp/absl/strings/BUILD.bazel b/third_party/abseil_cpp/absl/strings/BUILD.bazel new file mode 100644 index 000000000000..30a8dd28b2d1 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/BUILD.bazel @@ -0,0 +1,788 @@ +# +# Copyright 2017 The Abseil Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load( + "//absl:copts/configure_copts.bzl", + "ABSL_DEFAULT_COPTS", + "ABSL_TEST_COPTS", +) + +package( + default_visibility = ["//visibility:public"], + features = ["parse_headers"], +) + +licenses(["notice"]) + +cc_library( + name = "strings", + srcs = [ + "ascii.cc", + "charconv.cc", + "escaping.cc", + "internal/charconv_bigint.cc", + "internal/charconv_bigint.h", + "internal/charconv_parse.cc", + "internal/charconv_parse.h", + "internal/memutil.cc", + "internal/memutil.h", + "internal/stl_type_traits.h", + "internal/str_join_internal.h", + "internal/str_split_internal.h", + "match.cc", + "numbers.cc", + "str_cat.cc", + "str_replace.cc", + "str_split.cc", + "string_view.cc", + "substitute.cc", + ], + hdrs = [ + "ascii.h", + "charconv.h", + "escaping.h", + "internal/string_constant.h", + "match.h", + "numbers.h", + "str_cat.h", + "str_join.h", + "str_replace.h", + "str_split.h", + "string_view.h", + "strip.h", + "substitute.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":internal", + "//absl/base", + "//absl/base:bits", + "//absl/base:config", + 
"//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/base:throw_delegate", + "//absl/memory", + "//absl/meta:type_traits", + "//absl/numeric:int128", + ], +) + +cc_library( + name = "internal", + srcs = [ + "internal/escaping.cc", + "internal/ostringstream.cc", + "internal/utf8.cc", + ], + hdrs = [ + "internal/char_map.h", + "internal/escaping.h", + "internal/ostringstream.h", + "internal/resize_uninitialized.h", + "internal/utf8.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/meta:type_traits", + ], +) + +cc_test( + name = "match_test", + size = "small", + srcs = ["match_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "escaping_test", + size = "small", + srcs = [ + "escaping_test.cc", + "internal/escaping_test_common.h", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":strings", + "//absl/base:core_headers", + "//absl/container:fixed_array", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "escaping_benchmark", + srcs = [ + "escaping_benchmark.cc", + "internal/escaping_test_common.h", + ], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:raw_logging_internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "ascii_test", + size = "small", + srcs = ["ascii_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "ascii_benchmark", + srcs = ["ascii_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = 
["//visibility:private"], + deps = [ + ":strings", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "memutil_benchmark", + srcs = [ + "internal/memutil.h", + "internal/memutil_benchmark.cc", + ], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "memutil_test", + size = "small", + srcs = [ + "internal/memutil.h", + "internal/memutil_test.cc", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "utf8_test", + size = "small", + srcs = [ + "internal/utf8_test.cc", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":internal", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "string_constant_test", + size = "small", + srcs = ["internal/string_constant_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/meta:type_traits", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "string_view_benchmark", + srcs = ["string_view_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "string_view_test", + size = "small", + srcs = ["string_view_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:dynamic_annotations", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "cord_internal", + hdrs = 
["internal/cord_internal.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:base_internal", + "//absl/container:compressed_tuple", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord", + srcs = [ + "cord.cc", + ], + hdrs = [ + "cord.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":internal", + ":str_format", + ":strings", + "//absl/base", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "//absl/container:inlined_vector", + "//absl/functional:function_ref", + "//absl/meta:type_traits", + "//absl/types:optional", + ], +) + +cc_library( + name = "cord_test_helpers", + testonly = 1, + hdrs = [ + "cord_test_helpers.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord", + ], +) + +cc_test( + name = "cord_test", + size = "medium", + srcs = ["cord_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_test_helpers", + ":str_format", + ":strings", + "//absl/base", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "substitute_test", + size = "small", + srcs = ["substitute_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_replace_benchmark", + srcs = ["str_replace_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:raw_logging_internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "str_replace_test", + size = "small", + srcs = ["str_replace_test.cc"], + copts = ABSL_TEST_COPTS, + 
visibility = ["//visibility:private"], + deps = [ + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_split_test", + srcs = ["str_split_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/base:dynamic_annotations", + "//absl/container:flat_hash_map", + "//absl/container:node_hash_map", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_split_benchmark", + srcs = ["str_split_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:raw_logging_internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "ostringstream_test", + size = "small", + srcs = ["internal/ostringstream_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "ostringstream_benchmark", + srcs = ["internal/ostringstream_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "resize_uninitialized_test", + size = "small", + srcs = [ + "internal/resize_uninitialized.h", + "internal/resize_uninitialized_test.cc", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + "//absl/base:core_headers", + "//absl/meta:type_traits", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_join_test", + size = "small", + srcs = ["str_join_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_join_benchmark", + srcs = ["str_join_benchmark.cc"], + copts 
= ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "str_cat_test", + size = "small", + srcs = ["str_cat_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_cat_benchmark", + srcs = ["str_cat_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "numbers_test", + size = "medium", + srcs = [ + "internal/numbers_test_common.h", + "numbers_test.cc", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":internal", + ":pow10_helper", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "numbers_benchmark", + srcs = ["numbers_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "strip_test", + size = "small", + srcs = ["strip_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "char_map_test", + srcs = ["internal/char_map_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "char_map_benchmark", + srcs = ["internal/char_map_benchmark.cc"], + copts = ABSL_TEST_COPTS, + tags = ["benchmark"], + deps = [ + 
":internal", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_test( + name = "charconv_test", + srcs = ["charconv_test.cc"], + copts = ABSL_TEST_COPTS, + deps = [ + ":pow10_helper", + ":str_format", + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "charconv_parse_test", + srcs = [ + "internal/charconv_parse.h", + "internal/charconv_parse_test.cc", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "charconv_bigint_test", + srcs = [ + "internal/charconv_bigint.h", + "internal/charconv_bigint_test.cc", + "internal/charconv_parse.h", + ], + copts = ABSL_TEST_COPTS, + deps = [ + ":strings", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "charconv_benchmark", + srcs = [ + "charconv_benchmark.cc", + ], + tags = [ + "benchmark", + ], + deps = [ + ":strings", + "@com_github_google_benchmark//:benchmark_main", + ], +) + +cc_library( + name = "str_format", + hdrs = [ + "str_format.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":str_format_internal", + ], +) + +cc_library( + name = "str_format_internal", + srcs = [ + "internal/str_format/arg.cc", + "internal/str_format/bind.cc", + "internal/str_format/extension.cc", + "internal/str_format/float_conversion.cc", + "internal/str_format/output.cc", + "internal/str_format/parser.cc", + ], + hdrs = [ + "internal/str_format/arg.h", + "internal/str_format/bind.h", + "internal/str_format/checker.h", + "internal/str_format/extension.h", + "internal/str_format/float_conversion.h", + "internal/str_format/output.h", + "internal/str_format/parser.h", + ], + copts = ABSL_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/base:bits", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/functional:function_ref", + "//absl/meta:type_traits", + 
"//absl/numeric:int128", + "//absl/types:optional", + "//absl/types:span", + ], +) + +cc_test( + name = "str_format_test", + srcs = ["str_format_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":str_format", + ":strings", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_extension_test", + srcs = [ + "internal/str_format/extension_test.cc", + ], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + ":str_format_internal", + ":strings", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_arg_test", + srcs = ["internal/str_format/arg_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + ":str_format_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_bind_test", + srcs = ["internal/str_format/bind_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_checker_test", + srcs = ["internal/str_format/checker_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_convert_test", + size = "medium", + srcs = ["internal/str_format/convert_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + ":strings", + "//absl/base:raw_logging_internal", + "//absl/types:optional", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_output_test", + srcs = ["internal/str_format/output_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":str_format_internal", + 
"@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "str_format_parser_test", + srcs = ["internal/str_format/parser_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":str_format_internal", + "//absl/base:core_headers", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "pow10_helper", + testonly = True, + srcs = ["internal/pow10_helper.cc"], + hdrs = ["internal/pow10_helper.h"], + visibility = ["//visibility:private"], + deps = ["//absl/base:config"], +) + +cc_test( + name = "pow10_helper_test", + srcs = ["internal/pow10_helper_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":pow10_helper", + ":str_format", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/third_party/abseil_cpp/absl/strings/CMakeLists.txt b/third_party/abseil_cpp/absl/strings/CMakeLists.txt new file mode 100644 index 000000000000..2b994a71c07e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/CMakeLists.txt @@ -0,0 +1,609 @@ +# +# Copyright 2017 The Abseil Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +absl_cc_library( + NAME + strings + HDRS + "ascii.h" + "charconv.h" + "escaping.h" + "internal/string_constant.h" + "match.h" + "numbers.h" + "str_cat.h" + "str_join.h" + "str_replace.h" + "str_split.h" + "string_view.h" + "strip.h" + "substitute.h" + SRCS + "ascii.cc" + "charconv.cc" + "escaping.cc" + "internal/charconv_bigint.cc" + "internal/charconv_bigint.h" + "internal/charconv_parse.cc" + "internal/charconv_parse.h" + "internal/memutil.cc" + "internal/memutil.h" + "internal/stl_type_traits.h" + "internal/str_join_internal.h" + "internal/str_split_internal.h" + "match.cc" + "numbers.cc" + "str_cat.cc" + "str_replace.cc" + "str_split.cc" + "string_view.cc" + "substitute.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::strings_internal + absl::base + absl::bits + absl::config + absl::core_headers + absl::endian + absl::int128 + absl::memory + absl::raw_logging_internal + absl::throw_delegate + absl::type_traits + PUBLIC +) + +absl_cc_library( + NAME + strings_internal + HDRS + "internal/char_map.h" + "internal/escaping.h" + "internal/ostringstream.h" + "internal/resize_uninitialized.h" + "internal/utf8.h" + SRCS + "internal/escaping.cc" + "internal/ostringstream.cc" + "internal/utf8.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::core_headers + absl::endian + absl::raw_logging_internal + absl::type_traits +) + +absl_cc_test( + NAME + match_test + SRCS + "match_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::base + gmock_main +) + +absl_cc_test( + NAME + escaping_test + SRCS + "escaping_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::core_headers + absl::fixed_array + gmock_main +) + +absl_cc_test( + NAME + ascii_test + SRCS + "ascii_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::core_headers + gmock_main +) + +absl_cc_test( + NAME + memutil_test + SRCS + "internal/memutil.h" + "internal/memutil_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + 
absl::core_headers + gmock_main +) + +absl_cc_test( + NAME + utf8_test + SRCS + "internal/utf8_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings_internal + absl::base + absl::core_headers + gmock_main +) + +absl_cc_test( + NAME + string_constant_test + SRCS + "internal/string_constant_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::type_traits + gmock_main +) + +absl_cc_test( + NAME + string_view_test + SRCS + "string_view_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::config + absl::core_headers + absl::dynamic_annotations + gmock_main +) + +absl_cc_test( + NAME + substitute_test + SRCS + "substitute_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::core_headers + gmock_main +) + +absl_cc_test( + NAME + str_replace_test + SRCS + "str_replace_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + gmock_main +) + +absl_cc_test( + NAME + str_split_test + SRCS + "str_split_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::base + absl::core_headers + absl::dynamic_annotations + absl::flat_hash_map + absl::node_hash_map + gmock_main +) + +absl_cc_test( + NAME + ostringstream_test + SRCS + "internal/ostringstream_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings_internal + gmock_main +) + +absl_cc_test( + NAME + resize_uninitialized_test + SRCS + "internal/resize_uninitialized.h" + "internal/resize_uninitialized_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::core_headers + absl::type_traits + gmock_main +) + +absl_cc_test( + NAME + str_join_test + SRCS + "str_join_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::base + absl::core_headers + absl::memory + gmock_main +) + +absl_cc_test( + NAME + str_cat_test + SRCS + "str_cat_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::core_headers + gmock_main +) + +absl_cc_test( + NAME + numbers_test + SRCS + "internal/numbers_test_common.h" + "numbers_test.cc" + 
COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::core_headers + absl::pow10_helper + absl::config + absl::raw_logging_internal + absl::random_random + absl::random_distributions + absl::strings_internal + gmock_main +) + +absl_cc_test( + NAME + strip_test + SRCS + "strip_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::base + gmock_main +) + +absl_cc_test( + NAME + char_map_test + SRCS + "internal/char_map_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings_internal + gmock_main +) + +absl_cc_test( + NAME + charconv_test + SRCS + "charconv_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::str_format + absl::pow10_helper + gmock_main +) + +absl_cc_test( + NAME + charconv_parse_test + SRCS + "internal/charconv_parse.h" + "internal/charconv_parse_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::config + absl::raw_logging_internal + gmock_main +) + +absl_cc_test( + NAME + charconv_bigint_test + SRCS + "internal/charconv_bigint.h" + "internal/charconv_bigint_test.cc" + "internal/charconv_parse.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::config + gmock_main +) + +absl_cc_library( + NAME + str_format + HDRS + "str_format.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::str_format_internal + PUBLIC +) + +absl_cc_library( + NAME + str_format_internal + HDRS + "internal/str_format/arg.h" + "internal/str_format/bind.h" + "internal/str_format/checker.h" + "internal/str_format/extension.h" + "internal/str_format/float_conversion.h" + "internal/str_format/output.h" + "internal/str_format/parser.h" + SRCS + "internal/str_format/arg.cc" + "internal/str_format/bind.cc" + "internal/str_format/extension.cc" + "internal/str_format/float_conversion.cc" + "internal/str_format/output.cc" + "internal/str_format/parser.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::bits + absl::strings + absl::config + absl::core_headers + absl::type_traits + absl::int128 + absl::span +) + +absl_cc_test( + 
NAME + str_format_test + SRCS + "str_format_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format + absl::cord + absl::strings + absl::core_headers + gmock_main +) + +absl_cc_test( + NAME + str_format_extension_test + SRCS + "internal/str_format/extension_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format + absl::str_format_internal + absl::strings + gmock_main +) + +absl_cc_test( + NAME + str_format_arg_test + SRCS + "internal/str_format/arg_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format + absl::str_format_internal + gmock_main +) + +absl_cc_test( + NAME + str_format_bind_test + SRCS + "internal/str_format/bind_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format_internal + gmock_main +) + +absl_cc_test( + NAME + str_format_checker_test + SRCS + "internal/str_format/checker_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format + gmock_main +) + +absl_cc_test( + NAME + str_format_convert_test + SRCS + "internal/str_format/convert_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::str_format_internal + absl::raw_logging_internal + absl::int128 + gmock_main +) + +absl_cc_test( + NAME + str_format_output_test + SRCS + "internal/str_format/output_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format_internal + absl::cord + gmock_main +) + +absl_cc_test( + NAME + str_format_parser_test + SRCS + "internal/str_format/parser_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::str_format_internal + absl::core_headers + gmock_main +) + +absl_cc_library( + NAME + pow10_helper + HDRS + "internal/pow10_helper.h" + SRCS + "internal/pow10_helper.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + TESTONLY +) + +absl_cc_test( + NAME + pow10_helper_test + SRCS + "internal/pow10_helper_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::pow10_helper + absl::str_format + gmock_main +) + +absl_cc_library( + NAME + cord + HDRS + "cord.h" + SRCS + "cord.cc" + "internal/cord_internal.h" + 
COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::base_internal + absl::compressed_tuple + absl::core_headers + absl::endian + absl::fixed_array + absl::function_ref + absl::inlined_vector + absl::optional + absl::raw_logging_internal + absl::strings + absl::strings_internal + absl::type_traits + PUBLIC +) + +absl_cc_library( + NAME + cord_test_helpers + HDRS + "cord_test_helpers.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + TESTONLY +) + +absl_cc_test( + NAME + cord_test + SRCS + "cord_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::str_format + absl::strings + absl::base + absl::config + absl::core_headers + absl::endian + absl::raw_logging_internal + absl::fixed_array + gmock_main +) diff --git a/third_party/abseil_cpp/absl/strings/ascii.cc b/third_party/abseil_cpp/absl/strings/ascii.cc new file mode 100644 index 000000000000..93bb03e95815 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/ascii.cc @@ -0,0 +1,200 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/ascii.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace ascii_internal { + +// # Table generated by this Python code (bit 0x02 is currently unused): +// TODO(mbar) Move Python code for generation of table to BUILD and link here. + +// NOTE: The kPropertyBits table used within this code was generated by +// Python code of the following form. 
(Bit 0x02 is currently unused and +// available.) +// +// def Hex2(n): +// return '0x' + hex(n/16)[2:] + hex(n%16)[2:] +// def IsPunct(ch): +// return (ord(ch) >= 32 and ord(ch) < 127 and +// not ch.isspace() and not ch.isalnum()) +// def IsBlank(ch): +// return ch in ' \t' +// def IsCntrl(ch): +// return ord(ch) < 32 or ord(ch) == 127 +// def IsXDigit(ch): +// return ch.isdigit() or ch.lower() in 'abcdef' +// for i in range(128): +// ch = chr(i) +// mask = ((ch.isalpha() and 0x01 or 0) | +// (ch.isalnum() and 0x04 or 0) | +// (ch.isspace() and 0x08 or 0) | +// (IsPunct(ch) and 0x10 or 0) | +// (IsBlank(ch) and 0x20 or 0) | +// (IsCntrl(ch) and 0x40 or 0) | +// (IsXDigit(ch) and 0x80 or 0)) +// print Hex2(mask) + ',', +// if i % 16 == 7: +// print ' //', Hex2(i & 0x78) +// elif i % 16 == 15: +// print + +// clang-format off +// Array of bitfields holding character information. Each bit value corresponds +// to a particular character feature. For readability, and because the value +// of these bits is tightly coupled to this implementation, the individual bits +// are not named. Note that bitfields for all characters above ASCII 127 are +// zero-initialized. 
+ABSL_DLL const unsigned char kPropertyBits[256] = { + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00 + 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10 + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20 + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30 + 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60 + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70 + 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40, +}; + +// Array of characters for the ascii_tolower() function. For values 'A' +// through 'Z', return the lower-case character; otherwise, return the +// identity of the passed character. 
+ABSL_DLL const char kToLower[256] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', + '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', + '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', + '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', 
'\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', +}; + +// Array of characters for the ascii_toupper() function. For values 'a' +// through 'z', return the upper-case character; otherwise, return the +// identity of the passed character. +ABSL_DLL const char kToUpper[256] = { + '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', + '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', + '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', + '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', + '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', + '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', + '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', + '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', + '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', + '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', + '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', + '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', + '\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f', + '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87', + '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f', + '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97', + '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f', + '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7', + '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf', + '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7', + '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf', + '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7', + '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf', + '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', 
'\xd7', + '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf', + '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7', + '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef', + '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7', + '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff', +}; +// clang-format on + +} // namespace ascii_internal + +void AsciiStrToLower(std::string* s) { + for (auto& ch : *s) { + ch = absl::ascii_tolower(ch); + } +} + +void AsciiStrToUpper(std::string* s) { + for (auto& ch : *s) { + ch = absl::ascii_toupper(ch); + } +} + +void RemoveExtraAsciiWhitespace(std::string* str) { + auto stripped = StripAsciiWhitespace(*str); + + if (stripped.empty()) { + str->clear(); + return; + } + + auto input_it = stripped.begin(); + auto input_end = stripped.end(); + auto output_it = &(*str)[0]; + bool is_ws = false; + + for (; input_it < input_end; ++input_it) { + if (is_ws) { + // Consecutive whitespace? Keep only the last. + is_ws = absl::ascii_isspace(*input_it); + if (is_ws) --output_it; + } else { + is_ws = absl::ascii_isspace(*input_it); + } + + *output_it = *input_it; + ++output_it; + } + + str->erase(output_it - &(*str)[0]); +} + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/ascii.h b/third_party/abseil_cpp/absl/strings/ascii.h new file mode 100644 index 000000000000..b46bc71f35b9 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/ascii.h @@ -0,0 +1,242 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: ascii.h +// ----------------------------------------------------------------------------- +// +// This package contains functions operating on characters and strings +// restricted to standard ASCII. These include character classification +// functions analogous to those found in the ANSI C Standard Library <ctype.h> +// header file. +// +// C++ implementations provide <ctype.h> functionality based on their +// C environment locale. In general, reliance on such a locale is not ideal, as +// the locale standard is problematic (and may not return invariant information +// for the same character set, for example). These `ascii_*()` functions are +// hard-wired for standard ASCII, much faster, and guaranteed to behave +// consistently. They will never be overloaded, nor will their function +// signature change. +// +// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, +// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, +// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, +// `ascii_isxdigit()` +// Analogous to the <ctype.h> functions with similar names, these +// functions take an unsigned char and return a bool, based on whether the +// character matches the condition specified. +// +// If the input character has a numerical value greater than 127, these +// functions return `false`. 
+// +// `ascii_tolower()`, `ascii_toupper()` +// Analogous to the <ctype.h> functions with similar names, these functions +// take an unsigned char and return a char. +// +// If the input character is not an ASCII {lower,upper}-case letter (including +// numerical values greater than 127) then the functions return the same value +// as the input character. + +#ifndef ABSL_STRINGS_ASCII_H_ +#define ABSL_STRINGS_ASCII_H_ + +#include <algorithm> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace ascii_internal { + +// Declaration for an array of bitfields holding character information. +ABSL_DLL extern const unsigned char kPropertyBits[256]; + +// Declaration for the array of characters to upper-case characters. +ABSL_DLL extern const char kToUpper[256]; + +// Declaration for the array of characters to lower-case characters. +ABSL_DLL extern const char kToLower[256]; + +} // namespace ascii_internal + +// ascii_isalpha() +// +// Determines whether the given character is an alphabetic character. +inline bool ascii_isalpha(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x01) != 0; +} + +// ascii_isalnum() +// +// Determines whether the given character is an alphanumeric character. +inline bool ascii_isalnum(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x04) != 0; +} + +// ascii_isspace() +// +// Determines whether the given character is a whitespace character (space, +// tab, vertical tab, formfeed, linefeed, or carriage return). +inline bool ascii_isspace(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x08) != 0; +} + +// ascii_ispunct() +// +// Determines whether the given character is a punctuation character. 
+inline bool ascii_ispunct(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x10) != 0; +} + +// ascii_isblank() +// +// Determines whether the given character is a blank character (tab or space). +inline bool ascii_isblank(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x20) != 0; +} + +// ascii_iscntrl() +// +// Determines whether the given character is a control character. +inline bool ascii_iscntrl(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x40) != 0; +} + +// ascii_isxdigit() +// +// Determines whether the given character can be represented as a hexadecimal +// digit character (i.e. {0-9} or {A-F}). +inline bool ascii_isxdigit(unsigned char c) { + return (ascii_internal::kPropertyBits[c] & 0x80) != 0; +} + +// ascii_isdigit() +// +// Determines whether the given character can be represented as a decimal +// digit character (i.e. {0-9}). +inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; } + +// ascii_isprint() +// +// Determines whether the given character is printable, including whitespace. +inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; } + +// ascii_isgraph() +// +// Determines whether the given character has a graphical representation. +inline bool ascii_isgraph(unsigned char c) { return c > 32 && c < 127; } + +// ascii_isupper() +// +// Determines whether the given character is uppercase. +inline bool ascii_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; } + +// ascii_islower() +// +// Determines whether the given character is lowercase. +inline bool ascii_islower(unsigned char c) { return c >= 'a' && c <= 'z'; } + +// ascii_isascii() +// +// Determines whether the given character is ASCII. +inline bool ascii_isascii(unsigned char c) { return c < 128; } + +// ascii_tolower() +// +// Returns an ASCII character, converting to lowercase if uppercase is +// passed. Note that character values > 127 are simply returned. 
+inline char ascii_tolower(unsigned char c) { + return ascii_internal::kToLower[c]; +} + +// Converts the characters in `s` to lowercase, changing the contents of `s`. +void AsciiStrToLower(std::string* s); + +// Creates a lowercase string from a given absl::string_view. +ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) { + std::string result(s); + absl::AsciiStrToLower(&result); + return result; +} + +// ascii_toupper() +// +// Returns the ASCII character, converting to upper-case if lower-case is +// passed. Note that characters values > 127 are simply returned. +inline char ascii_toupper(unsigned char c) { + return ascii_internal::kToUpper[c]; +} + +// Converts the characters in `s` to uppercase, changing the contents of `s`. +void AsciiStrToUpper(std::string* s); + +// Creates an uppercase string from a given absl::string_view. +ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { + std::string result(s); + absl::AsciiStrToUpper(&result); + return result; +} + +// Returns absl::string_view with whitespace stripped from the beginning of the +// given string_view. +ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( + absl::string_view str) { + auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); + return str.substr(it - str.begin()); +} + +// Strips in place whitespace from the beginning of the given string. +inline void StripLeadingAsciiWhitespace(std::string* str) { + auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); + str->erase(str->begin(), it); +} + +// Returns absl::string_view with whitespace stripped from the end of the given +// string_view. 
+ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( + absl::string_view str) { + auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); + return str.substr(0, str.rend() - it); +} + +// Strips in place whitespace from the end of the given string +inline void StripTrailingAsciiWhitespace(std::string* str) { + auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); + str->erase(str->rend() - it); +} + +// Returns absl::string_view with whitespace stripped from both ends of the +// given string_view. +ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace( + absl::string_view str) { + return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); +} + +// Strips in place whitespace from both ends of the given string +inline void StripAsciiWhitespace(std::string* str) { + StripTrailingAsciiWhitespace(str); + StripLeadingAsciiWhitespace(str); +} + +// Removes leading, trailing, and consecutive internal whitespace. +void RemoveExtraAsciiWhitespace(std::string*); + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_ASCII_H_ diff --git a/third_party/abseil_cpp/absl/strings/ascii_benchmark.cc b/third_party/abseil_cpp/absl/strings/ascii_benchmark.cc new file mode 100644 index 000000000000..aca458c8042f --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/ascii_benchmark.cc @@ -0,0 +1,120 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/ascii.h" + +#include <cctype> +#include <string> +#include <array> +#include <random> + +#include "benchmark/benchmark.h" + +namespace { + +std::array<unsigned char, 256> MakeShuffledBytes() { + std::array<unsigned char, 256> bytes; + for (size_t i = 0; i < 256; ++i) bytes[i] = static_cast<unsigned char>(i); + std::random_device rd; + std::seed_seq seed({rd(), rd(), rd(), rd(), rd(), rd(), rd(), rd()}); + std::mt19937 g(seed); + std::shuffle(bytes.begin(), bytes.end(), g); + return bytes; +} + +template <typename Function> +void AsciiBenchmark(benchmark::State& state, Function f) { + std::array<unsigned char, 256> bytes = MakeShuffledBytes(); + size_t sum = 0; + for (auto _ : state) { + for (unsigned char b : bytes) sum += f(b) ? 1 : 0; + } + // Make a copy of `sum` before calling `DoNotOptimize` to make sure that `sum` + // can be put in a CPU register and not degrade performance in the loop above. 
+ size_t sum2 = sum; + benchmark::DoNotOptimize(sum2); + state.SetBytesProcessed(state.iterations() * bytes.size()); +} + +using StdAsciiFunction = int (*)(int); +template <StdAsciiFunction f> +void BM_Ascii(benchmark::State& state) { + AsciiBenchmark(state, f); +} + +using AbslAsciiIsFunction = bool (*)(unsigned char); +template <AbslAsciiIsFunction f> +void BM_Ascii(benchmark::State& state) { + AsciiBenchmark(state, f); +} + +using AbslAsciiToFunction = char (*)(unsigned char); +template <AbslAsciiToFunction f> +void BM_Ascii(benchmark::State& state) { + AsciiBenchmark(state, f); +} + +inline char Noop(unsigned char b) { return static_cast<char>(b); } + +BENCHMARK_TEMPLATE(BM_Ascii, Noop); +BENCHMARK_TEMPLATE(BM_Ascii, std::isalpha); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isalpha); +BENCHMARK_TEMPLATE(BM_Ascii, std::isdigit); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isdigit); +BENCHMARK_TEMPLATE(BM_Ascii, std::isalnum); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isalnum); +BENCHMARK_TEMPLATE(BM_Ascii, std::isspace); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isspace); +BENCHMARK_TEMPLATE(BM_Ascii, std::ispunct); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_ispunct); +BENCHMARK_TEMPLATE(BM_Ascii, std::isblank); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isblank); +BENCHMARK_TEMPLATE(BM_Ascii, std::iscntrl); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_iscntrl); +BENCHMARK_TEMPLATE(BM_Ascii, std::isxdigit); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isxdigit); +BENCHMARK_TEMPLATE(BM_Ascii, std::isprint); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isprint); +BENCHMARK_TEMPLATE(BM_Ascii, std::isgraph); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isgraph); +BENCHMARK_TEMPLATE(BM_Ascii, std::isupper); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isupper); +BENCHMARK_TEMPLATE(BM_Ascii, std::islower); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_islower); +BENCHMARK_TEMPLATE(BM_Ascii, isascii); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isascii); +BENCHMARK_TEMPLATE(BM_Ascii, 
std::tolower); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_tolower); +BENCHMARK_TEMPLATE(BM_Ascii, std::toupper); +BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_toupper); + +static void BM_StrToLower(benchmark::State& state) { + const int size = state.range(0); + std::string s(size, 'X'); + for (auto _ : state) { + benchmark::DoNotOptimize(absl::AsciiStrToLower(s)); + } +} +BENCHMARK(BM_StrToLower)->Range(1, 1 << 20); + +static void BM_StrToUpper(benchmark::State& state) { + const int size = state.range(0); + std::string s(size, 'x'); + for (auto _ : state) { + benchmark::DoNotOptimize(absl::AsciiStrToUpper(s)); + } +} +BENCHMARK(BM_StrToUpper)->Range(1, 1 << 20); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/ascii_test.cc b/third_party/abseil_cpp/absl/strings/ascii_test.cc new file mode 100644 index 000000000000..5ecd23f8697d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/ascii_test.cc @@ -0,0 +1,361 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/ascii.h" + +#include <cctype> +#include <clocale> +#include <cstring> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace { + +TEST(AsciiIsFoo, All) { + for (int i = 0; i < 256; i++) { + if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')) + EXPECT_TRUE(absl::ascii_isalpha(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isalpha(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if ((i >= '0' && i <= '9')) + EXPECT_TRUE(absl::ascii_isdigit(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isdigit(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isalpha(i) || absl::ascii_isdigit(i)) + EXPECT_TRUE(absl::ascii_isalnum(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isalnum(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i != '\0' && strchr(" \r\n\t\v\f", i)) + EXPECT_TRUE(absl::ascii_isspace(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isspace(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 32 && i < 127) + EXPECT_TRUE(absl::ascii_isprint(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isprint(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (absl::ascii_isprint(i) && !absl::ascii_isspace(i) && + !absl::ascii_isalnum(i)) + EXPECT_TRUE(absl::ascii_ispunct(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_ispunct(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i == ' ' || i == '\t') + EXPECT_TRUE(absl::ascii_isblank(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isblank(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i < 32 || i == 127) + EXPECT_TRUE(absl::ascii_iscntrl(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_iscntrl(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if 
(absl::ascii_isdigit(i) || (i >= 'A' && i <= 'F') || + (i >= 'a' && i <= 'f')) + EXPECT_TRUE(absl::ascii_isxdigit(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isxdigit(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i > 32 && i < 127) + EXPECT_TRUE(absl::ascii_isgraph(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isgraph(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 'A' && i <= 'Z') + EXPECT_TRUE(absl::ascii_isupper(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_isupper(i)) << ": failed on " << i; + } + for (int i = 0; i < 256; i++) { + if (i >= 'a' && i <= 'z') + EXPECT_TRUE(absl::ascii_islower(i)) << ": failed on " << i; + else + EXPECT_TRUE(!absl::ascii_islower(i)) << ": failed on " << i; + } + for (int i = 0; i < 128; i++) { + EXPECT_TRUE(absl::ascii_isascii(i)) << ": failed on " << i; + } + for (int i = 128; i < 256; i++) { + EXPECT_TRUE(!absl::ascii_isascii(i)) << ": failed on " << i; + } + + // The official is* functions don't accept negative signed chars, but + // our absl::ascii_is* functions do. 
+ for (int i = 0; i < 256; i++) { + signed char sc = static_cast<signed char>(static_cast<unsigned char>(i)); + EXPECT_EQ(absl::ascii_isalpha(i), absl::ascii_isalpha(sc)) << i; + EXPECT_EQ(absl::ascii_isdigit(i), absl::ascii_isdigit(sc)) << i; + EXPECT_EQ(absl::ascii_isalnum(i), absl::ascii_isalnum(sc)) << i; + EXPECT_EQ(absl::ascii_isspace(i), absl::ascii_isspace(sc)) << i; + EXPECT_EQ(absl::ascii_ispunct(i), absl::ascii_ispunct(sc)) << i; + EXPECT_EQ(absl::ascii_isblank(i), absl::ascii_isblank(sc)) << i; + EXPECT_EQ(absl::ascii_iscntrl(i), absl::ascii_iscntrl(sc)) << i; + EXPECT_EQ(absl::ascii_isxdigit(i), absl::ascii_isxdigit(sc)) << i; + EXPECT_EQ(absl::ascii_isprint(i), absl::ascii_isprint(sc)) << i; + EXPECT_EQ(absl::ascii_isgraph(i), absl::ascii_isgraph(sc)) << i; + EXPECT_EQ(absl::ascii_isupper(i), absl::ascii_isupper(sc)) << i; + EXPECT_EQ(absl::ascii_islower(i), absl::ascii_islower(sc)) << i; + EXPECT_EQ(absl::ascii_isascii(i), absl::ascii_isascii(sc)) << i; + } +} + +// Checks that absl::ascii_isfoo returns the same value as isfoo in the C +// locale. +TEST(AsciiIsFoo, SameAsIsFoo) { +#ifndef __ANDROID__ + // temporarily change locale to C. 
It should already be C, but just for safety + const char* old_locale = setlocale(LC_CTYPE, "C"); + ASSERT_TRUE(old_locale != nullptr); +#endif + + for (int i = 0; i < 256; i++) { + EXPECT_EQ(isalpha(i) != 0, absl::ascii_isalpha(i)) << i; + EXPECT_EQ(isdigit(i) != 0, absl::ascii_isdigit(i)) << i; + EXPECT_EQ(isalnum(i) != 0, absl::ascii_isalnum(i)) << i; + EXPECT_EQ(isspace(i) != 0, absl::ascii_isspace(i)) << i; + EXPECT_EQ(ispunct(i) != 0, absl::ascii_ispunct(i)) << i; + EXPECT_EQ(isblank(i) != 0, absl::ascii_isblank(i)) << i; + EXPECT_EQ(iscntrl(i) != 0, absl::ascii_iscntrl(i)) << i; + EXPECT_EQ(isxdigit(i) != 0, absl::ascii_isxdigit(i)) << i; + EXPECT_EQ(isprint(i) != 0, absl::ascii_isprint(i)) << i; + EXPECT_EQ(isgraph(i) != 0, absl::ascii_isgraph(i)) << i; + EXPECT_EQ(isupper(i) != 0, absl::ascii_isupper(i)) << i; + EXPECT_EQ(islower(i) != 0, absl::ascii_islower(i)) << i; + EXPECT_EQ(isascii(i) != 0, absl::ascii_isascii(i)) << i; + } + +#ifndef __ANDROID__ + // restore the old locale. + ASSERT_TRUE(setlocale(LC_CTYPE, old_locale)); +#endif +} + +TEST(AsciiToFoo, All) { +#ifndef __ANDROID__ + // temporarily change locale to C. It should already be C, but just for safety + const char* old_locale = setlocale(LC_CTYPE, "C"); + ASSERT_TRUE(old_locale != nullptr); +#endif + + for (int i = 0; i < 256; i++) { + if (absl::ascii_islower(i)) + EXPECT_EQ(absl::ascii_toupper(i), 'A' + (i - 'a')) << i; + else + EXPECT_EQ(absl::ascii_toupper(i), static_cast<char>(i)) << i; + + if (absl::ascii_isupper(i)) + EXPECT_EQ(absl::ascii_tolower(i), 'a' + (i - 'A')) << i; + else + EXPECT_EQ(absl::ascii_tolower(i), static_cast<char>(i)) << i; + + // These CHECKs only hold in a C locale. + EXPECT_EQ(static_cast<char>(tolower(i)), absl::ascii_tolower(i)) << i; + EXPECT_EQ(static_cast<char>(toupper(i)), absl::ascii_toupper(i)) << i; + + // The official to* functions don't accept negative signed chars, but + // our absl::ascii_to* functions do. 
+ signed char sc = static_cast<signed char>(static_cast<unsigned char>(i)); + EXPECT_EQ(absl::ascii_tolower(i), absl::ascii_tolower(sc)) << i; + EXPECT_EQ(absl::ascii_toupper(i), absl::ascii_toupper(sc)) << i; + } +#ifndef __ANDROID__ + // restore the old locale. + ASSERT_TRUE(setlocale(LC_CTYPE, old_locale)); +#endif +} + +TEST(AsciiStrTo, Lower) { + const char buf[] = "ABCDEF"; + const std::string str("GHIJKL"); + const std::string str2("MNOPQR"); + const absl::string_view sp(str2); + + EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); + EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); + EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + + char mutable_buf[] = "Mutable"; + std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), + mutable_buf, absl::ascii_tolower); + EXPECT_STREQ("mutable", mutable_buf); +} + +TEST(AsciiStrTo, Upper) { + const char buf[] = "abcdef"; + const std::string str("ghijkl"); + const std::string str2("mnopqr"); + const absl::string_view sp(str2); + + EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf)); + EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str)); + EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp)); + + char mutable_buf[] = "Mutable"; + std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), + mutable_buf, absl::ascii_toupper); + EXPECT_STREQ("MUTABLE", mutable_buf); +} + +TEST(StripLeadingAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripLeadingAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo"})); + EXPECT_EQ("foo foo\n ", + absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo foo\n "})); + EXPECT_EQ(absl::string_view{}, absl::StripLeadingAsciiWhitespace( + {"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripLeadingAsciiWhitespace, InPlace) { + std::string str; + + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + 
absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo foo\n "; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ("foo foo\n ", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripLeadingAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(StripTrailingAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripTrailingAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo\t \n\f\r\n\v"})); + EXPECT_EQ(" \nfoo foo", + absl::StripTrailingAsciiWhitespace({" \nfoo foo\t \n\f\r\n\v"})); + EXPECT_EQ(absl::string_view{}, absl::StripTrailingAsciiWhitespace( + {"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripTrailingAsciiWhitespace, InPlace) { + std::string str; + + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "foo\t \n\f\r\n\v"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = " \nfoo foo\t \n\f\r\n\v"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ(" \nfoo foo", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripTrailingAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(StripAsciiWhitespace, FromStringView) { + EXPECT_EQ(absl::string_view{}, + absl::StripAsciiWhitespace(absl::string_view{})); + EXPECT_EQ("foo", absl::StripAsciiWhitespace({"foo"})); + EXPECT_EQ("foo", + absl::StripAsciiWhitespace({"\t \n\f\r\n\vfoo\t \n\f\r\n\v"})); + EXPECT_EQ("foo foo", absl::StripAsciiWhitespace( + {"\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"})); + EXPECT_EQ(absl::string_view{}, + absl::StripAsciiWhitespace({"\t \n\f\r\v\n\t \n\f\r\v\n"})); +} + +TEST(StripAsciiWhitespace, InPlace) { + std::string str; + + 
absl::StripAsciiWhitespace(&str); + EXPECT_EQ("", str); + + str = "foo"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo\t \n\f\r\n\v"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo", str); + + str = "\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ("foo foo", str); + + str = "\t \n\f\r\v\n\t \n\f\r\v\n"; + absl::StripAsciiWhitespace(&str); + EXPECT_EQ(absl::string_view{}, str); +} + +TEST(RemoveExtraAsciiWhitespace, InPlace) { + const char* inputs[] = {"No extra space", + " Leading whitespace", + "Trailing whitespace ", + " Leading and trailing ", + " Whitespace \t in\v middle ", + "'Eeeeep! \n Newlines!\n", + "nospaces", + "", + "\n\t a\t\n\nb \t\n"}; + + const char* outputs[] = { + "No extra space", + "Leading whitespace", + "Trailing whitespace", + "Leading and trailing", + "Whitespace in middle", + "'Eeeeep! Newlines!", + "nospaces", + "", + "a\nb", + }; + const int NUM_TESTS = ABSL_ARRAYSIZE(inputs); + + for (int i = 0; i < NUM_TESTS; i++) { + std::string s(inputs[i]); + absl::RemoveExtraAsciiWhitespace(&s); + EXPECT_EQ(outputs[i], s); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/charconv.cc b/third_party/abseil_cpp/absl/strings/charconv.cc new file mode 100644 index 000000000000..3613a6528665 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/charconv.cc @@ -0,0 +1,984 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/charconv.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>

#include "absl/base/casts.h"
#include "absl/base/internal/bits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/charconv_bigint.h"
#include "absl/strings/internal/charconv_parse.h"

// The macro ABSL_BIT_PACK_FLOATS is defined on x86-64, where IEEE floating
// point numbers have the same endianness in memory as a bitfield struct
// containing the corresponding parts.
//
// When set, we replace calls to ldexp() with manual bit packing, which is
// faster and is unaffected by floating point environment.
#ifdef ABSL_BIT_PACK_FLOATS
#error ABSL_BIT_PACK_FLOATS cannot be directly set
#elif defined(__x86_64__) || defined(_M_X64)
#define ABSL_BIT_PACK_FLOATS 1
#endif

// A note about subnormals:
//
// The code below talks about "normals" and "subnormals".  A normal IEEE float
// has a fixed-width mantissa and power of two exponent.  For example, a normal
// `double` has a 53-bit mantissa.  Because the high bit is always 1, it is not
// stored in the representation.  The implicit bit buys an extra bit of
// resolution in the datatype.
//
// The downside of this scheme is that there is a large gap between DBL_MIN and
// zero.  (Large, at least, relative to the difference between DBL_MIN and the
// next representable number).  This gap is softened by the "subnormal" numbers,
// which have the same power-of-two exponent as DBL_MIN, but no implicit 53rd
// bit.  An all-bits-zero exponent in the encoding represents subnormals.  (Zero
// is represented as a subnormal with an all-bits-zero mantissa.)
//
// The code below, in calculations, represents the mantissa as a uint64_t.  The
// end result normally has the 53rd bit set.  It represents subnormals by using
// narrower mantissas.
+ +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +template <typename FloatType> +struct FloatTraits; + +template <> +struct FloatTraits<double> { + // The number of mantissa bits in the given float type. This includes the + // implied high bit. + static constexpr int kTargetMantissaBits = 53; + + // The largest supported IEEE exponent, in our integral mantissa + // representation. + // + // If `m` is the largest possible int kTargetMantissaBits bits wide, then + // m * 2**kMaxExponent is exactly equal to DBL_MAX. + static constexpr int kMaxExponent = 971; + + // The smallest supported IEEE normal exponent, in our integral mantissa + // representation. + // + // If `m` is the smallest possible int kTargetMantissaBits bits wide, then + // m * 2**kMinNormalExponent is exactly equal to DBL_MIN. + static constexpr int kMinNormalExponent = -1074; + + static double MakeNan(const char* tagp) { + // Support nan no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return nan(tagp); + } + + // Builds a nonzero floating point number out of the provided parts. + // + // This is intended to do the same operation as ldexp(mantissa, exponent), + // but using purely integer math, to avoid -ffastmath and floating + // point environment issues. Using type punning is also faster. We fall back + // to ldexp on a per-platform basis for portability. + // + // `exponent` must be between kMinNormalExponent and kMaxExponent. + // + // `mantissa` must either be exactly kTargetMantissaBits wide, in which case + // a normal value is made, or it must be less narrow than that, in which case + // `exponent` must be exactly kMinNormalExponent, and a subnormal value is + // made. + static double Make(uint64_t mantissa, int exponent, bool sign) { +#ifndef ABSL_BIT_PACK_FLOATS + // Support ldexp no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. 
+ using namespace std; // NOLINT + return sign ? -ldexp(mantissa, exponent) : ldexp(mantissa, exponent); +#else + constexpr uint64_t kMantissaMask = + (uint64_t(1) << (kTargetMantissaBits - 1)) - 1; + uint64_t dbl = static_cast<uint64_t>(sign) << 63; + if (mantissa > kMantissaMask) { + // Normal value. + // Adjust by 1023 for the exponent representation bias, and an additional + // 52 due to the implied decimal point in the IEEE mantissa represenation. + dbl += uint64_t{exponent + 1023u + kTargetMantissaBits - 1} << 52; + mantissa &= kMantissaMask; + } else { + // subnormal value + assert(exponent == kMinNormalExponent); + } + dbl += mantissa; + return absl::bit_cast<double>(dbl); +#endif // ABSL_BIT_PACK_FLOATS + } +}; + +// Specialization of floating point traits for the `float` type. See the +// FloatTraits<double> specialization above for meaning of each of the following +// members and methods. +template <> +struct FloatTraits<float> { + static constexpr int kTargetMantissaBits = 24; + static constexpr int kMaxExponent = 104; + static constexpr int kMinNormalExponent = -149; + static float MakeNan(const char* tagp) { + // Support nanf no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return nanf(tagp); + } + static float Make(uint32_t mantissa, int exponent, bool sign) { +#ifndef ABSL_BIT_PACK_FLOATS + // Support ldexpf no matter which namespace it's in. Some platforms + // incorrectly don't put it in namespace std. + using namespace std; // NOLINT + return sign ? -ldexpf(mantissa, exponent) : ldexpf(mantissa, exponent); +#else + constexpr uint32_t kMantissaMask = + (uint32_t(1) << (kTargetMantissaBits - 1)) - 1; + uint32_t flt = static_cast<uint32_t>(sign) << 31; + if (mantissa > kMantissaMask) { + // Normal value. + // Adjust by 127 for the exponent representation bias, and an additional + // 23 due to the implied decimal point in the IEEE mantissa represenation. 
+ flt += uint32_t{exponent + 127u + kTargetMantissaBits - 1} << 23; + mantissa &= kMantissaMask; + } else { + // subnormal value + assert(exponent == kMinNormalExponent); + } + flt += mantissa; + return absl::bit_cast<float>(flt); +#endif // ABSL_BIT_PACK_FLOATS + } +}; + +// Decimal-to-binary conversions require coercing powers of 10 into a mantissa +// and a power of 2. The two helper functions Power10Mantissa(n) and +// Power10Exponent(n) perform this task. Together, these represent a hand- +// rolled floating point value which is equal to or just less than 10**n. +// +// The return values satisfy two range guarantees: +// +// Power10Mantissa(n) * 2**Power10Exponent(n) <= 10**n +// < (Power10Mantissa(n) + 1) * 2**Power10Exponent(n) +// +// 2**63 <= Power10Mantissa(n) < 2**64. +// +// Lookups into the power-of-10 table must first check the Power10Overflow() and +// Power10Underflow() functions, to avoid out-of-bounds table access. +// +// Indexes into these tables are biased by -kPower10TableMin, and the table has +// values in the range [kPower10TableMin, kPower10TableMax]. +extern const uint64_t kPower10MantissaTable[]; +extern const int16_t kPower10ExponentTable[]; + +// The smallest allowed value for use with the Power10Mantissa() and +// Power10Exponent() functions below. (If a smaller exponent is needed in +// calculations, the end result is guaranteed to underflow.) +constexpr int kPower10TableMin = -342; + +// The largest allowed value for use with the Power10Mantissa() and +// Power10Exponent() functions below. (If a smaller exponent is needed in +// calculations, the end result is guaranteed to overflow.) +constexpr int kPower10TableMax = 308; + +uint64_t Power10Mantissa(int n) { + return kPower10MantissaTable[n - kPower10TableMin]; +} + +int Power10Exponent(int n) { + return kPower10ExponentTable[n - kPower10TableMin]; +} + +// Returns true if n is large enough that 10**n always results in an IEEE +// overflow. 
+bool Power10Overflow(int n) { return n > kPower10TableMax; } + +// Returns true if n is small enough that 10**n times a ParsedFloat mantissa +// always results in an IEEE underflow. +bool Power10Underflow(int n) { return n < kPower10TableMin; } + +// Returns true if Power10Mantissa(n) * 2**Power10Exponent(n) is exactly equal +// to 10**n numerically. Put another way, this returns true if there is no +// truncation error in Power10Mantissa(n). +bool Power10Exact(int n) { return n >= 0 && n <= 27; } + +// Sentinel exponent values for representing numbers too large or too close to +// zero to represent in a double. +constexpr int kOverflow = 99999; +constexpr int kUnderflow = -99999; + +// Struct representing the calculated conversion result of a positive (nonzero) +// floating point number. +// +// The calculated number is mantissa * 2**exponent (mantissa is treated as an +// integer.) `mantissa` is chosen to be the correct width for the IEEE float +// representation being calculated. (`mantissa` will always have the same bit +// width for normal values, and narrower bit widths for subnormals.) +// +// If the result of conversion was an underflow or overflow, exponent is set +// to kUnderflow or kOverflow. +struct CalculatedFloat { + uint64_t mantissa = 0; + int exponent = 0; +}; + +// Returns the bit width of the given uint128. (Equivalently, returns 128 +// minus the number of leading zero bits.) +int BitWidth(uint128 value) { + if (Uint128High64(value) == 0) { + return 64 - base_internal::CountLeadingZeros64(Uint128Low64(value)); + } + return 128 - base_internal::CountLeadingZeros64(Uint128High64(value)); +} + +// Calculates how far to the right a mantissa needs to be shifted to create a +// properly adjusted mantissa for an IEEE floating point number. +// +// `mantissa_width` is the bit width of the mantissa to be shifted, and +// `binary_exponent` is the exponent of the number before the shift. 
//
// This accounts for subnormal values, and will return a larger-than-normal
// shift if binary_exponent would otherwise be too low.
template <typename FloatType>
int NormalizedShiftSize(int mantissa_width, int binary_exponent) {
  // Shift that leaves exactly kTargetMantissaBits bits of mantissa.
  const int normal_shift =
      mantissa_width - FloatTraits<FloatType>::kTargetMantissaBits;
  // Shift that raises the exponent to kMinNormalExponent.
  const int minimum_shift =
      FloatTraits<FloatType>::kMinNormalExponent - binary_exponent;
  return std::max(normal_shift, minimum_shift);
}

// Right shifts a uint128 so that it has the requested bit width.  (The
// resulting value will have 128 - bit_width leading zeroes.)  The initial
// `value` must be wider than the requested bit width.
//
// Returns the number of bits shifted.
int TruncateToBitWidth(int bit_width, uint128* value) {
  const int current_bit_width = BitWidth(*value);
  const int shift = current_bit_width - bit_width;
  *value >>= shift;
  return shift;
}

// Checks if the given ParsedFloat represents one of the edge cases that are
// not dependent on number base: zero, infinity, or NaN.  If so, sets *value
// to the appropriate FloatType value, and returns true.  Otherwise returns
// false and leaves *value untouched.
template <typename FloatType>
bool HandleEdgeCase(const strings_internal::ParsedFloat& input, bool negative,
                    FloatType* value) {
  if (input.type == strings_internal::FloatType::kNan) {
    // A bug in both clang and gcc would cause the compiler to optimize away the
    // buffer we are building below.  Declaring the buffer volatile avoids the
    // issue, and has no measurable performance impact in microbenchmarks.
    //
    // https://bugs.llvm.org/show_bug.cgi?id=37778
    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86113
    constexpr ptrdiff_t kNanBufferSize = 128;
    volatile char n_char_sequence[kNanBufferSize];
    if (input.subrange_begin == nullptr) {
      n_char_sequence[0] = '\0';
    } else {
      // Copy the [subrange_begin, subrange_end) text into the buffer, clamped
      // so that the terminating NUL always fits.
      ptrdiff_t nan_size = input.subrange_end - input.subrange_begin;
      nan_size = std::min(nan_size, kNanBufferSize - 1);
      std::copy_n(input.subrange_begin, nan_size, n_char_sequence);
      n_char_sequence[nan_size] = '\0';
    }
    char* nan_argument = const_cast<char*>(n_char_sequence);
    *value = negative ? -FloatTraits<FloatType>::MakeNan(nan_argument)
                      : FloatTraits<FloatType>::MakeNan(nan_argument);
    return true;
  }
  if (input.type == strings_internal::FloatType::kInfinity) {
    *value = negative ? -std::numeric_limits<FloatType>::infinity()
                      : std::numeric_limits<FloatType>::infinity();
    return true;
  }
  if (input.mantissa == 0) {
    *value = negative ? -0.0 : 0.0;
    return true;
  }
  return false;
}

// Given a CalculatedFloat result of a from_chars conversion, generate the
// correct output values.
//
// CalculatedFloat can represent an underflow or overflow, in which case the
// error code in *result is set to std::errc::result_out_of_range and *value
// is set to the signed maximum finite value (overflow) or a signed zero
// (underflow).  Otherwise, the calculated floating point number is stored in
// *value and *result is left unchanged.
template <typename FloatType>
void EncodeResult(const CalculatedFloat& calculated, bool negative,
                  absl::from_chars_result* result, FloatType* value) {
  if (calculated.exponent == kOverflow) {
    result->ec = std::errc::result_out_of_range;
    *value = negative ? -std::numeric_limits<FloatType>::max()
                      : std::numeric_limits<FloatType>::max();
    return;
  } else if (calculated.mantissa == 0 || calculated.exponent == kUnderflow) {
    result->ec = std::errc::result_out_of_range;
    *value = negative ? -0.0 : 0.0;
    return;
  }
  *value = FloatTraits<FloatType>::Make(calculated.mantissa,
                                        calculated.exponent, negative);
}

// Returns the given uint128 shifted to the right by `shift` bits, and rounds
// the remaining bits using round_to_nearest logic.  The value is returned as a
// uint64_t, since this is the type used by this library for storing calculated
// floating point mantissas.
//
// It is expected that the width of the input value shifted by `shift` will
// be the correct bit-width for the target mantissa, which is strictly narrower
// than a uint64_t.
//
// If `input_exact` is false, then a nonzero error epsilon is assumed.  For
// rounding purposes, the true value being rounded is strictly greater than the
// input value.  The error may represent a single lost carry bit.
//
// When input_exact, shifted bits of the form 1000000... represent a tie, which
// is broken by rounding to even -- the rounding direction is chosen so the low
// bit of the returned value is 0.
//
// When !input_exact, shifted bits of the form 10000000... represent a value
// strictly greater than one half (due to the error epsilon), and so ties are
// always broken by rounding up.
//
// When !input_exact, shifted bits of the form 01111111... are uncertain;
// the true value may or may not be greater than 10000000..., due to the
// possible lost carry bit.  The correct rounding direction is unknown.  In this
// case, the result is rounded down, and `output_exact` is set to false.
//
// Zero and negative values of `shift` are accepted, in which case the word is
// shifted left, as necessary.
uint64_t ShiftRightAndRound(uint128 value, int shift, bool input_exact,
                            bool* output_exact) {
  if (shift <= 0) {
    // No bits are discarded, so the exactness of the input carries over.
    *output_exact = input_exact;
    return static_cast<uint64_t>(value << -shift);
  }
  if (shift >= 128) {
    // Exponent is so small that we are shifting away all significant bits.
    // Answer will not be representable, even as a subnormal, so return a zero
    // mantissa (which represents underflow).
    *output_exact = true;
    return 0;
  }

  *output_exact = true;
  // `shift_mask` selects the bits that will be shifted out; `halfway_point`
  // is the bit pattern 1000... of that same width.
  const uint128 shift_mask = (uint128(1) << shift) - 1;
  const uint128 halfway_point = uint128(1) << (shift - 1);

  const uint128 shifted_bits = value & shift_mask;
  value >>= shift;
  if (shifted_bits > halfway_point) {
    // Shifted bits greater than 10000... require rounding up.
    return static_cast<uint64_t>(value + 1);
  }
  if (shifted_bits == halfway_point) {
    // In exact mode, shifted bits of 10000... mean we're exactly halfway
    // between two numbers, and we must round to even.  So only round up if
    // the low bit of `value` is set.
    //
    // In inexact mode, the nonzero error means the actual value is greater
    // than the halfway point and we must always round up.
    if ((value & 1) == 1 || !input_exact) {
      ++value;
    }
    return static_cast<uint64_t>(value);
  }
  if (!input_exact && shifted_bits == halfway_point - 1) {
    // Rounding direction is unclear, due to error.
    *output_exact = false;
  }
  // Otherwise, round down.
  return static_cast<uint64_t>(value);
}

// Checks if a floating point guess needs to be rounded up, using high precision
// math.
//
// `guess_mantissa` and `guess_exponent` represent a candidate guess for the
// number represented by `parsed_decimal`.
//
// The exact number represented by `parsed_decimal` must lie between the two
// numbers:
//   A = `guess_mantissa * 2**guess_exponent`
//   B = `(guess_mantissa + 1) * 2**guess_exponent`
//
// This function returns false if `A` is the better guess, and true if `B` is
// the better guess, with rounding ties broken by rounding to even.
bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent,
                 const strings_internal::ParsedFloat& parsed_decimal) {
  // 768 is the number of digits needed in the worst case.  We could determine a
  // better limit dynamically based on the value of parsed_decimal.exponent.
  // This would optimize pathological input cases only.  (Sane inputs won't have
  // hundreds of digits of mantissa.)
  absl::strings_internal::BigUnsigned<84> exact_mantissa;
  int exact_exponent = exact_mantissa.ReadFloatMantissa(parsed_decimal, 768);

  // Adjust the `guess` arguments to be halfway between A and B.
  guess_mantissa = guess_mantissa * 2 + 1;
  guess_exponent -= 1;

  // In our comparison:
  // lhs = exact = exact_mantissa * 10**exact_exponent
  //             = exact_mantissa * 5**exact_exponent * 2**exact_exponent
  // rhs = guess = guess_mantissa * 2**guess_exponent
  //
  // Because we are doing integer math, we can't directly deal with negative
  // exponents.  We instead move these to the other side of the inequality.
  absl::strings_internal::BigUnsigned<84>& lhs = exact_mantissa;
  int comparison;
  if (exact_exponent >= 0) {
    lhs.MultiplyByFiveToTheNth(exact_exponent);
    absl::strings_internal::BigUnsigned<84> rhs(guess_mantissa);
    // There are powers of 2 on both sides of the inequality; reduce this to
    // a single bit-shift.
    if (exact_exponent > guess_exponent) {
      lhs.ShiftLeft(exact_exponent - guess_exponent);
    } else {
      rhs.ShiftLeft(guess_exponent - exact_exponent);
    }
    comparison = Compare(lhs, rhs);
  } else {
    // Move the power of 5 to the other side of the equation, giving us:
    // lhs = exact_mantissa * 2**exact_exponent
    // rhs = guess_mantissa * 5**(-exact_exponent) * 2**guess_exponent
    absl::strings_internal::BigUnsigned<84> rhs =
        absl::strings_internal::BigUnsigned<84>::FiveToTheNth(-exact_exponent);
    rhs.MultiplyBy(guess_mantissa);
    // As above, fold the two powers of 2 into a single bit-shift.
    if (exact_exponent > guess_exponent) {
      lhs.ShiftLeft(exact_exponent - guess_exponent);
    } else {
      rhs.ShiftLeft(guess_exponent - exact_exponent);
    }
    comparison = Compare(lhs, rhs);
  }
  if (comparison < 0) {
    return false;
  } else if (comparison > 0) {
    return true;
  } else {
    // When lhs == rhs, the decimal input is exactly between A and B.
    // Round towards even -- round up only if the low bit of the initial
    // `guess_mantissa` was a 1.  We shifted guess_mantissa left 1 bit at
    // the beginning of this function, so test the 2nd bit here.
    return (guess_mantissa & 2) == 2;
  }
}

// Constructs a CalculatedFloat from a given mantissa and exponent, but
// with the following normalizations applied:
//
// If rounding has caused mantissa to increase just past the allowed bit
// width, shift and adjust exponent.
//
// If exponent is too high, sets kOverflow.
//
// If mantissa is zero (representing a non-zero value not representable, even
// as a subnormal), sets kUnderflow.
+template <typename FloatType> +CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { + CalculatedFloat result; + if (mantissa == uint64_t(1) << FloatTraits<FloatType>::kTargetMantissaBits) { + mantissa >>= 1; + exponent += 1; + } + if (exponent > FloatTraits<FloatType>::kMaxExponent) { + result.exponent = kOverflow; + } else if (mantissa == 0) { + result.exponent = kUnderflow; + } else { + result.exponent = exponent; + result.mantissa = mantissa; + } + return result; +} + +template <typename FloatType> +CalculatedFloat CalculateFromParsedHexadecimal( + const strings_internal::ParsedFloat& parsed_hex) { + uint64_t mantissa = parsed_hex.mantissa; + int exponent = parsed_hex.exponent; + int mantissa_width = 64 - base_internal::CountLeadingZeros64(mantissa); + const int shift = NormalizedShiftSize<FloatType>(mantissa_width, exponent); + bool result_exact; + exponent += shift; + mantissa = ShiftRightAndRound(mantissa, shift, + /* input exact= */ true, &result_exact); + // ParseFloat handles rounding in the hexadecimal case, so we don't have to + // check `result_exact` here. + return CalculatedFloatFromRawValues<FloatType>(mantissa, exponent); +} + +template <typename FloatType> +CalculatedFloat CalculateFromParsedDecimal( + const strings_internal::ParsedFloat& parsed_decimal) { + CalculatedFloat result; + + // Large or small enough decimal exponents will always result in overflow + // or underflow. + if (Power10Underflow(parsed_decimal.exponent)) { + result.exponent = kUnderflow; + return result; + } else if (Power10Overflow(parsed_decimal.exponent)) { + result.exponent = kOverflow; + return result; + } + + // Otherwise convert our power of 10 into a power of 2 times an integer + // mantissa, and multiply this by our parsed decimal mantissa. 
+ uint128 wide_binary_mantissa = parsed_decimal.mantissa; + wide_binary_mantissa *= Power10Mantissa(parsed_decimal.exponent); + int binary_exponent = Power10Exponent(parsed_decimal.exponent); + + // Discard bits that are inaccurate due to truncation error. The magic + // `mantissa_width` constants below are justified in + // https://abseil.io/about/design/charconv. They represent the number of bits + // in `wide_binary_mantissa` that are guaranteed to be unaffected by error + // propagation. + bool mantissa_exact; + int mantissa_width; + if (parsed_decimal.subrange_begin) { + // Truncated mantissa + mantissa_width = 58; + mantissa_exact = false; + binary_exponent += + TruncateToBitWidth(mantissa_width, &wide_binary_mantissa); + } else if (!Power10Exact(parsed_decimal.exponent)) { + // Exact mantissa, truncated power of ten + mantissa_width = 63; + mantissa_exact = false; + binary_exponent += + TruncateToBitWidth(mantissa_width, &wide_binary_mantissa); + } else { + // Product is exact + mantissa_width = BitWidth(wide_binary_mantissa); + mantissa_exact = true; + } + + // Shift into an FloatType-sized mantissa, and round to nearest. + const int shift = + NormalizedShiftSize<FloatType>(mantissa_width, binary_exponent); + bool result_exact; + binary_exponent += shift; + uint64_t binary_mantissa = ShiftRightAndRound(wide_binary_mantissa, shift, + mantissa_exact, &result_exact); + if (!result_exact) { + // We could not determine the rounding direction using int128 math. Use + // full resolution math instead. 
+ if (MustRoundUp(binary_mantissa, binary_exponent, parsed_decimal)) { + binary_mantissa += 1; + } + } + + return CalculatedFloatFromRawValues<FloatType>(binary_mantissa, + binary_exponent); +} + +template <typename FloatType> +from_chars_result FromCharsImpl(const char* first, const char* last, + FloatType& value, chars_format fmt_flags) { + from_chars_result result; + result.ptr = first; // overwritten on successful parse + result.ec = std::errc(); + + bool negative = false; + if (first != last && *first == '-') { + ++first; + negative = true; + } + // If the `hex` flag is *not* set, then we will accept a 0x prefix and try + // to parse a hexadecimal float. + if ((fmt_flags & chars_format::hex) == chars_format{} && last - first >= 2 && + *first == '0' && (first[1] == 'x' || first[1] == 'X')) { + const char* hex_first = first + 2; + strings_internal::ParsedFloat hex_parse = + strings_internal::ParseFloat<16>(hex_first, last, fmt_flags); + if (hex_parse.end == nullptr || + hex_parse.type != strings_internal::FloatType::kNumber) { + // Either we failed to parse a hex float after the "0x", or we read + // "0xinf" or "0xnan" which we don't want to match. + // + // However, a string that begins with "0x" also begins with "0", which + // is normally a valid match for the number zero. So we want these + // strings to match zero unless fmt_flags is `scientific`. (This flag + // means an exponent is required, which the string "0" does not have.) + if (fmt_flags == chars_format::scientific) { + result.ec = std::errc::invalid_argument; + } else { + result.ptr = first + 1; + value = negative ? -0.0 : 0.0; + } + return result; + } + // We matched a value. 
+ result.ptr = hex_parse.end; + if (HandleEdgeCase(hex_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedHexadecimal<FloatType>(hex_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } + // Otherwise, we choose the number base based on the flags. + if ((fmt_flags & chars_format::hex) == chars_format::hex) { + strings_internal::ParsedFloat hex_parse = + strings_internal::ParseFloat<16>(first, last, fmt_flags); + if (hex_parse.end == nullptr) { + result.ec = std::errc::invalid_argument; + return result; + } + result.ptr = hex_parse.end; + if (HandleEdgeCase(hex_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedHexadecimal<FloatType>(hex_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } else { + strings_internal::ParsedFloat decimal_parse = + strings_internal::ParseFloat<10>(first, last, fmt_flags); + if (decimal_parse.end == nullptr) { + result.ec = std::errc::invalid_argument; + return result; + } + result.ptr = decimal_parse.end; + if (HandleEdgeCase(decimal_parse, negative, &value)) { + return result; + } + CalculatedFloat calculated = + CalculateFromParsedDecimal<FloatType>(decimal_parse); + EncodeResult(calculated, negative, &result, &value); + return result; + } +} +} // namespace + +from_chars_result from_chars(const char* first, const char* last, double& value, + chars_format fmt) { + return FromCharsImpl(first, last, value, fmt); +} + +from_chars_result from_chars(const char* first, const char* last, float& value, + chars_format fmt) { + return FromCharsImpl(first, last, value, fmt); +} + +namespace { + +// Table of powers of 10, from kPower10TableMin to kPower10TableMax. +// +// kPower10MantissaTable[i - kPower10TableMin] stores the 64-bit mantissa (high +// bit always on), and kPower10ExponentTable[i - kPower10TableMin] stores the +// power-of-two exponent. 
For a given number i, this gives the unique mantissa +// and exponent such that mantissa * 2**exponent <= 10**i < (mantissa + 1) * +// 2**exponent. + +const uint64_t kPower10MantissaTable[] = { + 0xeef453d6923bd65aU, 0x9558b4661b6565f8U, 0xbaaee17fa23ebf76U, + 0xe95a99df8ace6f53U, 0x91d8a02bb6c10594U, 0xb64ec836a47146f9U, + 0xe3e27a444d8d98b7U, 0x8e6d8c6ab0787f72U, 0xb208ef855c969f4fU, + 0xde8b2b66b3bc4723U, 0x8b16fb203055ac76U, 0xaddcb9e83c6b1793U, + 0xd953e8624b85dd78U, 0x87d4713d6f33aa6bU, 0xa9c98d8ccb009506U, + 0xd43bf0effdc0ba48U, 0x84a57695fe98746dU, 0xa5ced43b7e3e9188U, + 0xcf42894a5dce35eaU, 0x818995ce7aa0e1b2U, 0xa1ebfb4219491a1fU, + 0xca66fa129f9b60a6U, 0xfd00b897478238d0U, 0x9e20735e8cb16382U, + 0xc5a890362fddbc62U, 0xf712b443bbd52b7bU, 0x9a6bb0aa55653b2dU, + 0xc1069cd4eabe89f8U, 0xf148440a256e2c76U, 0x96cd2a865764dbcaU, + 0xbc807527ed3e12bcU, 0xeba09271e88d976bU, 0x93445b8731587ea3U, + 0xb8157268fdae9e4cU, 0xe61acf033d1a45dfU, 0x8fd0c16206306babU, + 0xb3c4f1ba87bc8696U, 0xe0b62e2929aba83cU, 0x8c71dcd9ba0b4925U, + 0xaf8e5410288e1b6fU, 0xdb71e91432b1a24aU, 0x892731ac9faf056eU, + 0xab70fe17c79ac6caU, 0xd64d3d9db981787dU, 0x85f0468293f0eb4eU, + 0xa76c582338ed2621U, 0xd1476e2c07286faaU, 0x82cca4db847945caU, + 0xa37fce126597973cU, 0xcc5fc196fefd7d0cU, 0xff77b1fcbebcdc4fU, + 0x9faacf3df73609b1U, 0xc795830d75038c1dU, 0xf97ae3d0d2446f25U, + 0x9becce62836ac577U, 0xc2e801fb244576d5U, 0xf3a20279ed56d48aU, + 0x9845418c345644d6U, 0xbe5691ef416bd60cU, 0xedec366b11c6cb8fU, + 0x94b3a202eb1c3f39U, 0xb9e08a83a5e34f07U, 0xe858ad248f5c22c9U, + 0x91376c36d99995beU, 0xb58547448ffffb2dU, 0xe2e69915b3fff9f9U, + 0x8dd01fad907ffc3bU, 0xb1442798f49ffb4aU, 0xdd95317f31c7fa1dU, + 0x8a7d3eef7f1cfc52U, 0xad1c8eab5ee43b66U, 0xd863b256369d4a40U, + 0x873e4f75e2224e68U, 0xa90de3535aaae202U, 0xd3515c2831559a83U, + 0x8412d9991ed58091U, 0xa5178fff668ae0b6U, 0xce5d73ff402d98e3U, + 0x80fa687f881c7f8eU, 0xa139029f6a239f72U, 0xc987434744ac874eU, + 0xfbe9141915d7a922U, 0x9d71ac8fada6c9b5U, 
0xc4ce17b399107c22U, + 0xf6019da07f549b2bU, 0x99c102844f94e0fbU, 0xc0314325637a1939U, + 0xf03d93eebc589f88U, 0x96267c7535b763b5U, 0xbbb01b9283253ca2U, + 0xea9c227723ee8bcbU, 0x92a1958a7675175fU, 0xb749faed14125d36U, + 0xe51c79a85916f484U, 0x8f31cc0937ae58d2U, 0xb2fe3f0b8599ef07U, + 0xdfbdcece67006ac9U, 0x8bd6a141006042bdU, 0xaecc49914078536dU, + 0xda7f5bf590966848U, 0x888f99797a5e012dU, 0xaab37fd7d8f58178U, + 0xd5605fcdcf32e1d6U, 0x855c3be0a17fcd26U, 0xa6b34ad8c9dfc06fU, + 0xd0601d8efc57b08bU, 0x823c12795db6ce57U, 0xa2cb1717b52481edU, + 0xcb7ddcdda26da268U, 0xfe5d54150b090b02U, 0x9efa548d26e5a6e1U, + 0xc6b8e9b0709f109aU, 0xf867241c8cc6d4c0U, 0x9b407691d7fc44f8U, + 0xc21094364dfb5636U, 0xf294b943e17a2bc4U, 0x979cf3ca6cec5b5aU, + 0xbd8430bd08277231U, 0xece53cec4a314ebdU, 0x940f4613ae5ed136U, + 0xb913179899f68584U, 0xe757dd7ec07426e5U, 0x9096ea6f3848984fU, + 0xb4bca50b065abe63U, 0xe1ebce4dc7f16dfbU, 0x8d3360f09cf6e4bdU, + 0xb080392cc4349decU, 0xdca04777f541c567U, 0x89e42caaf9491b60U, + 0xac5d37d5b79b6239U, 0xd77485cb25823ac7U, 0x86a8d39ef77164bcU, + 0xa8530886b54dbdebU, 0xd267caa862a12d66U, 0x8380dea93da4bc60U, + 0xa46116538d0deb78U, 0xcd795be870516656U, 0x806bd9714632dff6U, + 0xa086cfcd97bf97f3U, 0xc8a883c0fdaf7df0U, 0xfad2a4b13d1b5d6cU, + 0x9cc3a6eec6311a63U, 0xc3f490aa77bd60fcU, 0xf4f1b4d515acb93bU, + 0x991711052d8bf3c5U, 0xbf5cd54678eef0b6U, 0xef340a98172aace4U, + 0x9580869f0e7aac0eU, 0xbae0a846d2195712U, 0xe998d258869facd7U, + 0x91ff83775423cc06U, 0xb67f6455292cbf08U, 0xe41f3d6a7377eecaU, + 0x8e938662882af53eU, 0xb23867fb2a35b28dU, 0xdec681f9f4c31f31U, + 0x8b3c113c38f9f37eU, 0xae0b158b4738705eU, 0xd98ddaee19068c76U, + 0x87f8a8d4cfa417c9U, 0xa9f6d30a038d1dbcU, 0xd47487cc8470652bU, + 0x84c8d4dfd2c63f3bU, 0xa5fb0a17c777cf09U, 0xcf79cc9db955c2ccU, + 0x81ac1fe293d599bfU, 0xa21727db38cb002fU, 0xca9cf1d206fdc03bU, + 0xfd442e4688bd304aU, 0x9e4a9cec15763e2eU, 0xc5dd44271ad3cdbaU, + 0xf7549530e188c128U, 0x9a94dd3e8cf578b9U, 0xc13a148e3032d6e7U, + 0xf18899b1bc3f8ca1U, 
0x96f5600f15a7b7e5U, 0xbcb2b812db11a5deU, + 0xebdf661791d60f56U, 0x936b9fcebb25c995U, 0xb84687c269ef3bfbU, + 0xe65829b3046b0afaU, 0x8ff71a0fe2c2e6dcU, 0xb3f4e093db73a093U, + 0xe0f218b8d25088b8U, 0x8c974f7383725573U, 0xafbd2350644eeacfU, + 0xdbac6c247d62a583U, 0x894bc396ce5da772U, 0xab9eb47c81f5114fU, + 0xd686619ba27255a2U, 0x8613fd0145877585U, 0xa798fc4196e952e7U, + 0xd17f3b51fca3a7a0U, 0x82ef85133de648c4U, 0xa3ab66580d5fdaf5U, + 0xcc963fee10b7d1b3U, 0xffbbcfe994e5c61fU, 0x9fd561f1fd0f9bd3U, + 0xc7caba6e7c5382c8U, 0xf9bd690a1b68637bU, 0x9c1661a651213e2dU, + 0xc31bfa0fe5698db8U, 0xf3e2f893dec3f126U, 0x986ddb5c6b3a76b7U, + 0xbe89523386091465U, 0xee2ba6c0678b597fU, 0x94db483840b717efU, + 0xba121a4650e4ddebU, 0xe896a0d7e51e1566U, 0x915e2486ef32cd60U, + 0xb5b5ada8aaff80b8U, 0xe3231912d5bf60e6U, 0x8df5efabc5979c8fU, + 0xb1736b96b6fd83b3U, 0xddd0467c64bce4a0U, 0x8aa22c0dbef60ee4U, + 0xad4ab7112eb3929dU, 0xd89d64d57a607744U, 0x87625f056c7c4a8bU, + 0xa93af6c6c79b5d2dU, 0xd389b47879823479U, 0x843610cb4bf160cbU, + 0xa54394fe1eedb8feU, 0xce947a3da6a9273eU, 0x811ccc668829b887U, + 0xa163ff802a3426a8U, 0xc9bcff6034c13052U, 0xfc2c3f3841f17c67U, + 0x9d9ba7832936edc0U, 0xc5029163f384a931U, 0xf64335bcf065d37dU, + 0x99ea0196163fa42eU, 0xc06481fb9bcf8d39U, 0xf07da27a82c37088U, + 0x964e858c91ba2655U, 0xbbe226efb628afeaU, 0xeadab0aba3b2dbe5U, + 0x92c8ae6b464fc96fU, 0xb77ada0617e3bbcbU, 0xe55990879ddcaabdU, + 0x8f57fa54c2a9eab6U, 0xb32df8e9f3546564U, 0xdff9772470297ebdU, + 0x8bfbea76c619ef36U, 0xaefae51477a06b03U, 0xdab99e59958885c4U, + 0x88b402f7fd75539bU, 0xaae103b5fcd2a881U, 0xd59944a37c0752a2U, + 0x857fcae62d8493a5U, 0xa6dfbd9fb8e5b88eU, 0xd097ad07a71f26b2U, + 0x825ecc24c873782fU, 0xa2f67f2dfa90563bU, 0xcbb41ef979346bcaU, + 0xfea126b7d78186bcU, 0x9f24b832e6b0f436U, 0xc6ede63fa05d3143U, + 0xf8a95fcf88747d94U, 0x9b69dbe1b548ce7cU, 0xc24452da229b021bU, + 0xf2d56790ab41c2a2U, 0x97c560ba6b0919a5U, 0xbdb6b8e905cb600fU, + 0xed246723473e3813U, 0x9436c0760c86e30bU, 0xb94470938fa89bceU, + 
0xe7958cb87392c2c2U, 0x90bd77f3483bb9b9U, 0xb4ecd5f01a4aa828U, + 0xe2280b6c20dd5232U, 0x8d590723948a535fU, 0xb0af48ec79ace837U, + 0xdcdb1b2798182244U, 0x8a08f0f8bf0f156bU, 0xac8b2d36eed2dac5U, + 0xd7adf884aa879177U, 0x86ccbb52ea94baeaU, 0xa87fea27a539e9a5U, + 0xd29fe4b18e88640eU, 0x83a3eeeef9153e89U, 0xa48ceaaab75a8e2bU, + 0xcdb02555653131b6U, 0x808e17555f3ebf11U, 0xa0b19d2ab70e6ed6U, + 0xc8de047564d20a8bU, 0xfb158592be068d2eU, 0x9ced737bb6c4183dU, + 0xc428d05aa4751e4cU, 0xf53304714d9265dfU, 0x993fe2c6d07b7fabU, + 0xbf8fdb78849a5f96U, 0xef73d256a5c0f77cU, 0x95a8637627989aadU, + 0xbb127c53b17ec159U, 0xe9d71b689dde71afU, 0x9226712162ab070dU, + 0xb6b00d69bb55c8d1U, 0xe45c10c42a2b3b05U, 0x8eb98a7a9a5b04e3U, + 0xb267ed1940f1c61cU, 0xdf01e85f912e37a3U, 0x8b61313bbabce2c6U, + 0xae397d8aa96c1b77U, 0xd9c7dced53c72255U, 0x881cea14545c7575U, + 0xaa242499697392d2U, 0xd4ad2dbfc3d07787U, 0x84ec3c97da624ab4U, + 0xa6274bbdd0fadd61U, 0xcfb11ead453994baU, 0x81ceb32c4b43fcf4U, + 0xa2425ff75e14fc31U, 0xcad2f7f5359a3b3eU, 0xfd87b5f28300ca0dU, + 0x9e74d1b791e07e48U, 0xc612062576589ddaU, 0xf79687aed3eec551U, + 0x9abe14cd44753b52U, 0xc16d9a0095928a27U, 0xf1c90080baf72cb1U, + 0x971da05074da7beeU, 0xbce5086492111aeaU, 0xec1e4a7db69561a5U, + 0x9392ee8e921d5d07U, 0xb877aa3236a4b449U, 0xe69594bec44de15bU, + 0x901d7cf73ab0acd9U, 0xb424dc35095cd80fU, 0xe12e13424bb40e13U, + 0x8cbccc096f5088cbU, 0xafebff0bcb24aafeU, 0xdbe6fecebdedd5beU, + 0x89705f4136b4a597U, 0xabcc77118461cefcU, 0xd6bf94d5e57a42bcU, + 0x8637bd05af6c69b5U, 0xa7c5ac471b478423U, 0xd1b71758e219652bU, + 0x83126e978d4fdf3bU, 0xa3d70a3d70a3d70aU, 0xccccccccccccccccU, + 0x8000000000000000U, 0xa000000000000000U, 0xc800000000000000U, + 0xfa00000000000000U, 0x9c40000000000000U, 0xc350000000000000U, + 0xf424000000000000U, 0x9896800000000000U, 0xbebc200000000000U, + 0xee6b280000000000U, 0x9502f90000000000U, 0xba43b74000000000U, + 0xe8d4a51000000000U, 0x9184e72a00000000U, 0xb5e620f480000000U, + 0xe35fa931a0000000U, 0x8e1bc9bf04000000U, 
0xb1a2bc2ec5000000U, + 0xde0b6b3a76400000U, 0x8ac7230489e80000U, 0xad78ebc5ac620000U, + 0xd8d726b7177a8000U, 0x878678326eac9000U, 0xa968163f0a57b400U, + 0xd3c21bcecceda100U, 0x84595161401484a0U, 0xa56fa5b99019a5c8U, + 0xcecb8f27f4200f3aU, 0x813f3978f8940984U, 0xa18f07d736b90be5U, + 0xc9f2c9cd04674edeU, 0xfc6f7c4045812296U, 0x9dc5ada82b70b59dU, + 0xc5371912364ce305U, 0xf684df56c3e01bc6U, 0x9a130b963a6c115cU, + 0xc097ce7bc90715b3U, 0xf0bdc21abb48db20U, 0x96769950b50d88f4U, + 0xbc143fa4e250eb31U, 0xeb194f8e1ae525fdU, 0x92efd1b8d0cf37beU, + 0xb7abc627050305adU, 0xe596b7b0c643c719U, 0x8f7e32ce7bea5c6fU, + 0xb35dbf821ae4f38bU, 0xe0352f62a19e306eU, 0x8c213d9da502de45U, + 0xaf298d050e4395d6U, 0xdaf3f04651d47b4cU, 0x88d8762bf324cd0fU, + 0xab0e93b6efee0053U, 0xd5d238a4abe98068U, 0x85a36366eb71f041U, + 0xa70c3c40a64e6c51U, 0xd0cf4b50cfe20765U, 0x82818f1281ed449fU, + 0xa321f2d7226895c7U, 0xcbea6f8ceb02bb39U, 0xfee50b7025c36a08U, + 0x9f4f2726179a2245U, 0xc722f0ef9d80aad6U, 0xf8ebad2b84e0d58bU, + 0x9b934c3b330c8577U, 0xc2781f49ffcfa6d5U, 0xf316271c7fc3908aU, + 0x97edd871cfda3a56U, 0xbde94e8e43d0c8ecU, 0xed63a231d4c4fb27U, + 0x945e455f24fb1cf8U, 0xb975d6b6ee39e436U, 0xe7d34c64a9c85d44U, + 0x90e40fbeea1d3a4aU, 0xb51d13aea4a488ddU, 0xe264589a4dcdab14U, + 0x8d7eb76070a08aecU, 0xb0de65388cc8ada8U, 0xdd15fe86affad912U, + 0x8a2dbf142dfcc7abU, 0xacb92ed9397bf996U, 0xd7e77a8f87daf7fbU, + 0x86f0ac99b4e8dafdU, 0xa8acd7c0222311bcU, 0xd2d80db02aabd62bU, + 0x83c7088e1aab65dbU, 0xa4b8cab1a1563f52U, 0xcde6fd5e09abcf26U, + 0x80b05e5ac60b6178U, 0xa0dc75f1778e39d6U, 0xc913936dd571c84cU, + 0xfb5878494ace3a5fU, 0x9d174b2dcec0e47bU, 0xc45d1df942711d9aU, + 0xf5746577930d6500U, 0x9968bf6abbe85f20U, 0xbfc2ef456ae276e8U, + 0xefb3ab16c59b14a2U, 0x95d04aee3b80ece5U, 0xbb445da9ca61281fU, + 0xea1575143cf97226U, 0x924d692ca61be758U, 0xb6e0c377cfa2e12eU, + 0xe498f455c38b997aU, 0x8edf98b59a373fecU, 0xb2977ee300c50fe7U, + 0xdf3d5e9bc0f653e1U, 0x8b865b215899f46cU, 0xae67f1e9aec07187U, + 0xda01ee641a708de9U, 
0x884134fe908658b2U, 0xaa51823e34a7eedeU, + 0xd4e5e2cdc1d1ea96U, 0x850fadc09923329eU, 0xa6539930bf6bff45U, + 0xcfe87f7cef46ff16U, 0x81f14fae158c5f6eU, 0xa26da3999aef7749U, + 0xcb090c8001ab551cU, 0xfdcb4fa002162a63U, 0x9e9f11c4014dda7eU, + 0xc646d63501a1511dU, 0xf7d88bc24209a565U, 0x9ae757596946075fU, + 0xc1a12d2fc3978937U, 0xf209787bb47d6b84U, 0x9745eb4d50ce6332U, + 0xbd176620a501fbffU, 0xec5d3fa8ce427affU, 0x93ba47c980e98cdfU, + 0xb8a8d9bbe123f017U, 0xe6d3102ad96cec1dU, 0x9043ea1ac7e41392U, + 0xb454e4a179dd1877U, 0xe16a1dc9d8545e94U, 0x8ce2529e2734bb1dU, + 0xb01ae745b101e9e4U, 0xdc21a1171d42645dU, 0x899504ae72497ebaU, + 0xabfa45da0edbde69U, 0xd6f8d7509292d603U, 0x865b86925b9bc5c2U, + 0xa7f26836f282b732U, 0xd1ef0244af2364ffU, 0x8335616aed761f1fU, + 0xa402b9c5a8d3a6e7U, 0xcd036837130890a1U, 0x802221226be55a64U, + 0xa02aa96b06deb0fdU, 0xc83553c5c8965d3dU, 0xfa42a8b73abbf48cU, + 0x9c69a97284b578d7U, 0xc38413cf25e2d70dU, 0xf46518c2ef5b8cd1U, + 0x98bf2f79d5993802U, 0xbeeefb584aff8603U, 0xeeaaba2e5dbf6784U, + 0x952ab45cfa97a0b2U, 0xba756174393d88dfU, 0xe912b9d1478ceb17U, + 0x91abb422ccb812eeU, 0xb616a12b7fe617aaU, 0xe39c49765fdf9d94U, + 0x8e41ade9fbebc27dU, 0xb1d219647ae6b31cU, 0xde469fbd99a05fe3U, + 0x8aec23d680043beeU, 0xada72ccc20054ae9U, 0xd910f7ff28069da4U, + 0x87aa9aff79042286U, 0xa99541bf57452b28U, 0xd3fa922f2d1675f2U, + 0x847c9b5d7c2e09b7U, 0xa59bc234db398c25U, 0xcf02b2c21207ef2eU, + 0x8161afb94b44f57dU, 0xa1ba1ba79e1632dcU, 0xca28a291859bbf93U, + 0xfcb2cb35e702af78U, 0x9defbf01b061adabU, 0xc56baec21c7a1916U, + 0xf6c69a72a3989f5bU, 0x9a3c2087a63f6399U, 0xc0cb28a98fcf3c7fU, + 0xf0fdf2d3f3c30b9fU, 0x969eb7c47859e743U, 0xbc4665b596706114U, + 0xeb57ff22fc0c7959U, 0x9316ff75dd87cbd8U, 0xb7dcbf5354e9beceU, + 0xe5d3ef282a242e81U, 0x8fa475791a569d10U, 0xb38d92d760ec4455U, + 0xe070f78d3927556aU, 0x8c469ab843b89562U, 0xaf58416654a6babbU, + 0xdb2e51bfe9d0696aU, 0x88fcf317f22241e2U, 0xab3c2fddeeaad25aU, + 0xd60b3bd56a5586f1U, 0x85c7056562757456U, 0xa738c6bebb12d16cU, + 
0xd106f86e69d785c7U, 0x82a45b450226b39cU, 0xa34d721642b06084U, + 0xcc20ce9bd35c78a5U, 0xff290242c83396ceU, 0x9f79a169bd203e41U, + 0xc75809c42c684dd1U, 0xf92e0c3537826145U, 0x9bbcc7a142b17ccbU, + 0xc2abf989935ddbfeU, 0xf356f7ebf83552feU, 0x98165af37b2153deU, + 0xbe1bf1b059e9a8d6U, 0xeda2ee1c7064130cU, 0x9485d4d1c63e8be7U, + 0xb9a74a0637ce2ee1U, 0xe8111c87c5c1ba99U, 0x910ab1d4db9914a0U, + 0xb54d5e4a127f59c8U, 0xe2a0b5dc971f303aU, 0x8da471a9de737e24U, + 0xb10d8e1456105dadU, 0xdd50f1996b947518U, 0x8a5296ffe33cc92fU, + 0xace73cbfdc0bfb7bU, 0xd8210befd30efa5aU, 0x8714a775e3e95c78U, + 0xa8d9d1535ce3b396U, 0xd31045a8341ca07cU, 0x83ea2b892091e44dU, + 0xa4e4b66b68b65d60U, 0xce1de40642e3f4b9U, 0x80d2ae83e9ce78f3U, + 0xa1075a24e4421730U, 0xc94930ae1d529cfcU, 0xfb9b7cd9a4a7443cU, + 0x9d412e0806e88aa5U, 0xc491798a08a2ad4eU, 0xf5b5d7ec8acb58a2U, + 0x9991a6f3d6bf1765U, 0xbff610b0cc6edd3fU, 0xeff394dcff8a948eU, + 0x95f83d0a1fb69cd9U, 0xbb764c4ca7a4440fU, 0xea53df5fd18d5513U, + 0x92746b9be2f8552cU, 0xb7118682dbb66a77U, 0xe4d5e82392a40515U, + 0x8f05b1163ba6832dU, 0xb2c71d5bca9023f8U, 0xdf78e4b2bd342cf6U, + 0x8bab8eefb6409c1aU, 0xae9672aba3d0c320U, 0xda3c0f568cc4f3e8U, + 0x8865899617fb1871U, 0xaa7eebfb9df9de8dU, 0xd51ea6fa85785631U, + 0x8533285c936b35deU, 0xa67ff273b8460356U, 0xd01fef10a657842cU, + 0x8213f56a67f6b29bU, 0xa298f2c501f45f42U, 0xcb3f2f7642717713U, + 0xfe0efb53d30dd4d7U, 0x9ec95d1463e8a506U, 0xc67bb4597ce2ce48U, + 0xf81aa16fdc1b81daU, 0x9b10a4e5e9913128U, 0xc1d4ce1f63f57d72U, + 0xf24a01a73cf2dccfU, 0x976e41088617ca01U, 0xbd49d14aa79dbc82U, + 0xec9c459d51852ba2U, 0x93e1ab8252f33b45U, 0xb8da1662e7b00a17U, + 0xe7109bfba19c0c9dU, 0x906a617d450187e2U, 0xb484f9dc9641e9daU, + 0xe1a63853bbd26451U, 0x8d07e33455637eb2U, 0xb049dc016abc5e5fU, + 0xdc5c5301c56b75f7U, 0x89b9b3e11b6329baU, 0xac2820d9623bf429U, + 0xd732290fbacaf133U, 0x867f59a9d4bed6c0U, 0xa81f301449ee8c70U, + 0xd226fc195c6a2f8cU, 0x83585d8fd9c25db7U, 0xa42e74f3d032f525U, + 0xcd3a1230c43fb26fU, 0x80444b5e7aa7cf85U, 
0xa0555e361951c366U, + 0xc86ab5c39fa63440U, 0xfa856334878fc150U, 0x9c935e00d4b9d8d2U, + 0xc3b8358109e84f07U, 0xf4a642e14c6262c8U, 0x98e7e9cccfbd7dbdU, + 0xbf21e44003acdd2cU, 0xeeea5d5004981478U, 0x95527a5202df0ccbU, + 0xbaa718e68396cffdU, 0xe950df20247c83fdU, 0x91d28b7416cdd27eU, + 0xb6472e511c81471dU, 0xe3d8f9e563a198e5U, 0x8e679c2f5e44ff8fU, +}; + +const int16_t kPower10ExponentTable[] = { + -1200, -1196, -1193, -1190, -1186, -1183, -1180, -1176, -1173, -1170, -1166, + -1163, -1160, -1156, -1153, -1150, -1146, -1143, -1140, -1136, -1133, -1130, + -1127, -1123, -1120, -1117, -1113, -1110, -1107, -1103, -1100, -1097, -1093, + -1090, -1087, -1083, -1080, -1077, -1073, -1070, -1067, -1063, -1060, -1057, + -1053, -1050, -1047, -1043, -1040, -1037, -1034, -1030, -1027, -1024, -1020, + -1017, -1014, -1010, -1007, -1004, -1000, -997, -994, -990, -987, -984, + -980, -977, -974, -970, -967, -964, -960, -957, -954, -950, -947, + -944, -940, -937, -934, -931, -927, -924, -921, -917, -914, -911, + -907, -904, -901, -897, -894, -891, -887, -884, -881, -877, -874, + -871, -867, -864, -861, -857, -854, -851, -847, -844, -841, -838, + -834, -831, -828, -824, -821, -818, -814, -811, -808, -804, -801, + -798, -794, -791, -788, -784, -781, -778, -774, -771, -768, -764, + -761, -758, -754, -751, -748, -744, -741, -738, -735, -731, -728, + -725, -721, -718, -715, -711, -708, -705, -701, -698, -695, -691, + -688, -685, -681, -678, -675, -671, -668, -665, -661, -658, -655, + -651, -648, -645, -642, -638, -635, -632, -628, -625, -622, -618, + -615, -612, -608, -605, -602, -598, -595, -592, -588, -585, -582, + -578, -575, -572, -568, -565, -562, -558, -555, -552, -549, -545, + -542, -539, -535, -532, -529, -525, -522, -519, -515, -512, -509, + -505, -502, -499, -495, -492, -489, -485, -482, -479, -475, -472, + -469, -465, -462, -459, -455, -452, -449, -446, -442, -439, -436, + -432, -429, -426, -422, -419, -416, -412, -409, -406, -402, -399, + -396, -392, -389, -386, -382, -379, -376, 
-372, -369, -366, -362, + -359, -356, -353, -349, -346, -343, -339, -336, -333, -329, -326, + -323, -319, -316, -313, -309, -306, -303, -299, -296, -293, -289, + -286, -283, -279, -276, -273, -269, -266, -263, -259, -256, -253, + -250, -246, -243, -240, -236, -233, -230, -226, -223, -220, -216, + -213, -210, -206, -203, -200, -196, -193, -190, -186, -183, -180, + -176, -173, -170, -166, -163, -160, -157, -153, -150, -147, -143, + -140, -137, -133, -130, -127, -123, -120, -117, -113, -110, -107, + -103, -100, -97, -93, -90, -87, -83, -80, -77, -73, -70, + -67, -63, -60, -57, -54, -50, -47, -44, -40, -37, -34, + -30, -27, -24, -20, -17, -14, -10, -7, -4, 0, 3, + 6, 10, 13, 16, 20, 23, 26, 30, 33, 36, 39, + 43, 46, 49, 53, 56, 59, 63, 66, 69, 73, 76, + 79, 83, 86, 89, 93, 96, 99, 103, 106, 109, 113, + 116, 119, 123, 126, 129, 132, 136, 139, 142, 146, 149, + 152, 156, 159, 162, 166, 169, 172, 176, 179, 182, 186, + 189, 192, 196, 199, 202, 206, 209, 212, 216, 219, 222, + 226, 229, 232, 235, 239, 242, 245, 249, 252, 255, 259, + 262, 265, 269, 272, 275, 279, 282, 285, 289, 292, 295, + 299, 302, 305, 309, 312, 315, 319, 322, 325, 328, 332, + 335, 338, 342, 345, 348, 352, 355, 358, 362, 365, 368, + 372, 375, 378, 382, 385, 388, 392, 395, 398, 402, 405, + 408, 412, 415, 418, 422, 425, 428, 431, 435, 438, 441, + 445, 448, 451, 455, 458, 461, 465, 468, 471, 475, 478, + 481, 485, 488, 491, 495, 498, 501, 505, 508, 511, 515, + 518, 521, 524, 528, 531, 534, 538, 541, 544, 548, 551, + 554, 558, 561, 564, 568, 571, 574, 578, 581, 584, 588, + 591, 594, 598, 601, 604, 608, 611, 614, 617, 621, 624, + 627, 631, 634, 637, 641, 644, 647, 651, 654, 657, 661, + 664, 667, 671, 674, 677, 681, 684, 687, 691, 694, 697, + 701, 704, 707, 711, 714, 717, 720, 724, 727, 730, 734, + 737, 740, 744, 747, 750, 754, 757, 760, 764, 767, 770, + 774, 777, 780, 784, 787, 790, 794, 797, 800, 804, 807, + 810, 813, 817, 820, 823, 827, 830, 833, 837, 840, 843, + 847, 850, 853, 857, 860, 863, 867, 870, 873, 877, 
880, + 883, 887, 890, 893, 897, 900, 903, 907, 910, 913, 916, + 920, 923, 926, 930, 933, 936, 940, 943, 946, 950, 953, + 956, 960, +}; + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/charconv.h b/third_party/abseil_cpp/absl/strings/charconv.h new file mode 100644 index 000000000000..e04be32f9514 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/charconv.h @@ -0,0 +1,119 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CHARCONV_H_ +#define ABSL_STRINGS_CHARCONV_H_ + +#include <system_error> // NOLINT(build/c++11) + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Workalike compatibility version of std::chars_format from C++17. +// +// This is a bitfield enumerator which can be passed to absl::from_chars to +// configure the string-to-float conversion. +enum class chars_format { + scientific = 1, + fixed = 2, + hex = 4, + general = fixed | scientific, +}; + +// The return result of a string-to-number conversion. +// +// `ec` will be set to `invalid_argument` if a well-formed number was not found +// at the start of the input range, `result_out_of_range` if a well-formed +// number was found, but it was out of the representable range of the requested +// type, or to std::errc() otherwise. 
+// +// If a well-formed number was found, `ptr` is set to one past the sequence of +// characters that were successfully parsed. If none was found, `ptr` is set +// to the `first` argument to from_chars. +struct from_chars_result { + const char* ptr; + std::errc ec; +}; + +// Workalike compatibility version of std::from_chars from C++17. Currently +// this only supports the `double` and `float` types. +// +// This interface incorporates the proposed resolutions for library issues +// DR 3080 and DR 3081. If these are adopted with different wording, +// Abseil's behavior will change to match the standard. (The behavior most +// likely to change is for DR 3081, which says what `value` will be set to in +// the case of overflow and underflow. Code that wants to avoid possible +// breaking changes in this area should not depend on `value` when the returned +// from_chars_result indicates a range error.) +// +// Searches the range [first, last) for the longest matching pattern beginning +// at `first` that represents a floating point number. If one is found, store +// the result in `value`. +// +// The matching pattern format is almost the same as that of strtod(), except +// that C locale is not respected, and an initial '+' character in the input +// range will never be matched. +// +// If `fmt` is set, it must be one of the enumerator values of the chars_format. +// (This is despite the fact that chars_format is a bitmask type.) If set to +// `scientific`, a matching number must contain an exponent. If set to `fixed`, +// then an exponent will never match. (For example, the string "1e5" will be +// parsed as "1".) If set to `hex`, then a hexadecimal float is parsed in the +// format that strtod() accepts, except that a "0x" prefix is NOT matched. +// (In particular, in `hex` mode, the input "0xff" results in the largest +// matching pattern "0".) 
+absl::from_chars_result from_chars(const char* first, const char* last, + double& value, // NOLINT + chars_format fmt = chars_format::general); + +absl::from_chars_result from_chars(const char* first, const char* last, + float& value, // NOLINT + chars_format fmt = chars_format::general); + +// std::chars_format is specified as a bitmask type, which means the following +// operations must be provided: +inline constexpr chars_format operator&(chars_format lhs, chars_format rhs) { + return static_cast<chars_format>(static_cast<int>(lhs) & + static_cast<int>(rhs)); +} +inline constexpr chars_format operator|(chars_format lhs, chars_format rhs) { + return static_cast<chars_format>(static_cast<int>(lhs) | + static_cast<int>(rhs)); +} +inline constexpr chars_format operator^(chars_format lhs, chars_format rhs) { + return static_cast<chars_format>(static_cast<int>(lhs) ^ + static_cast<int>(rhs)); +} +inline constexpr chars_format operator~(chars_format arg) { + return static_cast<chars_format>(~static_cast<int>(arg)); +} +inline chars_format& operator&=(chars_format& lhs, chars_format rhs) { + lhs = lhs & rhs; + return lhs; +} +inline chars_format& operator|=(chars_format& lhs, chars_format rhs) { + lhs = lhs | rhs; + return lhs; +} +inline chars_format& operator^=(chars_format& lhs, chars_format rhs) { + lhs = lhs ^ rhs; + return lhs; +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CHARCONV_H_ diff --git a/third_party/abseil_cpp/absl/strings/charconv_benchmark.cc b/third_party/abseil_cpp/absl/strings/charconv_benchmark.cc new file mode 100644 index 000000000000..e8c7371d6586 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/charconv_benchmark.cc @@ -0,0 +1,204 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/charconv.h" + +#include <cstdlib> +#include <cstring> +#include <string> + +#include "benchmark/benchmark.h" + +namespace { + +void BM_Strtod_Pi(benchmark::State& state) { + const char* pi = "3.14159"; + for (auto s : state) { + benchmark::DoNotOptimize(pi); + benchmark::DoNotOptimize(strtod(pi, nullptr)); + } +} +BENCHMARK(BM_Strtod_Pi); + +void BM_Absl_Pi(benchmark::State& state) { + const char* pi = "3.14159"; + const char* pi_end = pi + strlen(pi); + for (auto s : state) { + benchmark::DoNotOptimize(pi); + double v; + absl::from_chars(pi, pi_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_Pi); + +void BM_Strtod_Pi_float(benchmark::State& state) { + const char* pi = "3.14159"; + for (auto s : state) { + benchmark::DoNotOptimize(pi); + benchmark::DoNotOptimize(strtof(pi, nullptr)); + } +} +BENCHMARK(BM_Strtod_Pi_float); + +void BM_Absl_Pi_float(benchmark::State& state) { + const char* pi = "3.14159"; + const char* pi_end = pi + strlen(pi); + for (auto s : state) { + benchmark::DoNotOptimize(pi); + float v; + absl::from_chars(pi, pi_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_Pi_float); + +void BM_Strtod_HardLarge(benchmark::State& state) { + const char* num = "272104041512242479.e200"; + for (auto s : state) { + benchmark::DoNotOptimize(num); + benchmark::DoNotOptimize(strtod(num, nullptr)); + } +} +BENCHMARK(BM_Strtod_HardLarge); + +void BM_Absl_HardLarge(benchmark::State& state) { + const char* numstr = "272104041512242479.e200"; + const char* numstr_end = numstr + 
strlen(numstr); + for (auto s : state) { + benchmark::DoNotOptimize(numstr); + double v; + absl::from_chars(numstr, numstr_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_HardLarge); + +void BM_Strtod_HardSmall(benchmark::State& state) { + const char* num = "94080055902682397.e-242"; + for (auto s : state) { + benchmark::DoNotOptimize(num); + benchmark::DoNotOptimize(strtod(num, nullptr)); + } +} +BENCHMARK(BM_Strtod_HardSmall); + +void BM_Absl_HardSmall(benchmark::State& state) { + const char* numstr = "94080055902682397.e-242"; + const char* numstr_end = numstr + strlen(numstr); + for (auto s : state) { + benchmark::DoNotOptimize(numstr); + double v; + absl::from_chars(numstr, numstr_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_HardSmall); + +void BM_Strtod_HugeMantissa(benchmark::State& state) { + std::string huge(200, '3'); + const char* num = huge.c_str(); + for (auto s : state) { + benchmark::DoNotOptimize(num); + benchmark::DoNotOptimize(strtod(num, nullptr)); + } +} +BENCHMARK(BM_Strtod_HugeMantissa); + +void BM_Absl_HugeMantissa(benchmark::State& state) { + std::string huge(200, '3'); + const char* num = huge.c_str(); + const char* num_end = num + 200; + for (auto s : state) { + benchmark::DoNotOptimize(num); + double v; + absl::from_chars(num, num_end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_HugeMantissa); + +std::string MakeHardCase(int length) { + // The number 1.1521...e-297 is exactly halfway between 12345 * 2**-1000 and + // the next larger representable number. The digits of this number are in + // the string below. + const std::string digits = + "1." 
+ "152113937042223790993097181572444900347587985074226836242307364987727724" + "831384300183638649152607195040591791364113930628852279348613864894524591" + "272746490313676832900762939595690019745859128071117417798540258114233761" + "012939937017879509401007964861774960297319002612457273148497158989073482" + "171377406078223015359818300988676687994537274548940612510414856761641652" + "513434981938564294004070500716200446656421722229202383105446378511678258" + "370570631774499359748259931676320916632111681001853983492795053244971606" + "922718923011680846577744433974087653954904214152517799883551075537146316" + "168973685866425605046988661997658648354773076621610279716804960009043764" + "038392994055171112475093876476783502487512538082706095923790634572014823" + "78877699375152587890625" + + std::string(5000, '0'); + // generate the hard cases on either side for the given length. + // Lengths between 3 and 1000 are reasonable. + return digits.substr(0, length) + "1e-297"; +} + +void BM_Strtod_Big_And_Difficult(benchmark::State& state) { + std::string testcase = MakeHardCase(state.range(0)); + const char* begin = testcase.c_str(); + for (auto s : state) { + benchmark::DoNotOptimize(begin); + benchmark::DoNotOptimize(strtod(begin, nullptr)); + } +} +BENCHMARK(BM_Strtod_Big_And_Difficult)->Range(3, 5000); + +void BM_Absl_Big_And_Difficult(benchmark::State& state) { + std::string testcase = MakeHardCase(state.range(0)); + const char* begin = testcase.c_str(); + const char* end = begin + testcase.size(); + for (auto s : state) { + benchmark::DoNotOptimize(begin); + double v; + absl::from_chars(begin, end, v); + benchmark::DoNotOptimize(v); + } +} +BENCHMARK(BM_Absl_Big_And_Difficult)->Range(3, 5000); + +} // namespace + +// ------------------------------------------------------------------------ +// Benchmark Time CPU Iterations +// ------------------------------------------------------------------------ +// BM_Strtod_Pi 96 ns 96 ns 6337454 +// BM_Absl_Pi 35 ns 35 ns 
20031996 +// BM_Strtod_Pi_float 91 ns 91 ns 7745851 +// BM_Absl_Pi_float 35 ns 35 ns 20430298 +// BM_Strtod_HardLarge 133 ns 133 ns 5288341 +// BM_Absl_HardLarge 181 ns 181 ns 3855615 +// BM_Strtod_HardSmall 279 ns 279 ns 2517243 +// BM_Absl_HardSmall 287 ns 287 ns 2458744 +// BM_Strtod_HugeMantissa 433 ns 433 ns 1604293 +// BM_Absl_HugeMantissa 160 ns 160 ns 4403671 +// BM_Strtod_Big_And_Difficult/3 236 ns 236 ns 2942496 +// BM_Strtod_Big_And_Difficult/8 232 ns 232 ns 2983796 +// BM_Strtod_Big_And_Difficult/64 437 ns 437 ns 1591951 +// BM_Strtod_Big_And_Difficult/512 1738 ns 1738 ns 402519 +// BM_Strtod_Big_And_Difficult/4096 3943 ns 3943 ns 176128 +// BM_Strtod_Big_And_Difficult/5000 4397 ns 4397 ns 157878 +// BM_Absl_Big_And_Difficult/3 39 ns 39 ns 17799583 +// BM_Absl_Big_And_Difficult/8 43 ns 43 ns 16096859 +// BM_Absl_Big_And_Difficult/64 550 ns 550 ns 1259717 +// BM_Absl_Big_And_Difficult/512 4167 ns 4167 ns 171414 +// BM_Absl_Big_And_Difficult/4096 9160 ns 9159 ns 76297 +// BM_Absl_Big_And_Difficult/5000 9738 ns 9738 ns 70140 diff --git a/third_party/abseil_cpp/absl/strings/charconv_test.cc b/third_party/abseil_cpp/absl/strings/charconv_test.cc new file mode 100644 index 000000000000..9090e9c89c50 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/charconv_test.cc @@ -0,0 +1,780 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/charconv.h" + +#include <cstdlib> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/internal/pow10_helper.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" + +#ifdef _MSC_FULL_VER +#define ABSL_COMPILER_DOES_EXACT_ROUNDING 0 +#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 0 +#else +#define ABSL_COMPILER_DOES_EXACT_ROUNDING 1 +#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 1 +#endif + +namespace { + +using absl::strings_internal::Pow10; + +#if ABSL_COMPILER_DOES_EXACT_ROUNDING + +// Tests that the given string is accepted by absl::from_chars, and that it +// converts exactly equal to the given number. +void TestDoubleParse(absl::string_view str, double expected_number) { + SCOPED_TRACE(str); + double actual_number = 0.0; + absl::from_chars_result result = + absl::from_chars(str.data(), str.data() + str.length(), actual_number); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(result.ptr, str.data() + str.length()); + EXPECT_EQ(actual_number, expected_number); +} + +void TestFloatParse(absl::string_view str, float expected_number) { + SCOPED_TRACE(str); + float actual_number = 0.0; + absl::from_chars_result result = + absl::from_chars(str.data(), str.data() + str.length(), actual_number); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(result.ptr, str.data() + str.length()); + EXPECT_EQ(actual_number, expected_number); +} + +// Tests that the given double or single precision floating point literal is +// parsed correctly by absl::from_chars. +// +// These convenience macros assume that the C++ compiler being used also does +// fully correct decimal-to-binary conversions. 
+#define FROM_CHARS_TEST_DOUBLE(number) \ + { \ + TestDoubleParse(#number, number); \ + TestDoubleParse("-" #number, -number); \ + } + +#define FROM_CHARS_TEST_FLOAT(number) \ + { \ + TestFloatParse(#number, number##f); \ + TestFloatParse("-" #number, -number##f); \ + } + +TEST(FromChars, NearRoundingCases) { + // Cases from "A Program for Testing IEEE Decimal-Binary Conversion" + // by Vern Paxson. + + // Forms that should round towards zero. (These are the hardest cases for + // each decimal mantissa size.) + FROM_CHARS_TEST_DOUBLE(5.e125); + FROM_CHARS_TEST_DOUBLE(69.e267); + FROM_CHARS_TEST_DOUBLE(999.e-026); + FROM_CHARS_TEST_DOUBLE(7861.e-034); + FROM_CHARS_TEST_DOUBLE(75569.e-254); + FROM_CHARS_TEST_DOUBLE(928609.e-261); + FROM_CHARS_TEST_DOUBLE(9210917.e080); + FROM_CHARS_TEST_DOUBLE(84863171.e114); + FROM_CHARS_TEST_DOUBLE(653777767.e273); + FROM_CHARS_TEST_DOUBLE(5232604057.e-298); + FROM_CHARS_TEST_DOUBLE(27235667517.e-109); + FROM_CHARS_TEST_DOUBLE(653532977297.e-123); + FROM_CHARS_TEST_DOUBLE(3142213164987.e-294); + FROM_CHARS_TEST_DOUBLE(46202199371337.e-072); + FROM_CHARS_TEST_DOUBLE(231010996856685.e-073); + FROM_CHARS_TEST_DOUBLE(9324754620109615.e212); + FROM_CHARS_TEST_DOUBLE(78459735791271921.e049); + FROM_CHARS_TEST_DOUBLE(272104041512242479.e200); + FROM_CHARS_TEST_DOUBLE(6802601037806061975.e198); + FROM_CHARS_TEST_DOUBLE(20505426358836677347.e-221); + FROM_CHARS_TEST_DOUBLE(836168422905420598437.e-234); + FROM_CHARS_TEST_DOUBLE(4891559871276714924261.e222); + FROM_CHARS_TEST_FLOAT(5.e-20); + FROM_CHARS_TEST_FLOAT(67.e14); + FROM_CHARS_TEST_FLOAT(985.e15); + FROM_CHARS_TEST_FLOAT(7693.e-42); + FROM_CHARS_TEST_FLOAT(55895.e-16); + FROM_CHARS_TEST_FLOAT(996622.e-44); + FROM_CHARS_TEST_FLOAT(7038531.e-32); + FROM_CHARS_TEST_FLOAT(60419369.e-46); + FROM_CHARS_TEST_FLOAT(702990899.e-20); + FROM_CHARS_TEST_FLOAT(6930161142.e-48); + FROM_CHARS_TEST_FLOAT(25933168707.e-13); + FROM_CHARS_TEST_FLOAT(596428896559.e20); + + // Similarly, forms that 
should round away from zero. + FROM_CHARS_TEST_DOUBLE(9.e-265); + FROM_CHARS_TEST_DOUBLE(85.e-037); + FROM_CHARS_TEST_DOUBLE(623.e100); + FROM_CHARS_TEST_DOUBLE(3571.e263); + FROM_CHARS_TEST_DOUBLE(81661.e153); + FROM_CHARS_TEST_DOUBLE(920657.e-023); + FROM_CHARS_TEST_DOUBLE(4603285.e-024); + FROM_CHARS_TEST_DOUBLE(87575437.e-309); + FROM_CHARS_TEST_DOUBLE(245540327.e122); + FROM_CHARS_TEST_DOUBLE(6138508175.e120); + FROM_CHARS_TEST_DOUBLE(83356057653.e193); + FROM_CHARS_TEST_DOUBLE(619534293513.e124); + FROM_CHARS_TEST_DOUBLE(2335141086879.e218); + FROM_CHARS_TEST_DOUBLE(36167929443327.e-159); + FROM_CHARS_TEST_DOUBLE(609610927149051.e-255); + FROM_CHARS_TEST_DOUBLE(3743626360493413.e-165); + FROM_CHARS_TEST_DOUBLE(94080055902682397.e-242); + FROM_CHARS_TEST_DOUBLE(899810892172646163.e283); + FROM_CHARS_TEST_DOUBLE(7120190517612959703.e120); + FROM_CHARS_TEST_DOUBLE(25188282901709339043.e-252); + FROM_CHARS_TEST_DOUBLE(308984926168550152811.e-052); + FROM_CHARS_TEST_DOUBLE(6372891218502368041059.e064); + FROM_CHARS_TEST_FLOAT(3.e-23); + FROM_CHARS_TEST_FLOAT(57.e18); + FROM_CHARS_TEST_FLOAT(789.e-35); + FROM_CHARS_TEST_FLOAT(2539.e-18); + FROM_CHARS_TEST_FLOAT(76173.e28); + FROM_CHARS_TEST_FLOAT(887745.e-11); + FROM_CHARS_TEST_FLOAT(5382571.e-37); + FROM_CHARS_TEST_FLOAT(82381273.e-35); + FROM_CHARS_TEST_FLOAT(750486563.e-38); + FROM_CHARS_TEST_FLOAT(3752432815.e-39); + FROM_CHARS_TEST_FLOAT(75224575729.e-45); + FROM_CHARS_TEST_FLOAT(459926601011.e15); +} + +#undef FROM_CHARS_TEST_DOUBLE +#undef FROM_CHARS_TEST_FLOAT +#endif + +float ToFloat(absl::string_view s) { + float f; + absl::from_chars(s.data(), s.data() + s.size(), f); + return f; +} + +double ToDouble(absl::string_view s) { + double d; + absl::from_chars(s.data(), s.data() + s.size(), d); + return d; +} + +// A duplication of the test cases in "NearRoundingCases" above, but with +// expected values expressed with integers, using ldexp/ldexpf. 
These test +// cases will work even on compilers that do not accurately round floating point +// literals. +TEST(FromChars, NearRoundingCasesExplicit) { + EXPECT_EQ(ToDouble("5.e125"), ldexp(6653062250012735, 365)); + EXPECT_EQ(ToDouble("69.e267"), ldexp(4705683757438170, 841)); + EXPECT_EQ(ToDouble("999.e-026"), ldexp(6798841691080350, -129)); + EXPECT_EQ(ToDouble("7861.e-034"), ldexp(8975675289889240, -153)); + EXPECT_EQ(ToDouble("75569.e-254"), ldexp(6091718967192243, -880)); + EXPECT_EQ(ToDouble("928609.e-261"), ldexp(7849264900213743, -900)); + EXPECT_EQ(ToDouble("9210917.e080"), ldexp(8341110837370930, 236)); + EXPECT_EQ(ToDouble("84863171.e114"), ldexp(4625202867375927, 353)); + EXPECT_EQ(ToDouble("653777767.e273"), ldexp(5068902999763073, 884)); + EXPECT_EQ(ToDouble("5232604057.e-298"), ldexp(5741343011915040, -1010)); + EXPECT_EQ(ToDouble("27235667517.e-109"), ldexp(6707124626673586, -380)); + EXPECT_EQ(ToDouble("653532977297.e-123"), ldexp(7078246407265384, -422)); + EXPECT_EQ(ToDouble("3142213164987.e-294"), ldexp(8219991337640559, -988)); + EXPECT_EQ(ToDouble("46202199371337.e-072"), ldexp(5224462102115359, -246)); + EXPECT_EQ(ToDouble("231010996856685.e-073"), ldexp(5224462102115359, -247)); + EXPECT_EQ(ToDouble("9324754620109615.e212"), ldexp(5539753864394442, 705)); + EXPECT_EQ(ToDouble("78459735791271921.e049"), ldexp(8388176519442766, 166)); + EXPECT_EQ(ToDouble("272104041512242479.e200"), ldexp(5554409530847367, 670)); + EXPECT_EQ(ToDouble("6802601037806061975.e198"), ldexp(5554409530847367, 668)); + EXPECT_EQ(ToDouble("20505426358836677347.e-221"), + ldexp(4524032052079546, -722)); + EXPECT_EQ(ToDouble("836168422905420598437.e-234"), + ldexp(5070963299887562, -760)); + EXPECT_EQ(ToDouble("4891559871276714924261.e222"), + ldexp(6452687840519111, 757)); + EXPECT_EQ(ToFloat("5.e-20"), ldexpf(15474250, -88)); + EXPECT_EQ(ToFloat("67.e14"), ldexpf(12479722, 29)); + EXPECT_EQ(ToFloat("985.e15"), ldexpf(14333636, 36)); + EXPECT_EQ(ToFloat("7693.e-42"), 
ldexpf(10979816, -150)); + EXPECT_EQ(ToFloat("55895.e-16"), ldexpf(12888509, -61)); + EXPECT_EQ(ToFloat("996622.e-44"), ldexpf(14224264, -150)); + EXPECT_EQ(ToFloat("7038531.e-32"), ldexpf(11420669, -107)); + EXPECT_EQ(ToFloat("60419369.e-46"), ldexpf(8623340, -150)); + EXPECT_EQ(ToFloat("702990899.e-20"), ldexpf(16209866, -61)); + EXPECT_EQ(ToFloat("6930161142.e-48"), ldexpf(9891056, -150)); + EXPECT_EQ(ToFloat("25933168707.e-13"), ldexpf(11138211, -32)); + EXPECT_EQ(ToFloat("596428896559.e20"), ldexpf(12333860, 82)); + + + EXPECT_EQ(ToDouble("9.e-265"), ldexp(8168427841980010, -930)); + EXPECT_EQ(ToDouble("85.e-037"), ldexp(6360455125664090, -169)); + EXPECT_EQ(ToDouble("623.e100"), ldexp(6263531988747231, 289)); + EXPECT_EQ(ToDouble("3571.e263"), ldexp(6234526311072170, 833)); + EXPECT_EQ(ToDouble("81661.e153"), ldexp(6696636728760206, 472)); + EXPECT_EQ(ToDouble("920657.e-023"), ldexp(5975405561110124, -109)); + EXPECT_EQ(ToDouble("4603285.e-024"), ldexp(5975405561110124, -110)); + EXPECT_EQ(ToDouble("87575437.e-309"), ldexp(8452160731874668, -1053)); + EXPECT_EQ(ToDouble("245540327.e122"), ldexp(4985336549131723, 381)); + EXPECT_EQ(ToDouble("6138508175.e120"), ldexp(4985336549131723, 379)); + EXPECT_EQ(ToDouble("83356057653.e193"), ldexp(5986732817132056, 625)); + EXPECT_EQ(ToDouble("619534293513.e124"), ldexp(4798406992060657, 399)); + EXPECT_EQ(ToDouble("2335141086879.e218"), ldexp(5419088166961646, 713)); + EXPECT_EQ(ToDouble("36167929443327.e-159"), ldexp(8135819834632444, -536)); + EXPECT_EQ(ToDouble("609610927149051.e-255"), ldexp(4576664294594737, -850)); + EXPECT_EQ(ToDouble("3743626360493413.e-165"), ldexp(6898586531774201, -549)); + EXPECT_EQ(ToDouble("94080055902682397.e-242"), ldexp(6273271706052298, -800)); + EXPECT_EQ(ToDouble("899810892172646163.e283"), ldexp(7563892574477827, 947)); + EXPECT_EQ(ToDouble("7120190517612959703.e120"), ldexp(5385467232557565, 409)); + EXPECT_EQ(ToDouble("25188282901709339043.e-252"), + ldexp(5635662608542340, 
-825)); + EXPECT_EQ(ToDouble("308984926168550152811.e-052"), + ldexp(5644774693823803, -157)); + EXPECT_EQ(ToDouble("6372891218502368041059.e064"), + ldexp(4616868614322430, 233)); + + EXPECT_EQ(ToFloat("3.e-23"), ldexpf(9507380, -98)); + EXPECT_EQ(ToFloat("57.e18"), ldexpf(12960300, 42)); + EXPECT_EQ(ToFloat("789.e-35"), ldexpf(10739312, -130)); + EXPECT_EQ(ToFloat("2539.e-18"), ldexpf(11990089, -72)); + EXPECT_EQ(ToFloat("76173.e28"), ldexpf(9845130, 86)); + EXPECT_EQ(ToFloat("887745.e-11"), ldexpf(9760860, -40)); + EXPECT_EQ(ToFloat("5382571.e-37"), ldexpf(11447463, -124)); + EXPECT_EQ(ToFloat("82381273.e-35"), ldexpf(8554961, -113)); + EXPECT_EQ(ToFloat("750486563.e-38"), ldexpf(9975678, -120)); + EXPECT_EQ(ToFloat("3752432815.e-39"), ldexpf(9975678, -121)); + EXPECT_EQ(ToFloat("75224575729.e-45"), ldexpf(13105970, -137)); + EXPECT_EQ(ToFloat("459926601011.e15"), ldexpf(12466336, 65)); +} + +// Common test logic for converting a string which lies exactly halfway between +// two target floats. +// +// mantissa and exponent represent the precise value between two floating point +// numbers, `expected_low` and `expected_high`. The floating point +// representation to parse is `StrCat(mantissa, "e", exponent)`. +// +// This function checks that an input just slightly less than the exact value +// is rounded down to `expected_low`, and an input just slightly greater than +// the exact value is rounded up to `expected_high`. +// +// The exact value should round to `expected_half`, which must be either +// `expected_low` or `expected_high`. 
template <typename FloatType>
void TestHalfwayValue(const std::string& mantissa, int exponent,
                      FloatType expected_low, FloatType expected_high,
                      FloatType expected_half) {
  // "Just below" input: lower the final character of the mantissa by one and
  // append 1000 '9' digits. This only produces a digit when the mantissa is
  // non-empty and does not end in '0'; the callers below all pass mantissas
  // ending in a nonzero digit.
  std::string low_rep = mantissa;
  low_rep[low_rep.size() - 1] -= 1;
  absl::StrAppend(&low_rep, std::string(1000, '9'), "e", exponent);

  // The from_chars() return value is ignored throughout; only the parsed
  // value is compared.
  FloatType actual_low = 0;
  absl::from_chars(low_rep.data(), low_rep.data() + low_rep.size(), actual_low);
  EXPECT_EQ(expected_low, actual_low);

  // "Just above" input: the mantissa followed by 1000 '0' digits and a '1'.
  std::string high_rep =
      absl::StrCat(mantissa, std::string(1000, '0'), "1e", exponent);
  FloatType actual_high = 0;
  absl::from_chars(high_rep.data(), high_rep.data() + high_rep.size(),
                   actual_high);
  EXPECT_EQ(expected_high, actual_high);

  // Exact input: the mantissa and exponent unchanged.
  std::string halfway_rep = absl::StrCat(mantissa, "e", exponent);
  FloatType actual_half = 0;
  absl::from_chars(halfway_rep.data(), halfway_rep.data() + halfway_rep.size(),
                   actual_half);
  EXPECT_EQ(expected_half, actual_half);
}

// Exercises TestHalfwayValue() for doubles at the boundaries of the subnormal
// and normal ranges.
TEST(FromChars, DoubleRounding) {
  // Neighbouring values are derived with nextafter() so that each pair passed
  // to TestHalfwayValue() is adjacent in the double representation.
  const double zero = 0.0;
  const double first_subnormal = nextafter(zero, 1.0);
  const double second_subnormal = nextafter(first_subnormal, 1.0);

  const double first_normal = DBL_MIN;
  const double last_subnormal = nextafter(first_normal, 0.0);
  const double second_normal = nextafter(first_normal, 1.0);

  const double last_normal = DBL_MAX;
  const double penultimate_normal = nextafter(last_normal, 0.0);

  // Various test cases for numbers between two representable floats.  Each
  // call to TestHalfwayValue tests a number just below and just above the
  // halfway point, as well as the number exactly between them.

  // Test between zero and first_subnormal.  Round-to-even tie rounds down.
  TestHalfwayValue(
      "2."
      "470328229206232720882843964341106861825299013071623822127928412503377536"
      "351043759326499181808179961898982823477228588654633283551779698981993873"
      "980053909390631503565951557022639229085839244910518443593180284993653615"
      "250031937045767824921936562366986365848075700158576926990370631192827955"
      "855133292783433840935197801553124659726357957462276646527282722005637400"
      "648549997709659947045402082816622623785739345073633900796776193057750674"
      "017632467360096895134053553745851666113422376667860416215968046191446729"
      "184030053005753084904876539171138659164623952491262365388187963623937328"
      "042389101867234849766823508986338858792562830275599565752445550725518931"
      "369083625477918694866799496832404970582102851318545139621383772282614543"
      "7693412532098591327667236328125",
      -324, zero, first_subnormal, zero);

  // first_subnormal and second_subnormal.  Round-to-even tie rounds up.
  TestHalfwayValue(
      "7."
      "410984687618698162648531893023320585475897039214871466383785237510132609"
      "053131277979497545424539885696948470431685765963899850655339096945981621"
      "940161728171894510697854671067917687257517734731555330779540854980960845"
      "750095811137303474765809687100959097544227100475730780971111893578483867"
      "565399878350301522805593404659373979179073872386829939581848166016912201"
      "945649993128979841136206248449867871357218035220901702390328579173252022"
      "052897402080290685402160661237554998340267130003581248647904138574340187"
      "552090159017259254714629617513415977493871857473787096164563890871811984"
      "127167305601704549300470526959016576377688490826798697257336652176556794"
      "107250876433756084600398490497214911746308553955635418864151316847843631"
      "3080237596295773983001708984375",
      -324, first_subnormal, second_subnormal, second_subnormal);

  // last_subnormal and first_normal.  Round-to-even tie rounds up.
  TestHalfwayValue(
      "2."
      "225073858507201136057409796709131975934819546351645648023426109724822222"
      "021076945516529523908135087914149158913039621106870086438694594645527657"
      "207407820621743379988141063267329253552286881372149012981122451451889849"
      "057222307285255133155755015914397476397983411801999323962548289017107081"
      "850690630666655994938275772572015763062690663332647565300009245888316433"
      "037779791869612049497390377829704905051080609940730262937128958950003583"
      "799967207254304360284078895771796150945516748243471030702609144621572289"
      "880258182545180325707018860872113128079512233426288368622321503775666622"
      "503982534335974568884423900265498198385487948292206894721689831099698365"
      "846814022854243330660339850886445804001034933970427567186443383770486037"
      "86162277173854562306587467901408672332763671875",
      -308, last_subnormal, first_normal, first_normal);

  // first_normal and second_normal.  Round-to-even tie rounds down.
  TestHalfwayValue(
      "2."
      "225073858507201630123055637955676152503612414573018013083228724049586647"
      "606759446192036794116886953213985520549032000903434781884412325572184367"
      "563347617020518175998922941393629966742598285899994830148971433555578567"
      "693279306015978183162142425067962460785295885199272493577688320732492479"
      "924816869232247165964934329258783950102250973957579510571600738343645738"
      "494324192997092179207389919761694314131497173265255020084997973676783743"
      "155205818804439163810572367791175177756227497413804253387084478193655533"
      "073867420834526162513029462022730109054820067654020201547112002028139700"
      "141575259123440177362244273712468151750189745559978653234255886219611516"
      "335924167958029604477064946470184777360934300451421683607013647479513962"
      "13837722826145437693412532098591327667236328125",
      -308, first_normal, second_normal, first_normal);

  // penultimate_normal and last_normal.  Round-to-even rounds down.
  TestHalfwayValue(
      "1."
      "797693134862315608353258760581052985162070023416521662616611746258695532"
      "672923265745300992879465492467506314903358770175220871059269879629062776"
      "047355692132901909191523941804762171253349609463563872612866401980290377"
      "995141836029815117562837277714038305214839639239356331336428021390916694"
      "57927874464075218944",
      308, penultimate_normal, last_normal, penultimate_normal);
}

// Same test cases as DoubleRounding, now with new and improved Much Smaller
// Precision!
TEST(FromChars, FloatRounding) {
  // Same neighbour construction as DoubleRounding, using the float variants
  // nextafterf(), FLT_MIN and FLT_MAX.
  const float zero = 0.0;
  const float first_subnormal = nextafterf(zero, 1.0);
  const float second_subnormal = nextafterf(first_subnormal, 1.0);

  const float first_normal = FLT_MIN;
  const float last_subnormal = nextafterf(first_normal, 0.0);
  const float second_normal = nextafterf(first_normal, 1.0);

  const float last_normal = FLT_MAX;
  const float penultimate_normal = nextafterf(last_normal, 0.0);

  // Test between zero and first_subnormal.  Round-to-even tie rounds down.
  TestHalfwayValue(
      "7."
      "006492321624085354618647916449580656401309709382578858785341419448955413"
      "42930300743319094181060791015625",
      -46, zero, first_subnormal, zero);

  // first_subnormal and second_subnormal.  Round-to-even tie rounds up.
  TestHalfwayValue(
      "2."
      "101947696487225606385594374934874196920392912814773657635602425834686624"
      "028790902229957282543182373046875",
      -45, first_subnormal, second_subnormal, second_subnormal);

  // last_subnormal and first_normal.  Round-to-even tie rounds up.
  TestHalfwayValue(
      "1."
      "175494280757364291727882991035766513322858992758990427682963118425003064"
      "9651730385585324256680905818939208984375",
      -38, last_subnormal, first_normal, first_normal);

  // first_normal and second_normal.  Round-to-even tie rounds down.
  TestHalfwayValue(
      "1."
      "175494420887210724209590083408724842314472120785184615334540294131831453"
      "9442813071445925743319094181060791015625",
      -38, first_normal, second_normal, first_normal);

  // penultimate_normal and last_normal.  Round-to-even rounds down.
  TestHalfwayValue("3.40282336497324057985868971510891282432", 38,
                   penultimate_normal, last_normal, penultimate_normal);
}

TEST(FromChars, Underflow) {
  // Check that underflow is handled correctly, according to the specification
  // in DR 3081.
  double d;
  float f;
  absl::from_chars_result result;

  // Each output is preset to a value of the opposite sign and large
  // magnitude, so the sign and magnitude checks below can only pass if
  // from_chars() actually overwrote it.
  std::string negative_underflow = "-1e-1000";
  const char* begin = negative_underflow.data();
  const char* end = begin + negative_underflow.size();
  d = 100.0;
  result = absl::from_chars(begin, end, d);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_TRUE(std::signbit(d));  // negative
  EXPECT_GE(d, -std::numeric_limits<double>::min());
  f = 100.0;
  result = absl::from_chars(begin, end, f);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_TRUE(std::signbit(f));  // negative
  EXPECT_GE(f, -std::numeric_limits<float>::min());

  std::string positive_underflow = "1e-1000";
  begin = positive_underflow.data();
  end = begin + positive_underflow.size();
  d = -100.0;
  result = absl::from_chars(begin, end, d);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_FALSE(std::signbit(d));  // positive
  EXPECT_LE(d, std::numeric_limits<double>::min());
  f = -100.0;
  result = absl::from_chars(begin, end, f);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_FALSE(std::signbit(f));  // positive
  EXPECT_LE(f, std::numeric_limits<float>::min());
}

TEST(FromChars, Overflow) {
  // Check that overflow is handled correctly, according to the specification
  // in DR 3081.
  double d;
  float f;
  absl::from_chars_result result;

  // As in the Underflow test, outputs are preset to the opposite sign.  The
  // expected result on overflow is the largest finite value of matching sign.
  std::string negative_overflow = "-1e1000";
  const char* begin = negative_overflow.data();
  const char* end = begin + negative_overflow.size();
  d = 100.0;
  result = absl::from_chars(begin, end, d);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_TRUE(std::signbit(d));  // negative
  EXPECT_EQ(d, -std::numeric_limits<double>::max());
  f = 100.0;
  result = absl::from_chars(begin, end, f);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_TRUE(std::signbit(f));  // negative
  EXPECT_EQ(f, -std::numeric_limits<float>::max());

  std::string positive_overflow = "1e1000";
  begin = positive_overflow.data();
  end = begin + positive_overflow.size();
  d = -100.0;
  result = absl::from_chars(begin, end, d);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_FALSE(std::signbit(d));  // positive
  EXPECT_EQ(d, std::numeric_limits<double>::max());
  f = -100.0;
  result = absl::from_chars(begin, end, f);
  EXPECT_EQ(result.ptr, end);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_FALSE(std::signbit(f));  // positive
  EXPECT_EQ(f, std::numeric_limits<float>::max());
}

TEST(FromChars, RegressionTestsFromFuzzer) {
  // A hexadecimal input whose value is expected to be reported as out of
  // range when parsed into a float.
  absl::string_view src = "0x21900000p00000000099";
  float f;
  auto result = absl::from_chars(src.data(), src.data() + src.size(), f);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
}

TEST(FromChars, ReturnValuePtr) {
  // Check that `ptr` points one past the number scanned, even if that number
  // is not representable.
  double d;
  absl::from_chars_result result;

  // "3.14" is 4 characters long; the trailing garbage must not be consumed.
  std::string normal = "3.14@#$%@#$%";
  result = absl::from_chars(normal.data(), normal.data() + normal.size(), d);
  EXPECT_EQ(result.ec, std::errc());
  EXPECT_EQ(result.ptr - normal.data(), 4);

  // "1e1000" is 6 characters long; it is consumed even though it overflows.
  std::string overflow = "1e1000@#$%@#$%";
  result = absl::from_chars(overflow.data(),
                            overflow.data() + overflow.size(), d);
  EXPECT_EQ(result.ec, std::errc::result_out_of_range);
  EXPECT_EQ(result.ptr - overflow.data(), 6);

  // No number at all: nothing is consumed.
  std::string garbage = "#$%@#$%";
  result = absl::from_chars(garbage.data(),
                            garbage.data() + garbage.size(), d);
  EXPECT_EQ(result.ec, std::errc::invalid_argument);
  EXPECT_EQ(result.ptr - garbage.data(), 0);
}

// Check for a wide range of inputs that strtod() and absl::from_chars() exactly
// agree on the conversion amount.
//
// This test assumes the platform's strtod() uses perfect round_to_nearest
// rounding.
TEST(FromChars, TestVersusStrtod) {
  // Seven-digit mantissas sampled every 501, combined with every decimal
  // exponent in [-300, 300).
  for (int mantissa = 1000000; mantissa <= 9999999; mantissa += 501) {
    for (int exponent = -300; exponent < 300; ++exponent) {
      std::string candidate = absl::StrCat(mantissa, "e", exponent);
      double strtod_value = strtod(candidate.c_str(), nullptr);
      double absl_value = 0;
      absl::from_chars(candidate.data(), candidate.data() + candidate.size(),
                       absl_value);
      // ASSERT rather than EXPECT: the first mismatch ends the test.
      ASSERT_EQ(strtod_value, absl_value) << candidate;
    }
  }
}

// Check for a wide range of inputs that strtof() and absl::from_chars() exactly
// agree on the conversion amount.
//
// This test assumes the platform's strtof() uses perfect round_to_nearest
// rounding.
+TEST(FromChars, TestVersusStrtof) { + for (int mantissa = 1000000; mantissa <= 9999999; mantissa += 501) { + for (int exponent = -43; exponent < 32; ++exponent) { + std::string candidate = absl::StrCat(mantissa, "e", exponent); + float strtod_value = strtof(candidate.c_str(), nullptr); + float absl_value = 0; + absl::from_chars(candidate.data(), candidate.data() + candidate.size(), + absl_value); + ASSERT_EQ(strtod_value, absl_value) << candidate; + } + } +} + +// Tests if two floating point values have identical bit layouts. (EXPECT_EQ +// is not suitable for NaN testing, since NaNs are never equal.) +template <typename Float> +bool Identical(Float a, Float b) { + return 0 == memcmp(&a, &b, sizeof(Float)); +} + +// Check that NaNs are parsed correctly. The spec requires that +// std::from_chars on "NaN(123abc)" return the same value as std::nan("123abc"). +// How such an n-char-sequence affects the generated NaN is unspecified, so we +// just test for symmetry with std::nan and strtod here. +// +// (In Linux, this parses the value as a number and stuffs that number into the +// free bits of a quiet NaN.) +TEST(FromChars, NaNDoubles) { + for (std::string n_char_sequence : + {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000", + "8000000000000", "abc123", "legal_but_unexpected", + "99999999999999999999999", "_"}) { + std::string input = absl::StrCat("nan(", n_char_sequence, ")"); + SCOPED_TRACE(input); + double from_chars_double; + absl::from_chars(input.data(), input.data() + input.size(), + from_chars_double); + double std_nan_double = std::nan(n_char_sequence.c_str()); + EXPECT_TRUE(Identical(from_chars_double, std_nan_double)); + + // Also check that we match strtod()'s behavior. This test assumes that the + // platform has a compliant strtod(). 
+#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY + double strtod_double = strtod(input.c_str(), nullptr); + EXPECT_TRUE(Identical(from_chars_double, strtod_double)); +#endif // ABSL_STRTOD_HANDLES_NAN_CORRECTLY + + // Check that we can parse a negative NaN + std::string negative_input = "-" + input; + double negative_from_chars_double; + absl::from_chars(negative_input.data(), + negative_input.data() + negative_input.size(), + negative_from_chars_double); + EXPECT_TRUE(std::signbit(negative_from_chars_double)); + EXPECT_FALSE(Identical(negative_from_chars_double, from_chars_double)); + from_chars_double = std::copysign(from_chars_double, -1.0); + EXPECT_TRUE(Identical(negative_from_chars_double, from_chars_double)); + } +} + +TEST(FromChars, NaNFloats) { + for (std::string n_char_sequence : + {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000", + "8000000000000", "abc123", "legal_but_unexpected", + "99999999999999999999999", "_"}) { + std::string input = absl::StrCat("nan(", n_char_sequence, ")"); + SCOPED_TRACE(input); + float from_chars_float; + absl::from_chars(input.data(), input.data() + input.size(), + from_chars_float); + float std_nan_float = std::nanf(n_char_sequence.c_str()); + EXPECT_TRUE(Identical(from_chars_float, std_nan_float)); + + // Also check that we match strtof()'s behavior. This test assumes that the + // platform has a compliant strtof(). 
+#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY + float strtof_float = strtof(input.c_str(), nullptr); + EXPECT_TRUE(Identical(from_chars_float, strtof_float)); +#endif // ABSL_STRTOD_HANDLES_NAN_CORRECTLY + + // Check that we can parse a negative NaN + std::string negative_input = "-" + input; + float negative_from_chars_float; + absl::from_chars(negative_input.data(), + negative_input.data() + negative_input.size(), + negative_from_chars_float); + EXPECT_TRUE(std::signbit(negative_from_chars_float)); + EXPECT_FALSE(Identical(negative_from_chars_float, from_chars_float)); + from_chars_float = std::copysign(from_chars_float, -1.0); + EXPECT_TRUE(Identical(negative_from_chars_float, from_chars_float)); + } +} + +// Returns an integer larger than step. The values grow exponentially. +int NextStep(int step) { + return step + (step >> 2) + 1; +} + +// Test a conversion on a family of input strings, checking that the calculation +// is correct for in-bounds values, and that overflow and underflow are done +// correctly for out-of-bounds values. +// +// input_generator maps from an integer index to a string to test. +// expected_generator maps from an integer index to an expected Float value. +// from_chars conversion of input_generator(i) should result in +// expected_generator(i). +// +// lower_bound and upper_bound denote the smallest and largest values for which +// the conversion is expected to succeed. 
+template <typename Float> +void TestOverflowAndUnderflow( + const std::function<std::string(int)>& input_generator, + const std::function<Float(int)>& expected_generator, int lower_bound, + int upper_bound) { + // test legal values near lower_bound + int index, step; + for (index = lower_bound, step = 1; index < upper_bound; + index += step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float expected = expected_generator(index); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(expected, actual) + << absl::StrFormat("%a vs %a", expected, actual); + } + // test legal values near upper_bound + for (index = upper_bound, step = 1; index > lower_bound; + index -= step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float expected = expected_generator(index); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc()); + EXPECT_EQ(expected, actual) + << absl::StrFormat("%a vs %a", expected, actual); + } + // Test underflow values below lower_bound + for (index = lower_bound - 1, step = 1; index > -1000000; + index -= step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + EXPECT_LT(actual, 1.0); // check for underflow + } + // Test overflow values above upper_bound + for (index = upper_bound + 1, step = 1; index < 1000000; + index += step, step = NextStep(step)) { + std::string input = input_generator(index); + SCOPED_TRACE(input); + Float actual; + auto result = + absl::from_chars(input.data(), input.data() + input.size(), actual); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); + 
EXPECT_GT(actual, 1.0); // check for overflow + } +} + +// Check that overflow and underflow are caught correctly for hex doubles. +// +// The largest representable double is 0x1.fffffffffffffp+1023, and the +// smallest representable subnormal is 0x0.0000000000001p-1022, which equals +// 0x1p-1074. Therefore 1023 and -1074 are the limits of acceptable exponents +// in this test. +TEST(FromChars, HexdecimalDoubleLimits) { + auto input_gen = [](int index) { return absl::StrCat("0x1.0p", index); }; + auto expected_gen = [](int index) { return std::ldexp(1.0, index); }; + TestOverflowAndUnderflow<double>(input_gen, expected_gen, -1074, 1023); +} + +// Check that overflow and underflow are caught correctly for hex floats. +// +// The largest representable float is 0x1.fffffep+127, and the smallest +// representable subnormal is 0x0.000002p-126, which equals 0x1p-149. +// Therefore 127 and -149 are the limits of acceptable exponents in this test. +TEST(FromChars, HexdecimalFloatLimits) { + auto input_gen = [](int index) { return absl::StrCat("0x1.0p", index); }; + auto expected_gen = [](int index) { return std::ldexp(1.0f, index); }; + TestOverflowAndUnderflow<float>(input_gen, expected_gen, -149, 127); +} + +// Check that overflow and underflow are caught correctly for decimal doubles. +// +// The largest representable double is about 1.8e308, and the smallest +// representable subnormal is about 5e-324. '1e-324' therefore rounds away from +// the smallest representable positive value. -323 and 308 are the limits of +// acceptable exponents in this test. +TEST(FromChars, DecimalDoubleLimits) { + auto input_gen = [](int index) { return absl::StrCat("1.0e", index); }; + auto expected_gen = [](int index) { return Pow10(index); }; + TestOverflowAndUnderflow<double>(input_gen, expected_gen, -323, 308); +} + +// Check that overflow and underflow are caught correctly for decimal floats. 
//
// The largest representable float is about 3.4e38, and the smallest
// representable subnormal is about 1.45e-45.  '1e-45' therefore rounds towards
// the smallest representable positive value.  -45 and 38 are the limits of
// acceptable exponents in this test.
TEST(FromChars, DecimalFloatLimits) {
  // Input "1.0e<index>" is compared against Pow10(index), a helper defined
  // elsewhere in this file.
  auto input_gen = [](int index) { return absl::StrCat("1.0e", index); };
  auto expected_gen = [](int index) { return Pow10(index); };
  TestOverflowAndUnderflow<float>(input_gen, expected_gen, -45, 38);
}

}  // namespace
diff --git a/third_party/abseil_cpp/absl/strings/cord.cc b/third_party/abseil_cpp/absl/strings/cord.cc new file mode 100644 index 000000000000..9efd13575039 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/cord.cc @@ -0,0 +1,1995 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include "absl/strings/cord.h" + +#include <algorithm> +#include <atomic> +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <iomanip> +#include <iostream> +#include <limits> +#include <ostream> +#include <sstream> +#include <type_traits> +#include <unordered_set> +#include <vector> + +#include "absl/base/casts.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/container/fixed_array.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepSubstring; + +using ::absl::cord_internal::CONCAT; +using ::absl::cord_internal::EXTERNAL; +using ::absl::cord_internal::FLAT; +using ::absl::cord_internal::SUBSTRING; + +namespace cord_internal { + +inline CordRepConcat* CordRep::concat() { + assert(tag == CONCAT); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(tag == CONCAT); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(tag == SUBSTRING); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(tag == SUBSTRING); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(tag == EXTERNAL); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(tag == EXTERNAL); + return static_cast<const 
CordRepExternal*>(this); +} + +} // namespace cord_internal + +static const size_t kFlatOverhead = offsetof(CordRep, data); + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = 32 - kFlatOverhead; + +// Prefer copying blocks of at most this size, otherwise reference count. +static const size_t kMaxBytesToCopy = 511; + +// Helper functions for rounded div, and rounding to exact sizes. +static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +static size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +static uint8_t AllocatedSizeToTag(size_t size) { + const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; + assert(tag <= std::numeric_limits<uint8_t>::max()); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +static constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 128) ? 
(tag * 8) : (1024 + (tag - 128) * 32); +} + +// Converts the provided tag to the corresponding available data length +static constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); + +constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { + return n == 0 ? a : Fibonacci(n - 1, b, a + b); +} + +static_assert(Fibonacci(63) == 6557470319842, + "Fibonacci values computed incorrectly"); + +// Minimum length required for a given depth tree -- a tree is considered +// balanced if +// length(t) >= min_length[depth(t)] +// The root node depth is allowed to become twice as large to reduce rebalancing +// for larger strings (see IsRootBalanced). +static constexpr uint64_t min_length[] = { + Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5), + Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9), + Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13), + Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17), + Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21), + Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25), + Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29), + Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33), + Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37), + Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41), + Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45), + Fibonacci(46), Fibonacci(47), + 0xffffffffffffffffull, // Avoid overflow +}; + +static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); + +// The inlined size to use with absl::InlinedVector. +// +// Note: The InlinedVectors in this file (and in cord.h) do not need to use +// the same value for their inlined size. The fact that they do is historical. 
+// It may be desirable for each to use a different inlined size optimized for +// that InlinedVector's usage. +// +// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for +// the inlined vector size (47 exists for backward compatibility). +static const int kInlinedVectorSize = 47; + +static inline bool IsRootBalanced(CordRep* node) { + if (node->tag != CONCAT) { + return true; + } else if (node->concat()->depth() <= 15) { + return true; + } else if (node->concat()->depth() > kMinLengthSize) { + return false; + } else { + // Allow depth to become twice as large as implied by fibonacci rule to + // reduce rebalancing for larger strings. + return (node->length >= min_length[node->concat()->depth() / 2]); + } +} + +static CordRep* Rebalance(CordRep* node); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os); +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation); + +static inline CordRep* VerifyTree(CordRep* node) { + // Verification is expensive, so only do it in debug mode. + // Even in debug mode we normally do only light validation. + // If you are debugging Cord itself, you should define the + // macro EXTRA_CORD_VALIDATION, e.g. by adding + // --copt=-DEXTRA_CORD_VALIDATION to the blaze line. +#ifdef EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true)); +#else // EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false)); +#endif // EXTRA_CORD_VALIDATION + static_cast<void>(&VerifyNode); + + return node; +} + +// -------------------------------------------------------------------- +// Memory management + +inline CordRep* Ref(CordRep* rep) { + if (rep != nullptr) { + rep->refcount.Increment(); + } + return rep; +} + +// This internal routine is called from the cold path of Unref below. Keeping it +// in a separate routine allows good inlining of Unref into many profitable call +// sites. 
// However, the call to this function can be highly disruptive to the
// register pressure in those callers.  To minimize the cost to callers, we use
// a special LLVM calling convention that preserves most registers.  This allows
// the call to this routine in cold paths to not disrupt the caller's register
// pressure.  This calling convention is not available on all platforms; we
// intentionally allow LLVM to ignore the attribute rather than attempting to
// hardcode the list of supported platforms.
#if defined(__clang__) && !defined(__i386__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wattributes"
__attribute__((preserve_most))
#pragma clang diagnostic pop
#endif
static void UnrefInternal(CordRep* rep) {
  assert(rep != nullptr);

  // Nodes still waiting to be destroyed.  The tree is walked with this
  // explicit stack instead of recursion.
  absl::InlinedVector<CordRep*, kInlinedVectorSize> pending;
  while (true) {
    assert(!rep->refcount.IsImmortal());
    if (rep->tag == CONCAT) {
      CordRepConcat* rep_concat = rep->concat();
      // A child whose Decrement() returns false is treated as having no
      // remaining references and is destroyed as well: the right child is
      // deferred via `pending`, the left child is handled on the next pass.
      CordRep* right = rep_concat->right;
      if (!right->refcount.Decrement()) {
        pending.push_back(right);
      }
      CordRep* left = rep_concat->left;
      delete rep_concat;
      rep = nullptr;
      if (!left->refcount.Decrement()) {
        rep = left;
        continue;
      }
    } else if (rep->tag == EXTERNAL) {
      // External nodes are released through their stored releaser_invoker.
      CordRepExternal* rep_external = rep->external();
      assert(rep_external->releaser_invoker != nullptr);
      rep_external->releaser_invoker(rep_external);
      rep = nullptr;
    } else if (rep->tag == SUBSTRING) {
      // Read the child pointer before deleting the substring node itself.
      CordRepSubstring* rep_substring = rep->substring();
      CordRep* child = rep_substring->child;
      delete rep_substring;
      rep = nullptr;
      if (!child->refcount.Decrement()) {
        rep = child;
        continue;
      }
    } else {
      // Flat CordReps are allocated and constructed with raw ::operator new
      // and placement new, and must be destructed and deallocated
      // accordingly.
#if defined(__cpp_sized_deallocation)
      // The allocation size must be read from the tag before the destructor
      // runs.
      size_t size = TagToAllocatedSize(rep->tag);
      rep->~CordRep();
      ::operator delete(rep, size);
#else
      rep->~CordRep();
      ::operator delete(rep);
#endif
      rep = nullptr;
    }

    // Continue with the next deferred node, or stop once none remain.
    if (!pending.empty()) {
      rep = pending.back();
      pending.pop_back();
    } else {
      break;
    }
  }
}

// Drops one reference to `rep` (which may be null) and hands the node to
// UnrefInternal() when the fast path below does not apply.
inline void Unref(CordRep* rep) {
  // Fast-path for two common, hot cases: a null rep and a shared root.
  if (ABSL_PREDICT_TRUE(rep == nullptr ||
                        rep->refcount.DecrementExpectHighRefcount())) {
    return;
  }

  UnrefInternal(rep);
}

// Return the depth of a node
static int Depth(const CordRep* rep) {
  // Only concat nodes carry a depth; every other node kind counts as 0.
  if (rep->tag == CONCAT) {
    return rep->concat()->depth();
  } else {
    return 0;
  }
}

// Installs `left` and `right` as the children of `concat` and recomputes the
// node's length and depth from them.  Both children must be non-null.
static void SetConcatChildren(CordRepConcat* concat, CordRep* left,
                              CordRep* right) {
  concat->left = left;
  concat->right = right;

  concat->length = left->length + right->length;
  concat->set_depth(1 + std::max(Depth(left), Depth(right)));
}

// Create a concatenation of the specified nodes.
// Does not change the refcounts of "left" and "right".
// The returned node has a refcount of 1.
static CordRep* RawConcat(CordRep* left, CordRep* right) {
  // Avoid making degenerate concat nodes (one child is empty)
  // In those cases the empty side is unreffed and the other side is returned
  // as is, which may be null.
  if (left == nullptr || left->length == 0) {
    Unref(left);
    return right;
  }
  if (right == nullptr || right->length == 0) {
    Unref(right);
    return left;
  }

  CordRepConcat* rep = new CordRepConcat();
  rep->tag = CONCAT;
  SetConcatChildren(rep, left, right);

  return rep;
}

// Concatenates `left` and `right` via RawConcat(), rebalancing the result
// when IsRootBalanced() rejects it.  May return null.
static CordRep* Concat(CordRep* left, CordRep* right) {
  CordRep* rep = RawConcat(left, right);
  if (rep != nullptr && !IsRootBalanced(rep)) {
    rep = Rebalance(rep);
  }
  return VerifyTree(rep);
}

// Make a balanced tree out of an array of leaf nodes.
// `reps` is overwritten in place.  `n` must be at least 1, since reps[0] is
// returned unconditionally.
static CordRep* MakeBalancedTree(CordRep** reps, size_t n) {
  // Make repeated passes over the array, merging adjacent pairs
  // until we are left with just a single node.
  while (n > 1) {
    size_t dst = 0;
    for (size_t src = 0; src < n; src += 2) {
      if (src + 1 < n) {
        reps[dst] = Concat(reps[src], reps[src + 1]);
      } else {
        // Odd element out: carried over unchanged to the next pass.
        reps[dst] = reps[src];
      }
      dst++;
    }
    n = dst;
  }

  return reps[0];
}

// Create a new flat node.
// `length_hint` is clamped to [kMinFlatLength, kMaxFlatLength].  The node's
// `length` field is not set here; callers assign it.
static CordRep* NewFlat(size_t length_hint) {
  if (length_hint <= kMinFlatLength) {
    length_hint = kMinFlatLength;
  } else if (length_hint > kMaxFlatLength) {
    length_hint = kMaxFlatLength;
  }

  // Round size up so it matches a size we can exactly express in a tag.
  const size_t size = RoundUpForTag(length_hint + kFlatOverhead);
  void* const raw_rep = ::operator new(size);
  CordRep* rep = new (raw_rep) CordRep();
  rep->tag = AllocatedSizeToTag(size);
  return VerifyTree(rep);
}

// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
// Returns null for an empty input.  `alloc_hint` is added to the length
// passed to NewFlat() for each chunk.
static CordRep* NewTree(const char* data,
                        size_t length,
                        size_t alloc_hint) {
  if (length == 0) return nullptr;
  // One slot per chunk of at most kMaxFlatLength bytes.
  absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1);
  size_t n = 0;
  do {
    const size_t len = std::min(length, kMaxFlatLength);
    CordRep* rep = NewFlat(len + alloc_hint);
    rep->length = len;
    memcpy(rep->data, data, len);
    reps[n++] = VerifyTree(rep);
    data += len;
    length -= len;
  } while (length != 0);
  return MakeBalancedTree(reps.data(), n);
}

namespace cord_internal {

// Fills in the length, tag and base pointer of `rep` from `data`, which must
// not be empty.  No bytes are copied; `rep->base` points at `data`'s bytes.
void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) {
  assert(!data.empty());
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  VerifyTree(rep);
}

}  // namespace cord_internal

// Creates a substring node covering [offset, offset + length) of `child`.
// The child pointer is stored without touching its refcount.  For a zero
// `length`, `child` is unreffed and null is returned.
static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) {
  // Never create empty substring nodes
  if (length == 0) {
    Unref(child);
    return nullptr;
  } else {
    CordRepSubstring* rep = new CordRepSubstring();
    assert((offset + length) <= child->length);
    rep->length = length;
    rep->tag = SUBSTRING;
    rep->start = offset;
    rep->child = child;
    return VerifyTree(rep);
  }
}

// --------------------------------------------------------------------
// Cord::InlineRep functions

constexpr unsigned char Cord::InlineRep::kMaxInline;

// Copies `n` bytes from `data` into the inline buffer and records `n` as the
// tagged size.  `nullify_tail` is forwarded to SmallMemmove().
inline void Cord::InlineRep::set_data(const char* data, size_t n,
                                      bool nullify_tail) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");

  cord_internal::SmallMemmove(data_.as_chars, data, n, nullify_tail);
  set_tagged_size(static_cast<char>(n));
}

// Resets the rep to empty, records an inline size of `n` (at most kMaxInline)
// and returns the inline buffer for the caller to fill.
inline char* Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  ResetToEmpty();
  set_tagged_size(static_cast<char>(n));
  return data_.as_chars;
}

// Returns the tree held by this rep, first converting inline data into a new
// flat node if necessary.  `extra_hint` is added to the flat node's length
// hint.
inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) {
  size_t len = tagged_size();
  // A tagged size above kMaxInline means a tree pointer is already stored.
  if (len > kMaxInline) {
    return data_.as_tree.rep;
  }

  CordRep* result = NewFlat(len + extra_hint);
  result->length = len;
  // The whole inline buffer is copied, not just `len` bytes; the
  // static_assert guarantees the smallest flat node has room for it.
  static_assert(kMinFlatLength >= sizeof(data_.as_chars), "");
  memcpy(result->data, data_.as_chars, sizeof(data_.as_chars));
  set_tree(result);
  return result;
}

// Shrinks the inline data by `n` bytes and zeroes the bytes that were dropped
// from the end.  Only valid while the data is stored inline.
inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = tagged_size();
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_.as_chars + tag, 0, n);
  set_tagged_size(static_cast<char>(tag));
}

// Removes the first `n` bytes of the inline data by shifting the remaining
// bytes to the front and then shrinking the size.
inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_.as_chars, data_.as_chars + n,
                              tagged_size() - n);
  reduce_size(n);
}

// Appends `tree` to this rep.  A null `tree` is a no-op.
void Cord::InlineRep::AppendTree(CordRep* tree) {
  if (tree == nullptr) return;
  size_t len = tagged_size();
  if (len == 0) {
    // Nothing stored yet: adopt `tree` directly.
    set_tree(tree);
  } else {
    set_tree(Concat(force_tree(0), tree));
  }
}

// Prepends `tree` to this rep.  Unlike AppendTree(), `tree` must not be null.
void Cord::InlineRep::PrependTree(CordRep* tree) {
  assert(tree != nullptr);
  size_t len = tagged_size();
  if (len == 0) {
    // Nothing stored yet: adopt `tree` directly.
    set_tree(tree);
  } else {
    set_tree(Concat(tree, force_tree(0)));
  }
}

// Searches for a non-full flat node at the rightmost leaf of the tree.
If a +// suitable leaf is found, the function will update the length field for all +// nodes to account for the size increase. The append region address will be +// written to region and the actual size increase will be written to size. +static inline bool PrepareAppendRegion(CordRep* root, char** region, + size_t* size, size_t max_length) { + // Search down the right-hand path for a non-full FLAT node. + CordRep* dst = root; + while (dst->tag == CONCAT && dst->refcount.IsOne()) { + dst = dst->concat()->right; + } + + if (dst->tag < FLAT || !dst->refcount.IsOne()) { + *region = nullptr; + *size = 0; + return false; + } + + const size_t in_use = dst->length; + const size_t capacity = TagToLength(dst->tag); + if (in_use == capacity) { + *region = nullptr; + *size = 0; + return false; + } + + size_t size_increase = std::min(capacity - in_use, max_length); + + // We need to update the length fields for all nodes, including the leaf node. + for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) { + rep->length += size_increase; + } + dst->length += size_increase; + + *region = dst->data + in_use; + *size = size_increase; + return true; +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, + size_t max_length) { + if (max_length == 0) { + *region = nullptr; + *size = 0; + return; + } + + // Try to fit in the inline buffer if possible. + size_t inline_length = tagged_size(); + if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { + *region = data_.as_chars + inline_length; + *size = max_length; + set_tagged_size(static_cast<char>(inline_length + max_length)); + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. 
+ CordRep* new_node = + NewFlat(std::max(static_cast<size_t>(root->length), max_length)); + new_node->length = + std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { + const size_t max_length = std::numeric_limits<size_t>::max(); + + // Try to fit in the inline buffer if possible. + size_t inline_length = tagged_size(); + if (inline_length < kMaxInline) { + *region = data_.as_chars + inline_length; + *size = kMaxInline - inline_length; + set_tagged_size(kMaxInline); + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = NewFlat(root->length); + new_node->length = TagToLength(new_node->tag); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +// If the rep is a leaf, this will increment the value at total_mem_usage and +// will return true. 
+static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { + if (rep->tag >= FLAT) { + *total_mem_usage += TagToAllocatedSize(rep->tag); + return true; + } + if (rep->tag == EXTERNAL) { + *total_mem_usage += sizeof(CordRepConcat) + rep->length; + return true; + } + return false; +} + +void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { + ClearSlow(); + + data_ = src.data_; + if (is_tree()) { + Ref(tree()); + } +} + +void Cord::InlineRep::ClearSlow() { + if (is_tree()) { + Unref(tree()); + } + ResetToEmpty(); +} + +// -------------------------------------------------------------------- +// Constructors and destructors + +Cord::Cord(const Cord& src) : contents_(src.contents_) { + Ref(contents_.tree()); // Does nothing if contents_ has embedded data +} + +Cord::Cord(absl::string_view src) { + const size_t n = src.size(); + if (n <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), n, false); + } else { + contents_.set_tree(NewTree(src.data(), n, 0)); + } +} + +template <typename T, Cord::EnableIfString<T>> +Cord::Cord(T&& src) { + if ( + // String is short: copy data to avoid external block overhead. + src.size() <= kMaxBytesToCopy || + // String is wasteful: copy data to avoid pinning too much unused memory. + src.size() < src.capacity() / 2 + ) { + if (src.size() <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), src.size(), false); + } else { + contents_.set_tree(NewTree(src.data(), src.size(), 0)); + } + } else { + struct StringReleaser { + void operator()(absl::string_view /* data */) {} + std::string data; + }; + const absl::string_view original_data = src; + auto* rep = static_cast< + ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( + absl::cord_internal::NewExternalRep( + original_data, StringReleaser{std::forward<T>(src)})); + // Moving src may have invalidated its data pointer, so adjust it. 
+ rep->base = rep->template get<0>().data.data(); + contents_.set_tree(rep); + } +} + +template Cord::Cord(std::string&& src); + +// The destruction code is separate so that the compiler can determine +// that it does not need to call the destructor on a moved-from Cord. +void Cord::DestroyCordSlow() { + Unref(VerifyTree(contents_.tree())); +} + +// -------------------------------------------------------------------- +// Mutators + +void Cord::Clear() { + Unref(contents_.clear()); +} + +Cord& Cord::operator=(absl::string_view src) { + + const char* data = src.data(); + size_t length = src.size(); + CordRep* tree = contents_.tree(); + if (length <= InlineRep::kMaxInline) { + // Embed into this->contents_ + contents_.set_data(data, length, true); + Unref(tree); + return *this; + } + if (tree != nullptr && tree->tag >= FLAT && + TagToLength(tree->tag) >= length && tree->refcount.IsOne()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->data, data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.set_tree(NewTree(data, length, 0)); + Unref(tree); + return *this; +} + +template <typename T, Cord::EnableIfString<T>> +Cord& Cord::operator=(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + *this = absl::string_view(src); + } else { + *this = Cord(std::forward<T>(src)); + } + return *this; +} + +template Cord& Cord::operator=(std::string&& src); + +// TODO(sanjay): Move to Cord::InlineRep section of file. For now, +// we keep it here to make diffs easier. +void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { + if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. + // Try to fit in the inline buffer if possible. 
+ size_t inline_length = tagged_size(); + if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { + // Append new data to embedded array + set_tagged_size(static_cast<char>(inline_length + src_size)); + memcpy(data_.as_chars + inline_length, src_data, src_size); + return; + } + + CordRep* root = tree(); + + size_t appended = 0; + if (root) { + char* region; + if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { + memcpy(region, src_data, appended); + } + } else { + // It is possible that src_data == data_, but when we transition from an + // InlineRep to a tree we need to assign data_ = root via set_tree. To + // avoid corrupting the source data before we copy it, delay calling + // set_tree until after we've copied data. + // We are going from an inline size to beyond inline size. Make the new size + // either double the inlined size, or the added size + 10%. + const size_t size1 = inline_length * 2 + src_size; + const size_t size2 = inline_length + src_size / 10; + root = NewFlat(std::max<size_t>(size1, size2)); + appended = std::min(src_size, TagToLength(root->tag) - inline_length); + memcpy(root->data, data_.as_chars, inline_length); + memcpy(root->data + inline_length, src_data, appended); + root->length = inline_length + appended; + set_tree(root); + } + + src_data += appended; + src_size -= appended; + if (src_size == 0) { + return; + } + + // Use new block(s) for any remaining bytes that were not handled above. + // Alloc extra memory only if the right child of the root of the new tree is + // going to be a FLAT node, which will permit further inplace appends. + size_t length = src_size; + if (src_size < kMaxFlatLength) { + // The new length is either + // - old size + 10% + // - old_size + src_size + // This will cause a reasonable conservative step-up in size that is still + // large enough to avoid excessive amounts of small fragments being added. 
+ length = std::max<size_t>(root->length / 10, src_size); + } + set_tree(Concat(root, NewTree(src_data, src_size, length - src_size))); +} + +inline CordRep* Cord::TakeRep() const& { + return Ref(contents_.tree()); +} + +inline CordRep* Cord::TakeRep() && { + CordRep* rep = contents_.tree(); + contents_.clear(); + return rep; +} + +template <typename C> +inline void Cord::AppendImpl(C&& src) { + if (empty()) { + // In case of an empty destination avoid allocating a new node, do not copy + // data. + *this = std::forward<C>(src); + return; + } + + // For short cords, it is faster to copy data if there is room in dst. + const size_t src_size = src.contents_.size(); + if (src_size <= kMaxBytesToCopy) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree == nullptr) { + // src has embedded data. + contents_.AppendArray(src.contents_.data(), src_size); + return; + } + if (src_tree->tag >= FLAT) { + // src tree just has one flat node. + contents_.AppendArray(src_tree->data, src_size); + return; + } + if (&src == this) { + // ChunkIterator below assumes that src is not modified during traversal. + Append(Cord(src)); + return; + } + // TODO(mec): Should we only do this if "dst" has space? + for (absl::string_view chunk : src.Chunks()) { + Append(chunk); + } + return; + } + + contents_.AppendTree(std::forward<C>(src).TakeRep()); +} + +void Cord::Append(const Cord& src) { AppendImpl(src); } + +void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } + +template <typename T, Cord::EnableIfString<T>> +void Cord::Append(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Append(absl::string_view(src)); + } else { + Append(Cord(std::forward<T>(src))); + } +} + +template void Cord::Append(std::string&& src); + +void Cord::Prepend(const Cord& src) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree != nullptr) { + Ref(src_tree); + contents_.PrependTree(src_tree); + return; + } + + // `src` cord is inlined. 
+ absl::string_view src_contents(src.contents_.data(), src.contents_.size()); + return Prepend(src_contents); +} + +void Cord::Prepend(absl::string_view src) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + size_t cur_size = contents_.size(); + if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + data[InlineRep::kMaxInline] = cur_size + src.size(); // set size + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(reinterpret_cast<void*>(&contents_), data, + InlineRep::kMaxInline + 1); + } else { + contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + } +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Prepend(absl::string_view(src)); + } else { + Prepend(Cord(std::forward<T>(src))); + } +} + +template void Cord::Prepend(std::string&& src); + +static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->left->length) { + // Push right to stack, descend left. + rhs_stack.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Drop left, descend right. + n -= node->concat()->left->length; + node = node->concat()->right; + } + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else { + size_t start = n; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + // Consider in-place update of node, similar to in RemoveSuffixFrom(). 
+ start += node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!rhs_stack.empty()) { + node = Concat(node, Ref(rhs_stack.back())); + rhs_stack.pop_back(); + } + return node; +} + +// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the +// exception that removing a suffix has an optimization where a node may be +// edited in place iff that node and all its ancestors have a refcount of 1. +static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; + bool inplace_ok = node->refcount.IsOne(); + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->right->length) { + // Push left to stack, descend right. + lhs_stack.push_back(node->concat()->left); + node = node->concat()->right; + } else { + // Drop right, descend left. + n -= node->concat()->right->length; + node = node->concat()->left; + } + inplace_ok = inplace_ok && node->refcount.IsOne(); + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else if (inplace_ok && node->tag != EXTERNAL) { + // Consider making a new buffer if the current node capacity is much + // larger than the new length. 
+ Ref(node); + node->length -= n; + } else { + size_t start = 0; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + start = node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!lhs_stack.empty()) { + node = Concat(Ref(lhs_stack.back()), node); + lhs_stack.pop_back(); + } + return node; +} + +void Cord::RemovePrefix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested prefix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.remove_prefix(n); + } else { + CordRep* newrep = RemovePrefixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +void Cord::RemoveSuffix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested suffix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.reduce_size(n); + } else { + CordRep* newrep = RemoveSuffixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +// Work item for NewSubRange(). +struct SubRange { + SubRange(CordRep* a_node, size_t a_pos, size_t a_n) + : node(a_node), pos(a_pos), n(a_n) {} + CordRep* node; // nullptr means concat last 2 results. 
+ size_t pos; + size_t n; +}; + +static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { + absl::InlinedVector<CordRep*, kInlinedVectorSize> results; + absl::InlinedVector<SubRange, kInlinedVectorSize> todo; + todo.push_back(SubRange(node, pos, n)); + do { + const SubRange& sr = todo.back(); + node = sr.node; + pos = sr.pos; + n = sr.n; + todo.pop_back(); + + if (node == nullptr) { + assert(results.size() >= 2); + CordRep* right = results.back(); + results.pop_back(); + CordRep* left = results.back(); + results.pop_back(); + results.push_back(Concat(left, right)); + } else if (pos == 0 && n == node->length) { + results.push_back(Ref(node)); + } else if (node->tag != CONCAT) { + if (node->tag == SUBSTRING) { + pos += node->substring()->start; + node = node->substring()->child; + } + results.push_back(NewSubstring(Ref(node), pos, n)); + } else if (pos + n <= node->concat()->left->length) { + todo.push_back(SubRange(node->concat()->left, pos, n)); + } else if (pos >= node->concat()->left->length) { + pos -= node->concat()->left->length; + todo.push_back(SubRange(node->concat()->right, pos, n)); + } else { + size_t left_n = node->concat()->left->length - pos; + todo.push_back(SubRange(nullptr, 0, 0)); // Concat() + todo.push_back(SubRange(node->concat()->right, 0, n - left_n)); + todo.push_back(SubRange(node->concat()->left, pos, left_n)); + } + } while (!todo.empty()); + assert(results.size() == 1); + return results[0]; +} + +Cord Cord::Subcord(size_t pos, size_t new_size) const { + Cord sub_cord; + size_t length = size(); + if (pos > length) pos = length; + if (new_size > length - pos) new_size = length - pos; + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + // sub_cord is newly constructed, no need to re-zero-out the tail of + // contents_ memory. + sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); + } else if (new_size == 0) { + // We want to return empty subcord, so nothing to do. 
+ } else if (new_size <= InlineRep::kMaxInline) { + Cord::ChunkIterator it = chunk_begin(); + it.AdvanceBytes(pos); + char* dest = sub_cord.contents_.data_.as_chars; + size_t remaining_size = new_size; + while (remaining_size > it->size()) { + cord_internal::SmallMemmove(dest, it->data(), it->size()); + remaining_size -= it->size(); + dest += it->size(); + ++it; + } + cord_internal::SmallMemmove(dest, it->data(), remaining_size); + sub_cord.contents_.set_tagged_size(new_size); + } else { + sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); + } + return sub_cord; +} + +// -------------------------------------------------------------------- +// Balancing + +class CordForest { + public: + explicit CordForest(size_t length) + : root_length_(length), trees_(kMinLengthSize, nullptr) {} + + void Build(CordRep* cord_root) { + std::vector<CordRep*> pending = {cord_root}; + + while (!pending.empty()) { + CordRep* node = pending.back(); + pending.pop_back(); + CheckNode(node); + if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) { + AddNode(node); + continue; + } + + CordRepConcat* concat_node = node->concat(); + if (concat_node->depth() >= kMinLengthSize || + concat_node->length < min_length[concat_node->depth()]) { + pending.push_back(concat_node->right); + pending.push_back(concat_node->left); + + if (concat_node->refcount.IsOne()) { + concat_node->left = concat_freelist_; + concat_freelist_ = concat_node; + } else { + Ref(concat_node->right); + Ref(concat_node->left); + Unref(concat_node); + } + } else { + AddNode(node); + } + } + } + + CordRep* ConcatNodes() { + CordRep* sum = nullptr; + for (auto* node : trees_) { + if (node == nullptr) continue; + + sum = PrependNode(node, sum); + root_length_ -= node->length; + if (root_length_ == 0) break; + } + ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node"); + return VerifyTree(sum); + } + + private: + CordRep* AppendNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? 
node : MakeConcat(sum, node); + } + + CordRep* PrependNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(node, sum); + } + + void AddNode(CordRep* node) { + CordRep* sum = nullptr; + + // Collect together everything with which we will merge with node + int i = 0; + for (; node->length > min_length[i + 1]; ++i) { + auto& tree_at_i = trees_[i]; + + if (tree_at_i == nullptr) continue; + sum = PrependNode(tree_at_i, sum); + tree_at_i = nullptr; + } + + sum = AppendNode(node, sum); + + // Insert sum into appropriate place in the forest + for (; sum->length >= min_length[i]; ++i) { + auto& tree_at_i = trees_[i]; + if (tree_at_i == nullptr) continue; + + sum = MakeConcat(tree_at_i, sum); + tree_at_i = nullptr; + } + + // min_length[0] == 1, which means sum->length >= min_length[0] + assert(i > 0); + trees_[i - 1] = sum; + } + + // Make concat node trying to resue existing CordRepConcat nodes we + // already collected in the concat_freelist_. + CordRep* MakeConcat(CordRep* left, CordRep* right) { + if (concat_freelist_ == nullptr) return RawConcat(left, right); + + CordRepConcat* rep = concat_freelist_; + if (concat_freelist_->left == nullptr) { + concat_freelist_ = nullptr; + } else { + concat_freelist_ = concat_freelist_->left->concat(); + } + SetConcatChildren(rep, left, right); + + return rep; + } + + static void CheckNode(CordRep* node) { + ABSL_INTERNAL_CHECK(node->length != 0u, ""); + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); + ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + + node->concat()->right->length), + ""); + } + } + + size_t root_length_; + + // use an inlined vector instead of a flat array to get bounds checking + absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_; + + // List of concat nodes we can re-use for Cord balancing. 
+ CordRepConcat* concat_freelist_ = nullptr; +}; + +static CordRep* Rebalance(CordRep* node) { + VerifyTree(node); + assert(node->tag == CONCAT); + + if (node->length == 0) { + return nullptr; + } + + CordForest forest(node->length); + forest.Build(node); + return forest.ConcatNodes(); +} + +// -------------------------------------------------------------------- +// Comparators + +namespace { + +int ClampResult(int memcmp_res) { + return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0); +} + +int CompareChunks(absl::string_view* lhs, absl::string_view* rhs, + size_t* size_to_compare) { + size_t compared_size = std::min(lhs->size(), rhs->size()); + assert(*size_to_compare >= compared_size); + *size_to_compare -= compared_size; + + int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size); + if (memcmp_res != 0) return memcmp_res; + + lhs->remove_prefix(compared_size); + rhs->remove_prefix(compared_size); + + return 0; +} + +// This overload set computes comparison results from memcmp result. This +// interface is used inside GenericCompare below. Differet implementations +// are specialized for int and bool. For int we clamp result to {-1, 0, 1} +// set. For bool we just interested in "value == 0". +template <typename ResultType> +ResultType ComputeCompareResult(int memcmp_res) { + return ClampResult(memcmp_res); +} +template <> +bool ComputeCompareResult<bool>(int memcmp_res) { + return memcmp_res == 0; +} + +} // namespace + +// Helper routine. Locates the first flat chunk of the Cord without +// initializing the iterator. 
+inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { + size_t n = tagged_size(); + if (n <= kMaxInline) { + return absl::string_view(data_.as_chars, n); + } + + CordRep* node = tree(); + if (node->tag >= FLAT) { + return absl::string_view(node->data, node->length); + } + + if (node->tag == EXTERNAL) { + return absl::string_view(node->external()->base, node->length); + } + + // Walk down the left branches until we hit a non-CONCAT node. + while (node->tag == CONCAT) { + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + assert(length != 0); + + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + if (node->tag >= FLAT) { + return absl::string_view(node->data + offset, length); + } + + assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); + + return absl::string_view(node->external()->base + offset, length); +} + +inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + + // compared_size is inside first chunk. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs.size()); + lhs_chunk.remove_prefix(compared_size); + rhs.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. 
+ + while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) { + int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare); + if (comparison_result != 0) return comparison_result; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty()); +} + +inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + Cord::ChunkIterator rhs_it = rhs.chunk_begin(); + + // compared_size is inside both first chunks. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + absl::string_view rhs_chunk = + (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs_chunk.size()); + lhs_chunk.remove_prefix(compared_size); + rhs_chunk.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) { + int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare); + if (memcmp_res != 0) return memcmp_res; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs_chunk.empty()) - + static_cast<int>(lhs_chunk.empty()); +} + +inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + return c.contents_.FindFlatStartPiece(); +} +inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { + return sv; +} + +// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed +// that 'size_to_compare' is greater that size of smallest of first chunks. 
+template <typename ResultType, typename RHS> +ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare) { + absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs); + absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs); + + size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size()); + assert(size_to_compare >= compared_size); + int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size); + if (compared_size == size_to_compare || memcmp_res != 0) { + return ComputeCompareResult<ResultType>(memcmp_res); + } + + return ComputeCompareResult<ResultType>( + lhs.CompareSlowPath(rhs, compared_size, size_to_compare)); +} + +bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +template <typename RHS> +inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size == rhs_size) { + return GenericCompare<int>(lhs, rhs, lhs_size); + } + if (lhs_size < rhs_size) { + auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size); + return data_comp_res == 0 ? -1 : data_comp_res; + } + + auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size); + return data_comp_res == 0 ? 
+1 : data_comp_res; +} + +int Cord::Compare(absl::string_view rhs) const { + return SharedCompareImpl(*this, rhs); +} + +int Cord::CompareImpl(const Cord& rhs) const { + return SharedCompareImpl(*this, rhs); +} + +bool Cord::EndsWith(absl::string_view rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +bool Cord::EndsWith(const Cord& rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +// -------------------------------------------------------------------- +// Misc. + +Cord::operator std::string() const { + std::string s; + absl::CopyCordToString(*this, &s); + return s; +} + +void CopyCordToString(const Cord& src, std::string* dst) { + if (!src.contents_.is_tree()) { + src.contents_.CopyTo(dst); + } else { + absl::strings_internal::STLStringResizeUninitialized(dst, src.size()); + src.CopyToArraySlowPath(&(*dst)[0]); + } +} + +void Cord::CopyToArraySlowPath(char* dst) const { + assert(contents_.is_tree()); + absl::string_view fragment; + if (GetFlatAux(contents_.tree(), &fragment)) { + memcpy(dst, fragment.data(), fragment.size()); + return; + } + for (absl::string_view chunk : Chunks()) { + memcpy(dst, chunk.data(), chunk.size()); + dst += chunk.size(); + } +} + +Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && + "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + + if (stack_of_right_children_.empty()) { + assert(!current_chunk_.empty()); // Called on invalid iterator. + // We have reached the end of the Cord. + return *this; + } + + // Process the next node on the stack. 
+ CordRep* node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + + // Walk down the left branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length != 0); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset, length); + current_leaf_ = node; + return *this; +} + +Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { + ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && + "Attempted to iterate past `end()`"); + Cord subcord; + + if (n <= InlineRep::kMaxInline) { + // Range to read fits in inline data. Flatten it. + char* data = subcord.contents_.set_data(n); + while (n > current_chunk_.size()) { + memcpy(data, current_chunk_.data(), current_chunk_.size()); + data += current_chunk_.size(); + n -= current_chunk_.size(); + ++*this; + } + memcpy(data, current_chunk_.data(), n); + if (n < current_chunk_.size()) { + RemoveChunkPrefix(n); + } else if (n > 0) { + ++*this; + } + return subcord; + } + if (n < current_chunk_.size()) { + // Range to read is a proper subrange of the current chunk. + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, n); + subcord.contents_.set_tree(VerifyTree(subnode)); + RemoveChunkPrefix(n); + return subcord; + } + + // Range to read begins with a proper subrange of the current chunk. 
+ assert(!current_chunk_.empty()); + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + if (current_chunk_.size() < subnode->length) { + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, + current_chunk_.size()); + } + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, reading whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + // TODO(qrczak): This might unnecessarily recreate existing concat nodes. + // Avoiding that would need pretty complicated logic (instead of + // current_leaf_, keep current_subtree_ which points to the highest node + // such that the current leaf can be found on the path of left children + // starting from current_subtree_; delay creating subnode while node is + // below current_subtree_; find the proper node along the path of left + // children starting from current_subtree_ if this loop exits while staying + // below current_subtree_; etc.; alternatively, push parents instead of + // right children on the stack). + subnode = Concat(subnode, Ref(node)); + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. 
+ stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Read left, descend right. + subnode = Concat(subnode, Ref(node->concat()->left)); + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + // Range to read ends with a proper (possibly empty) subrange of the current + // chunk. + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n)); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; +} + +void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + assert(n >= current_chunk_.size()); // This should only be called when + // iterating to a new node. + + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, skipping whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. 
Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Skip left, descend right. + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; +} + +// Returns the byte at index `i`. With no tree rep the byte is read from +// `contents_.data()`; otherwise the tree is descended to the leaf holding it. +char Cord::operator[](size_t i) const { + ABSL_HARDENING_ASSERT(i < size()); + size_t offset = i; + const CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return contents_.data()[i]; + } + while (true) { + assert(rep != nullptr); + assert(offset < rep->length); + if (rep->tag >= FLAT) { + // Get the "i"th character directly from the flat array. + return rep->data[offset]; + } else if (rep->tag == EXTERNAL) { + // Get the "i"th character from the external array. + return rep->external()->base[offset]; + } else if (rep->tag == CONCAT) { + // Recursively branch to the side of the concatenation that the "i"th + // character is on. + size_t left_length = rep->concat()->left->length; + if (offset < left_length) { + rep = rep->concat()->left; + } else { + offset -= left_length; + rep = rep->concat()->right; + } + } else { + // This must be a substring node, so bypass it to get to the child. 
+ assert(rep->tag == SUBSTRING); + offset += rep->substring()->start; + rep = rep->substring()->child; + } + } +} + +absl::string_view Cord::FlattenSlowPath() { + size_t total_size = size(); + CordRep* new_rep; + char* new_buffer; + + // Try to put the contents into a new flat rep. If they won't fit in the + // biggest possible flat node, use an external rep instead. + if (total_size <= kMaxFlatLength) { + new_rep = NewFlat(total_size); + new_rep->length = total_size; + new_buffer = new_rep->data; + CopyToArraySlowPath(new_buffer); + } else { + new_buffer = std::allocator<char>().allocate(total_size); + CopyToArraySlowPath(new_buffer); + new_rep = absl::cord_internal::NewExternalRep( + absl::string_view(new_buffer, total_size), [](absl::string_view s) { + std::allocator<char>().deallocate(const_cast<char*>(s.data()), + s.size()); + }); + } + Unref(contents_.tree()); + contents_.set_tree(new_rep); + return absl::string_view(new_buffer, total_size); +} + +/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { + assert(rep != nullptr); + if (rep->tag >= FLAT) { + *fragment = absl::string_view(rep->data, rep->length); + return true; + } else if (rep->tag == EXTERNAL) { + *fragment = absl::string_view(rep->external()->base, rep->length); + return true; + } else if (rep->tag == SUBSTRING) { + CordRep* child = rep->substring()->child; + if (child->tag >= FLAT) { + *fragment = + absl::string_view(child->data + rep->substring()->start, rep->length); + return true; + } else if (child->tag == EXTERNAL) { + *fragment = absl::string_view( + child->external()->base + rep->substring()->start, rep->length); + return true; + } + } + return false; +} + +/* static */ void Cord::ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback) { + assert(rep != nullptr); + int stack_pos = 0; + constexpr int stack_max = 128; + // Stack of right branches for tree traversal + absl::cord_internal::CordRep* 
stack[stack_max]; + absl::cord_internal::CordRep* current_node = rep; + while (true) { + if (current_node->tag == CONCAT) { + if (stack_pos == stack_max) { + // There's no more room on our stack array to add another right branch, + // and the idea is to avoid allocations, so call this function + // recursively to navigate this subtree further. (This is not something + // we expect to happen in practice). + ForEachChunkAux(current_node, callback); + + // Pop the next right branch and iterate. + current_node = stack[--stack_pos]; + continue; + } else { + // Save the right branch for later traversal and continue down the left + // branch. + stack[stack_pos++] = current_node->concat()->right; + current_node = current_node->concat()->left; + continue; + } + } + // This is a leaf node, so invoke our callback. + absl::string_view chunk; + bool success = GetFlatAux(current_node, &chunk); + assert(success); + if (success) { + callback(chunk); + } + if (stack_pos == 0) { + // end of traversal + return; + } + current_node = stack[--stack_pos]; + } +} + +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { + const int kIndentStep = 1; + int indent = 0; + absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; + absl::InlinedVector<int, kInlinedVectorSize> indents; + for (;;) { + *os << std::setw(3) << rep->refcount.Get(); + *os << " " << std::setw(7) << rep->length; + *os << " ["; + if (include_data) *os << static_cast<void*>(rep); + *os << "]"; + *os << " " << (IsRootBalanced(rep) ? 
'b' : 'u'); + *os << " " << std::setw(indent) << ""; + if (rep->tag == CONCAT) { + *os << "CONCAT depth=" << Depth(rep) << "\n"; + indent += kIndentStep; + indents.push_back(indent); + stack.push_back(rep->concat()->right); + rep = rep->concat()->left; + } else if (rep->tag == SUBSTRING) { + *os << "SUBSTRING @ " << rep->substring()->start << "\n"; + indent += kIndentStep; + rep = rep->substring()->child; + } else { // Leaf + if (rep->tag == EXTERNAL) { + *os << "EXTERNAL ["; + if (include_data) + *os << absl::CEscape(std::string(rep->external()->base, rep->length)); + *os << "]\n"; + } else { + *os << "FLAT cap=" << TagToLength(rep->tag) << " ["; + if (include_data) + *os << absl::CEscape(std::string(rep->data, rep->length)); + *os << "]\n"; + } + if (stack.empty()) break; + rep = stack.back(); + stack.pop_back(); + indent = indents.back(); + indents.pop_back(); + } + } + ABSL_INTERNAL_CHECK(indents.empty(), ""); +} + +static std::string ReportError(CordRep* root, CordRep* node) { + std::ostringstream buf; + buf << "Error at node " << node << " in:"; + DumpNode(root, true, &buf); + return buf.str(); +} + +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation) { + absl::InlinedVector<CordRep*, 2> worklist; + worklist.push_back(start_node); + do { + CordRep* node = worklist.back(); + worklist.pop_back(); + + ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); + if (node != root) { + ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); + } + + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length + + node->concat()->right->length), + ReportError(root, node)); + if (full_validation) { + worklist.push_back(node->concat()->right); + worklist.push_back(node->concat()->left); + } + } else if (node->tag >= FLAT) { + 
ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag), + ReportError(root, node)); + } else if (node->tag == EXTERNAL) { + ABSL_INTERNAL_CHECK(node->external()->base != nullptr, + ReportError(root, node)); + } else if (node->tag == SUBSTRING) { + ABSL_INTERNAL_CHECK( + node->substring()->start < node->substring()->child->length, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= + node->substring()->child->length, + ReportError(root, node)); + } + } while (!worklist.empty()); + return true; +} + +// Traverses the tree and computes the total memory allocated. +/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) { + size_t total_mem_usage = 0; + + // Allow a quick exit for the common case that the root is a leaf. + if (RepMemoryUsageLeaf(rep, &total_mem_usage)) { + return total_mem_usage; + } + + // Iterate over the tree. cur_node is never a leaf node and leaf nodes will + // never be appended to tree_stack. This reduces overhead from manipulating + // tree_stack. + absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack; + const CordRep* cur_node = rep; + while (true) { + const CordRep* next_node = nullptr; + + if (cur_node->tag == CONCAT) { + total_mem_usage += sizeof(CordRepConcat); + const CordRep* left = cur_node->concat()->left; + if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { + next_node = left; + } + + const CordRep* right = cur_node->concat()->right; + if (!RepMemoryUsageLeaf(right, &total_mem_usage)) { + if (next_node) { + tree_stack.push_back(next_node); + } + next_node = right; + } + } else { + // Since cur_node is not a leaf or a concat node it must be a substring. 
+ assert(cur_node->tag == SUBSTRING); + total_mem_usage += sizeof(CordRepSubstring); + next_node = cur_node->substring()->child; + if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { + next_node = nullptr; + } + } + + if (!next_node) { + if (tree_stack.empty()) { + return total_mem_usage; + } + next_node = tree_stack.back(); + tree_stack.pop_back(); + } + cur_node = next_node; + } +} + +std::ostream& operator<<(std::ostream& out, const Cord& cord) { + for (absl::string_view chunk : cord.Chunks()) { + out.write(chunk.data(), chunk.size()); + } + return out; +} + +namespace strings_internal { +size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; } +size_t CordTestAccess::FlatTagToLength(uint8_t tag) { + return TagToLength(tag); +} +uint8_t CordTestAccess::LengthToTag(size_t s) { + ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); + return AllocatedSizeToTag(s + kFlatOverhead); +} +size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } +size_t CordTestAccess::SizeofCordRepExternal() { + return sizeof(CordRepExternal); +} +size_t CordTestAccess::SizeofCordRepSubstring() { + return sizeof(CordRepSubstring); +} +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/cord.h b/third_party/abseil_cpp/absl/strings/cord.h new file mode 100644 index 000000000000..5d5c897e663c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/cord.h @@ -0,0 +1,1299 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: cord.h +// ----------------------------------------------------------------------------- +// +// This file defines the `absl::Cord` data structure and operations on that data +// structure. A Cord is a string-like sequence of characters optimized for +// specific use cases. Unlike a `std::string`, which stores an array of +// contiguous characters, Cord data is stored in a structure consisting of +// separate, reference-counted "chunks." (Currently, this implementation is a +// tree structure, though that implementation may change.) +// +// Because a Cord consists of these chunks, data can be added to or removed from +// a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a +// `std::string`, a Cord can therefore accommodate data that changes over its +// lifetime, though it's not quite "mutable"; it can change only in the +// attachment, detachment, or rearrangement of chunks of its constituent data. +// +// A Cord provides some benefit over `std::string` under the following (albeit +// narrow) circumstances: +// +// * Cord data is designed to grow and shrink over a Cord's lifetime. Cord +// provides efficient insertions and deletions at the start and end of the +// character sequences, avoiding copies in those cases. Static data should +// generally be stored as strings. +// * External memory consisting of string-like data can be directly added to +// a Cord without requiring copies or allocations. 
+// * Cord data may be shared and copied cheaply. Cord provides a copy-on-write +// implementation and cheap sub-Cord operations. Copying a Cord is an O(1) +// operation. +// +// As a consequence of the above, Cord data is generally large. Small data +// should generally use strings, as construction of a Cord requires some +// overhead. Small Cords (<= 15 bytes) are represented inline, but most small +// Cords are expected to grow over their lifetimes. +// +// Note that because a Cord is made up of separate chunked data, random access +// to character data within a Cord is slower than within a `std::string`. +// +// Thread Safety +// +// Cord has the same thread-safety properties as many other types like +// std::string, std::vector<>, int, etc. -- it is thread-compatible. In +// particular, if threads do not call non-const methods, then it is safe to call +// const methods without synchronization. Copying a Cord produces a new instance +// that can be used concurrently with the original in arbitrary ways. 
+ +#ifndef ABSL_STRINGS_CORD_H_ +#define ABSL_STRINGS_CORD_H_ + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iosfwd> +#include <iterator> +#include <string> +#include <type_traits> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/internal/string_constant.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +class Cord; +class CordTestPeer; +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view, Releaser&&); +void CopyCordToString(const Cord& src, std::string* dst); + +// Cord +// +// A Cord is a sequence of characters, designed to be more efficient than a +// `std::string` in certain circumstances: namely, large string data that needs +// to change over its lifetime or shared, especially when such data is shared +// across API boundaries. +// +// A Cord stores its character data in a structure that allows efficient prepend +// and append operations. This makes a Cord useful for large string data sent +// over in a wire format that may need to be prepended or appended at some point +// during the data exchange (e.g. HTTP, protocol buffers). For example, a +// Cord is useful for storing an HTTP request, and prepending an HTTP header to +// such a request. +// +// Cords should not be used for storing general string data, however. They +// require overhead to construct and are slower than strings for random access. 
+// +// The Cord API provides the following common API operations: +// +// * Create or assign Cords out of existing string data, memory, or other Cords +// * Append and prepend data to an existing Cord +// * Create new Sub-Cords from existing Cord data +// * Swap Cord data and compare Cord equality +// * Write out Cord data by constructing a `std::string` +// +// Additionally, the API provides iterator utilities to iterate through Cord +// data via chunks or character bytes. +// +class Cord { + private: + template <typename T> + using EnableIfString = + absl::enable_if_t<std::is_same<T, std::string>::value, int>; + + public: + // Cord::Cord() Constructors. + + // Creates an empty Cord. + constexpr Cord() noexcept; + + // Creates a Cord from an existing Cord. Cord is copyable and efficiently + // movable. The moved-from state is valid but unspecified. + Cord(const Cord& src); + Cord(Cord&& src) noexcept; + Cord& operator=(const Cord& x); + Cord& operator=(Cord&& x) noexcept; + + // Creates a Cord from a `src` string. This constructor is marked explicit to + // prevent implicit Cord constructions from arguments convertible to an + // `absl::string_view`. + explicit Cord(absl::string_view src); + Cord& operator=(absl::string_view src); + + // Creates a Cord from a `std::string&&` rvalue. These constructors are + // templated to avoid ambiguities for types that are convertible to both + // `absl::string_view` and `std::string`, such as `const char*`. + template <typename T, EnableIfString<T> = 0> + explicit Cord(T&& src); + template <typename T, EnableIfString<T> = 0> + Cord& operator=(T&& src); + + // Cord::~Cord() + // + // Destructs the Cord. + ~Cord() { + if (contents_.is_tree()) DestroyCordSlow(); + } + + // MakeCordFromExternal() + // + // Creates a Cord that takes ownership of external string memory. The + // contents of `data` are not copied to the Cord; instead, the external + // memory is added to the Cord and reference-counted. 
This data may not be + // changed for the life of the Cord, though it may be prepended or appended + // to. + // + // `MakeCordFromExternal()` takes a callable "releaser" that is invoked when + // the reference count for `data` reaches zero. As noted above, this data must + // remain live until the releaser is invoked. The callable releaser also must: + // + // * be move constructible + // * support `void operator()(absl::string_view) const` or `void operator()` + // + // Example: + // + // Cord MakeCord(BlockPool* pool) { + // Block* block = pool->NewBlock(); + // FillBlock(block); + // return absl::MakeCordFromExternal( + // block->ToStringView(), + // [pool, block](absl::string_view v) { + // pool->FreeBlock(block, v); + // }); + // } + // + // WARNING: Because a Cord can be reference-counted, it's likely a bug if your + // releaser doesn't do anything. For example, consider the following: + // + // void Foo(const char* buffer, int len) { + // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), + // [](absl::string_view) {}); + // + // // BUG: If Bar() copies its cord for any reason, including keeping a + // // substring of it, the lifetime of buffer might be extended beyond + // // when Foo() returns. + // Bar(c); + // } + template <typename Releaser> + friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); + + // Cord::Clear() + // + // Releases the Cord data. Any nodes that share data with other Cords, if + // applicable, will have their reference counts reduced by 1. + void Clear(); + + // Cord::Append() + // + // Appends data to the Cord, which may come from another Cord or other string + // data. + void Append(const Cord& src); + void Append(Cord&& src); + void Append(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Append(T&& src); + + // Cord::Prepend() + // + // Prepends data to the Cord, which may come from another Cord or other string + // data. 
+ void Prepend(const Cord& src); + void Prepend(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Prepend(T&& src); + + // Cord::RemovePrefix() + // + // Removes the first `n` bytes of a Cord. + void RemovePrefix(size_t n); + void RemoveSuffix(size_t n); + + // Cord::Subcord() + // + // Returns a new Cord representing the subrange [pos, pos + new_size) of + // *this. If pos >= size(), the result is empty(). If + // (pos + new_size) >= size(), the result is the subrange [pos, size()). + Cord Subcord(size_t pos, size_t new_size) const; + + // Cord::swap() + // + // Swaps the contents of the Cord with `other`. + void swap(Cord& other) noexcept; + + // swap() + // + // Swaps the contents of two Cords. + friend void swap(Cord& x, Cord& y) noexcept { + x.swap(y); + } + + // Cord::size() + // + // Returns the size of the Cord. + size_t size() const; + + // Cord::empty() + // + // Determines whether the given Cord is empty, returning `true` if so. + bool empty() const; + + // Cord::EstimatedMemoryUsage() + // + // Returns the *approximate* number of bytes held in full or in part by this + // Cord (which may not remain the same between invocations). Note that Cords + // that share memory could each be "charged" independently for the same shared + // memory. + size_t EstimatedMemoryUsage() const; + + // Cord::Compare() + // + // Compares 'this' Cord with rhs. This function and its relatives treat Cords + // as sequences of unsigned bytes. The comparison is a straightforward + // lexicographic comparison. `Cord::Compare()` returns values as follows: + // + // -1 'this' Cord is smaller + // 0 two Cords are equal + // 1 'this' Cord is larger + int Compare(absl::string_view rhs) const; + int Compare(const Cord& rhs) const; + + // Cord::StartsWith() + // + // Determines whether the Cord starts with the passed string data `rhs`. 
+ bool StartsWith(const Cord& rhs) const; + bool StartsWith(absl::string_view rhs) const; + + // Cord::EndsWith() + // + // Determines whether the Cord ends with the passed string data `rhs`. + bool EndsWith(absl::string_view rhs) const; + bool EndsWith(const Cord& rhs) const; + + // Cord::operator std::string() + // + // Converts a Cord into a `std::string`. This operator is marked explicit to + // prevent unintended Cord usage in functions that take a string. + explicit operator std::string() const; + + // CopyCordToString() + // + // Copies the contents of a `src` Cord into a `*dst` string. + // + // This function optimizes the case of reusing the destination string since it + // can reuse previously allocated capacity. However, this function does not + // guarantee that pointers previously returned by `dst->data()` remain valid + // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new + // object, prefer to simply use the conversion operator to `std::string`. + friend void CopyCordToString(const Cord& src, std::string* dst); + + class CharIterator; + + //---------------------------------------------------------------------------- + // Cord::ChunkIterator + //---------------------------------------------------------------------------- + // + // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its + // Cord. Such iteration allows you to perform non-const operations on the data + // of a Cord without modifying it. + // + // Generally, you do not instantiate a `Cord::ChunkIterator` directly; + // instead, you create one implicitly through use of the `Cord::Chunks()` + // member function. + // + // The `Cord::ChunkIterator` has the following properties: + // + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. + // * The `string_view` returned by dereferencing a valid, non-`end()` + // iterator is guaranteed to be non-empty. 
+ // * Two `ChunkIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator in this case is a proxy iterator; the `string_view` + // returned by the iterator does not live inside the Cord, and its + // lifetime is limited to the lifetime of the iterator itself. To help + // prevent lifetime issues, `ChunkIterator::reference` is not a true + // reference type and is equivalent to `value_type`. + // * The iterator keeps state that can grow for Cords that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + class ChunkIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = value_type; + + ChunkIterator() = default; + + ChunkIterator& operator++(); + ChunkIterator operator++(int); + bool operator==(const ChunkIterator& other) const; + bool operator!=(const ChunkIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend class Cord; + friend class CharIterator; + + private: + // Constructs a `begin()` iterator from `cord`. + explicit ChunkIterator(const Cord* cord); + + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than + // `current_chunk_.size()`. + void RemoveChunkPrefix(size_t n); + Cord AdvanceAndReadBytes(size_t n); + void AdvanceBytes(size_t n); + // Iterates `n` bytes, where `n` is expected to be greater than or equal to + // `current_chunk_.size()`. + void AdvanceBytesSlowPath(size_t n); + + // A view into bytes of the current `CordRep`. It may only be a view to a + // suffix of bytes if this is being used by `CharIterator`. + absl::string_view current_chunk_; + // The current leaf, or `nullptr` if the iterator points to short data. 
+ // If the current chunk is a substring node, current_leaf_ points to the + // underlying flat or external node. + absl::cord_internal::CordRep* current_leaf_ = nullptr; + // The number of bytes left in the `Cord` over which we are iterating. + size_t bytes_remaining_ = 0; + absl::InlinedVector<absl::cord_internal::CordRep*, 4> + stack_of_right_children_; + }; + + // Cord::ChunkIterator::chunk_begin() + // + // Returns an iterator to the first chunk of the `Cord`. + // + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops are not useful. + // + // Example: + // + // absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c, + // absl::string_view s) { + // return std::find(c.chunk_begin(), c.chunk_end(), s); + // } + ChunkIterator chunk_begin() const; + + // Cord::ChunkIterator::chunk_end() + // + // Returns an iterator one increment past the last chunk of the `Cord`. + // + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops may not be available. + ChunkIterator chunk_end() const; + + //---------------------------------------------------------------------------- + // Cord::ChunkIterator::ChunkRange + //---------------------------------------------------------------------------- + // + // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`, + // producing an iterator which can be used within a range-based for loop. + // Construction of a `ChunkRange` will return an iterator pointing to the + // first chunk of the Cord. Generally, do not construct a `ChunkRange` + // directly; instead, prefer to use the `Cord::Chunks()` method. 
+  //
+  // Implementation note: `ChunkRange` is simply a convenience wrapper over
+  // `Cord::chunk_begin()` and `Cord::chunk_end()`.
+  class ChunkRange {
+   public:
+    explicit ChunkRange(const Cord* cord) : cord_(cord) {}
+
+    ChunkIterator begin() const;
+    ChunkIterator end() const;
+
+   private:
+    // Non-owning; dereferenced by `begin()` and `end()`.
+    const Cord* cord_;
+  };
+
+  // Cord::Chunks()
+  //
+  // Returns a `Cord::ChunkRange` for iterating over the chunks of a `Cord`
+  // with a range-based for-loop. For most iteration tasks on a Cord, use
+  // `Cord::Chunks()` to retrieve this range.
+  //
+  // Example:
+  //
+  //   void ProcessChunks(const Cord& cord) {
+  //     for (absl::string_view chunk : cord.Chunks()) { ... }
+  //   }
+  //
+  // Note that the ordinary caveats of temporary lifetime extension apply:
+  //
+  //   void Process() {
+  //     for (absl::string_view chunk : CordFactory().Chunks()) {
+  //       // The temporary Cord returned by CordFactory has been destroyed!
+  //     }
+  //   }
+  ChunkRange Chunks() const;
+
+  //----------------------------------------------------------------------------
+  // Cord::CharIterator
+  //----------------------------------------------------------------------------
+  //
+  // A `Cord::CharIterator` allows iteration over the constituent characters of
+  // a `Cord`.
+  //
+  // Generally, you do not instantiate a `Cord::CharIterator` directly; instead,
+  // you create one implicitly through use of the `Cord::Chars()` member
+  // function.
+  //
+  // A `Cord::CharIterator` has the following properties:
+  //
+  //   * The iterator is invalidated after any non-const operation on the
+  //     Cord object over which it iterates.
+  //   * Two `CharIterator` objects can be compared equal if and only if they
+  //     remain valid and iterate over the same Cord.
+  //   * The iterator keeps state that can grow for Cords that contain many
+  //     nodes and are imbalanced due to sharing. Prefer to pass this type by
+  //     const reference instead of by value.
+  //   * This type cannot act as a forward iterator because a `Cord` can reuse
+  //     sections of memory. This fact violates the requirement for forward
+  //     iterators to compare equal if dereferencing them returns the same
+  //     object.
+  class CharIterator {
+   public:
+    using iterator_category = std::input_iterator_tag;
+    using value_type = char;
+    using difference_type = ptrdiff_t;
+    using pointer = const char*;
+    using reference = const char&;
+
+    // A default-constructed iterator is what `char_end()` returns.
+    CharIterator() = default;
+
+    CharIterator& operator++();
+    CharIterator operator++(int);
+    // Equality is delegated to the wrapped `ChunkIterator`.
+    bool operator==(const CharIterator& other) const;
+    bool operator!=(const CharIterator& other) const;
+    reference operator*() const;
+    pointer operator->() const;
+
+    friend Cord;
+
+   private:
+    // Constructs a `begin()` iterator from `cord`.
+    explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {}
+
+    // Walks the chunks; its current chunk is trimmed one byte at a time as
+    // this iterator advances.
+    ChunkIterator chunk_iterator_;
+  };
+
+  // Cord::CharIterator::AdvanceAndRead()
+  //
+  // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes
+  // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the
+  // number of bytes within the Cord; otherwise, behavior is undefined. It is
+  // valid to pass `char_end()` and `0`.
+  static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes);
+
+  // Cord::CharIterator::Advance()
+  //
+  // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than
+  // or equal to the number of bytes remaining within the Cord; otherwise,
+  // behavior is undefined. It is valid to pass `char_end()` and `0`.
+  static void Advance(CharIterator* it, size_t n_bytes);
+
+  // Cord::CharIterator::ChunkRemaining()
+  //
+  // Returns the longest contiguous view starting at the iterator's position.
+  //
+  // `it` must be dereferenceable.
+  static absl::string_view ChunkRemaining(const CharIterator& it);
+
+  // Cord::CharIterator::char_begin()
+  //
+  // Returns an iterator to the first character of the `Cord`.
+  //
+  // Generally, prefer using `Cord::Chars()` within a range-based for loop for
+  // iterating over the characters of a Cord. This method may be useful for
+  // getting a `CharIterator` where range-based for-loops may not be available.
+  CharIterator char_begin() const;
+
+  // Cord::CharIterator::char_end()
+  //
+  // Returns an iterator to one past the last character of the `Cord`.
+  //
+  // Generally, prefer using `Cord::Chars()` within a range-based for loop for
+  // iterating over the characters of a Cord. This method may be useful for
+  // getting a `CharIterator` where range-based for-loops are not useful.
+  CharIterator char_end() const;
+
+  // Cord::CharIterator::CharRange
+  //
+  // `CharRange` is a helper class for iterating over the characters of a
+  // `Cord`, producing an iterator which can be used within a range-based for
+  // loop. Construction of a `CharRange` will return an iterator pointing to
+  // the first character of the Cord. Generally, do not construct a `CharRange`
+  // directly; instead, prefer to use the `Cord::Chars()` method shown below.
+  //
+  // Implementation note: `CharRange` is simply a convenience wrapper over
+  // `Cord::char_begin()` and `Cord::char_end()`.
+  class CharRange {
+   public:
+    explicit CharRange(const Cord* cord) : cord_(cord) {}
+
+    CharIterator begin() const;
+    CharIterator end() const;
+
+   private:
+    // Non-owning; dereferenced by `begin()` and `end()`.
+    const Cord* cord_;
+  };
+
+  // Cord::CharIterator::Chars()
+  //
+  // Returns a `Cord::CharRange` for iterating over the characters of a
+  // `Cord` with a range-based for-loop. For most character-based iteration
+  // tasks on a Cord, use `Cord::Chars()` to retrieve this range.
+  //
+  // Example:
+  //
+  //   void ProcessCord(const Cord& cord) {
+  //     for (char c : cord.Chars()) { ... }
+  //   }
+  //
+  // Note that the ordinary caveats of temporary lifetime extension apply:
+  //
+  //   void Process() {
+  //     for (char c : CordFactory().Chars()) {
+  //       // The temporary Cord returned by CordFactory has been destroyed!
+  //     }
+  //   }
+  CharRange Chars() const;
+
+  // Cord::operator[]
+  //
+  // Gets the "i"th character of the Cord and returns it, provided that
+  // 0 <= i < Cord.size().
+  //
+  // NOTE: This routine is reasonably efficient. It is roughly
+  // logarithmic based on the number of chunks that make up the cord. Still,
+  // if you need to iterate over the contents of a cord, you should
+  // use a CharIterator/ChunkIterator rather than call operator[] or Get()
+  // repeatedly in a loop.
+  char operator[](size_t i) const;
+
+  // Cord::TryFlat()
+  //
+  // If this cord's representation is a single flat array, returns a
+  // string_view referencing that array. Otherwise returns nullopt.
+  absl::optional<absl::string_view> TryFlat() const;
+
+  // Cord::Flatten()
+  //
+  // Flattens the cord into a single array and returns a view of the data.
+  //
+  // If the cord was already flat, the contents are not modified.
+  absl::string_view Flatten();
+
+  // Supports absl::Cord as a sink object for absl::Format().
+  // Each flushed `part` is appended to `cord`.
+  friend void AbslFormatFlush(absl::Cord* cord, absl::string_view part) {
+    cord->Append(part);
+  }
+
+  // Hashes the contents of `c`. A cord that `TryFlat()` can expose as one
+  // contiguous view is hashed as that `string_view`; any other cord is
+  // hashed chunk by chunk through `HashFragmented()`.
+  template <typename H>
+  friend H AbslHashValue(H hash_state, const absl::Cord& c) {
+    absl::optional<absl::string_view> maybe_flat = c.TryFlat();
+    if (maybe_flat.has_value()) {
+      return H::combine(std::move(hash_state), *maybe_flat);
+    }
+    return c.HashFragmented(std::move(hash_state));
+  }
+
+  // Create a Cord with the contents of StringConstant<T>::value.
+  // No allocations will be done and no data will be copied.
+  // This is an INTERNAL API and subject to change or removal. This API can only
+  // be used by spelling absl::strings_internal::MakeStringConstant, which is
+  // also an internal API.
+  template <typename T>
+  explicit constexpr Cord(strings_internal::StringConstant<T>);
+
+ private:
+  friend class CordTestPeer;
+  friend bool operator==(const Cord& lhs, const Cord& rhs);
+  friend bool operator==(const Cord& lhs, absl::string_view rhs);
+
+  // Calls the provided function once for each cord chunk, in order. Unlike
+  // Chunks(), this API will not allocate memory.
+  void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const;
+
+  // Allocates new contiguous storage for the contents of the cord. This is
+  // called by Flatten() when the cord was not already flat.
+  absl::string_view FlattenSlowPath();
+
+  // Actual cord contents are hidden inside the following simple
+  // class so that we can isolate the bulk of cord.cc from changes
+  // to the representation.
+  //
+  // InlineRep holds either a tree pointer, or an array of kMaxInline bytes.
+  class InlineRep {
+   public:
+    static constexpr unsigned char kMaxInline = cord_internal::kMaxInline;
+    static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), "");
+    static constexpr unsigned char kTreeFlag = cord_internal::kTreeFlag;
+    static constexpr unsigned char kProfiledFlag = cord_internal::kProfiledFlag;
+
+    constexpr InlineRep() : data_() {}
+    InlineRep(const InlineRep& src);
+    // NOTE(review): unlike the move assignment below, the move constructor is
+    // not declared noexcept.
+    InlineRep(InlineRep&& src);
+    InlineRep& operator=(const InlineRep& src);
+    InlineRep& operator=(InlineRep&& src) noexcept;
+
+    explicit constexpr InlineRep(cord_internal::InlineData data);
+
+    void Swap(InlineRep* rhs);
+    bool empty() const;
+    size_t size() const;
+    const char* data() const;  // Returns nullptr if holding pointer
+    void set_data(const char* data, size_t n,
+                  bool nullify_tail);  // Discards pointer, if any
+    char* set_data(size_t n);  // Write data to the result
+    // Returns nullptr if holding bytes
+    absl::cord_internal::CordRep* tree() const;
+    // Discards old pointer, if any
+    void set_tree(absl::cord_internal::CordRep* rep);
+    // Replaces a tree with a new root. This is faster than set_tree, but it
+    // should only be used when it's clear that the old rep was a tree.
+    void replace_tree(absl::cord_internal::CordRep* rep);
+    // Returns non-null iff was holding a pointer
+    absl::cord_internal::CordRep* clear();
+    // Converts to pointer if necessary.
+    absl::cord_internal::CordRep* force_tree(size_t extra_hint);
+    void reduce_size(size_t n);  // REQUIRES: holding data
+    void remove_prefix(size_t n);  // REQUIRES: holding data
+    void AppendArray(const char* src_data, size_t src_size);
+    absl::string_view FindFlatStartPiece() const;
+    void AppendTree(absl::cord_internal::CordRep* tree);
+    void PrependTree(absl::cord_internal::CordRep* tree);
+    void GetAppendRegion(char** region, size_t* size, size_t max_length);
+    void GetAppendRegion(char** region, size_t* size);
+    // Returns true when the two representations are bytewise identical.
+    bool IsSame(const InlineRep& other) const {
+      return memcmp(&data_, &other.data_, sizeof(data_)) == 0;
+    }
+    // Three-way comparison of the raw representation: returns 0 when the
+    // first 16 bytes match, otherwise -1 or 1. The bytes are read as two
+    // 8-byte words and the first differing word is compared as a big-endian
+    // integer, i.e. in byte order. `Cord::Compare()` calls this only when
+    // neither side holds a tree.
+    int BitwiseCompare(const InlineRep& other) const {
+      uint64_t x, y;
+      // Use memcpy to avoid aliasing issues.
+      memcpy(&x, &data_, sizeof(x));
+      memcpy(&y, &other.data_, sizeof(y));
+      if (x == y) {
+        memcpy(&x, reinterpret_cast<const char*>(&data_) + 8, sizeof(x));
+        memcpy(&y, reinterpret_cast<const char*>(&other.data_) + 8, sizeof(y));
+        if (x == y) return 0;
+      }
+      return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y)
+                 ? -1
+                 : 1;
+    }
+    // Copies the inline bytes into `*dst`, leaving `*dst` with exactly
+    // `tagged_size()` characters.
+    void CopyTo(std::string* dst) const {
+      // memcpy is much faster when operating on a known size. On most supported
+      // platforms, the small string optimization is large enough that resizing
+      // to 15 bytes does not cause a memory allocation.
+      absl::strings_internal::STLStringResizeUninitialized(dst,
+                                                           sizeof(data_) - 1);
+      memcpy(&(*dst)[0], &data_, sizeof(data_) - 1);
+      // erase is faster than resize because the logic for memory allocation is
+      // not needed.
+      dst->erase(tagged_size());
+    }
+
+    // Copies the inline contents into `dst`. Assumes the cord is not empty.
+    void CopyToArray(char* dst) const;
+
+    // A tagged size above kMaxInline means a tree pointer is stored instead
+    // of inline bytes.
+    bool is_tree() const { return tagged_size() > kMaxInline; }
+
+   private:
+    friend class Cord;
+
+    void AssignSlow(const InlineRep& src);
+    // Unrefs the tree, stops profiling, and zeroes the contents
+    void ClearSlow();
+
+    void ResetToEmpty() { data_ = {}; }
+
+    // This uses reinterpret_cast instead of the union to avoid accessing the
+    // inactive union element. The tagged size is not a common prefix.
+    // The tag lives in the byte at offset kMaxInline of `data_`.
+    void set_tagged_size(char new_tag) {
+      reinterpret_cast<char*>(&data_)[kMaxInline] = new_tag;
+    }
+    char tagged_size() const {
+      return reinterpret_cast<const char*>(&data_)[kMaxInline];
+    }
+
+    cord_internal::InlineData data_;
+  };
+  InlineRep contents_;
+
+  // Helper for MemoryUsage().
+  static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep);
+
+  // Helper for GetFlat() and TryFlat().
+  static bool GetFlatAux(absl::cord_internal::CordRep* rep,
+                         absl::string_view* fragment);
+
+  // Helper for ForEachChunk().
+  static void ForEachChunkAux(
+      absl::cord_internal::CordRep* rep,
+      absl::FunctionRef<void(absl::string_view)> callback);
+
+  // The destructor for non-empty Cords.
+  void DestroyCordSlow();
+
+  // Out-of-line implementation of slower parts of logic.
+  void CopyToArraySlowPath(char* dst) const;
+  int CompareSlowPath(absl::string_view rhs, size_t compared_size,
+                      size_t size_to_compare) const;
+  int CompareSlowPath(const Cord& rhs, size_t compared_size,
+                      size_t size_to_compare) const;
+  bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const;
+  bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const;
+  int CompareImpl(const Cord& rhs) const;
+
+  template <typename ResultType, typename RHS>
+  friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
+                                   size_t size_to_compare);
+  static absl::string_view GetFirstChunk(const Cord& c);
+  static absl::string_view GetFirstChunk(absl::string_view sv);
+
+  // Returns a new reference to contents_.tree(), or steals an existing
+  // reference if called on an rvalue.
+  absl::cord_internal::CordRep* TakeRep() const&;
+  absl::cord_internal::CordRep* TakeRep() &&;
+
+  // Helper for Append().
+  template <typename C>
+  void AppendImpl(C&& src);
+
+  // Helper for AbslHashValue().
+  // Feeds every chunk, in order, to the hash framework's piecewise combiner
+  // and finally mixes in the total size of the cord.
+  template <typename H>
+  H HashFragmented(H hash_state) const {
+    typename H::AbslInternalPiecewiseCombiner combiner;
+    ForEachChunk([&combiner, &hash_state](absl::string_view chunk) {
+      hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(),
+                                       chunk.size());
+    });
+    return H::combine(combiner.finalize(std::move(hash_state)), size());
+  }
+};
+
+ABSL_NAMESPACE_END
+}  // namespace absl
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+
+// Allows a Cord to be logged.
+extern std::ostream& operator<<(std::ostream& out, const Cord& cord);
+
+// ------------------------------------------------------------------
+// Internal details follow. Clients should ignore.
+
+namespace cord_internal {
+
+// Fast implementation of memmove for up to 15 bytes. This implementation is
+// safe for overlapping regions. If nullify_tail is true, the destination is
+// padded with '\0' up to 16 bytes.
+inline void SmallMemmove(char* dst, const char* src, size_t n,
+                         bool nullify_tail = false) {
+  // Every branch loads all the source bytes it needs before it stores
+  // anything to `dst`; that ordering is what makes overlapping `src`/`dst`
+  // safe in either direction.
+  if (n >= 8) {
+    assert(n <= 16);
+    // Copy the first and the last 8 bytes; for n < 16 the two windows overlap.
+    uint64_t buf1;
+    uint64_t buf2;
+    memcpy(&buf1, src, 8);
+    memcpy(&buf2, src + n - 8, 8);
+    if (nullify_tail) {
+      memset(dst + 8, 0, 8);
+    }
+    memcpy(dst, &buf1, 8);
+    memcpy(dst + n - 8, &buf2, 8);
+  } else if (n >= 4) {
+    // Same idea with two (possibly overlapping) 4-byte windows.
+    uint32_t buf1;
+    uint32_t buf2;
+    memcpy(&buf1, src, 4);
+    memcpy(&buf2, src + n - 4, 4);
+    if (nullify_tail) {
+      memset(dst + 4, 0, 4);
+      memset(dst + 8, 0, 8);
+    }
+    memcpy(dst, &buf1, 4);
+    memcpy(dst + n - 4, &buf2, 4);
+  } else {
+    // n is 0..3: the bytes at indices 0, n / 2 and n - 1 cover every position.
+    // They are read into locals first because, when `dst` lies just after
+    // `src`, storing dst[0] would overwrite a source byte that has not been
+    // read yet.
+    char first = 0;
+    char middle = 0;
+    char last = 0;
+    if (n != 0) {
+      first = src[0];
+      middle = src[n / 2];
+      last = src[n - 1];
+      dst[0] = first;
+      dst[n / 2] = middle;
+      dst[n - 1] = last;
+    }
+    if (nullify_tail) {
+      // Zero bytes [n, 16): the two 8-byte fills overlap for n < 8.
+      memset(dst + 8, 0, 8);
+      memset(dst + n, 0, 8);
+    }
+  }
+}
+
+// Does non-template-specific `CordRepExternal` initialization.
+// Expects `data` to be non-empty.
+void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep);
+
+// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer
+// to it, or `nullptr` if `data` was empty.
+template <typename Releaser>
+// NOLINTNEXTLINE - suppress clang-tidy raw pointer return.
+CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) {
+  using ReleaserType = absl::decay_t<Releaser>;
+  if (data.empty()) {
+    // Never create empty external nodes.
+    // The releaser is still invoked (on a decayed temporary) so the caller's
+    // cleanup runs even though no node is created.
+    InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)),
+                   data);
+    return nullptr;
+  }
+
+  CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>(
+      std::forward<Releaser>(releaser), 0);
+  InitializeCordRepExternal(data, rep);
+  return rep;
+}
+
+// Overload for function reference types that dispatches using a function
+// pointer because there are no `alignof()` or `sizeof()` a function reference.
+// NOLINTNEXTLINE - suppress clang-tidy raw pointer return.
+inline CordRep* NewExternalRep(absl::string_view data, + void (&releaser)(absl::string_view)) { + return NewExternalRep(data, &releaser); +} + +} // namespace cord_internal + +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { + Cord cord; + cord.contents_.set_tree(::absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser))); + return cord; +} + +constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data) + : data_(data) {} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { + data_ = src.data_; +} + +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { + data_ = src.data_; + src.ResetToEmpty(); +} + +inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { + if (this == &src) { + return *this; + } + if (!is_tree() && !src.is_tree()) { + data_ = src.data_; + return *this; + } + AssignSlow(src); + return *this; +} + +inline Cord::InlineRep& Cord::InlineRep::operator=( + Cord::InlineRep&& src) noexcept { + if (is_tree()) { + ClearSlow(); + } + data_ = src.data_; + src.ResetToEmpty(); + return *this; +} + +inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { + if (rhs == this) { + return; + } + + std::swap(data_, rhs->data_); +} + +inline const char* Cord::InlineRep::data() const { + return is_tree() ? 
nullptr : data_.as_chars; +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { + if (is_tree()) { + return data_.as_tree.rep; + } else { + return nullptr; + } +} + +inline bool Cord::InlineRep::empty() const { return tagged_size() == 0; } + +inline size_t Cord::InlineRep::size() const { + const char tag = tagged_size(); + if (tag <= kMaxInline) return tag; + return static_cast<size_t>(tree()->length); +} + +inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { + if (rep == nullptr) { + ResetToEmpty(); + } else { + bool was_tree = is_tree(); + data_.as_tree = {rep, {}, tagged_size()}; + if (!was_tree) { + // If we were not a tree already, set the tag. + // Otherwise, leave it alone because it might have the profile bit on. + set_tagged_size(kTreeFlag); + } + } +} + +inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { + ABSL_ASSERT(is_tree()); + if (ABSL_PREDICT_FALSE(rep == nullptr)) { + set_tree(rep); + return; + } + data_.as_tree = {rep, {}, tagged_size()}; +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { + absl::cord_internal::CordRep* result = tree(); + ResetToEmpty(); + return result; +} + +inline void Cord::InlineRep::CopyToArray(char* dst) const { + assert(!is_tree()); + size_t n = tagged_size(); + assert(n != 0); + cord_internal::SmallMemmove(dst, data_.as_chars, n); +} + +constexpr inline Cord::Cord() noexcept {} + +template <typename T> +constexpr Cord::Cord(strings_internal::StringConstant<T>) + : contents_(strings_internal::StringConstant<T>::value.size() <= + cord_internal::kMaxInline + ? 
cord_internal::InlineData( + strings_internal::StringConstant<T>::value) + : cord_internal::InlineData(cord_internal::AsTree{ + &cord_internal::ConstInitExternalStorage< + strings_internal::StringConstant<T>>::value, + {}, + cord_internal::kTreeFlag})) {} + +inline Cord& Cord::operator=(const Cord& x) { + contents_ = x.contents_; + return *this; +} + +inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} + +inline void Cord::swap(Cord& other) noexcept { + contents_.Swap(&other.contents_); +} + +inline Cord& Cord::operator=(Cord&& x) noexcept { + contents_ = std::move(x.contents_); + return *this; +} + +extern template Cord::Cord(std::string&& src); +extern template Cord& Cord::operator=(std::string&& src); + +inline size_t Cord::size() const { + // Length is 1st field in str.rep_ + return contents_.size(); +} + +inline bool Cord::empty() const { return contents_.empty(); } + +inline size_t Cord::EstimatedMemoryUsage() const { + size_t result = sizeof(Cord); + if (const absl::cord_internal::CordRep* rep = contents_.tree()) { + result += MemoryUsageAux(rep); + } + return result; +} + +inline absl::optional<absl::string_view> Cord::TryFlat() const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } + absl::string_view fragment; + if (GetFlatAux(rep, &fragment)) { + return fragment; + } + return absl::nullopt; +} + +inline absl::string_view Cord::Flatten() { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } else { + absl::string_view already_flat_contents; + if (GetFlatAux(rep, &already_flat_contents)) { + return already_flat_contents; + } + } + return FlattenSlowPath(); +} + +inline void Cord::Append(absl::string_view src) { + contents_.AppendArray(src.data(), src.size()); +} + +extern template void Cord::Append(std::string&& src); +extern template 
void Cord::Prepend(std::string&& src); + +inline int Cord::Compare(const Cord& rhs) const { + if (!contents_.is_tree() && !rhs.contents_.is_tree()) { + return contents_.BitwiseCompare(rhs.contents_); + } + + return CompareImpl(rhs); +} + +// Does 'this' cord start/end with rhs +inline bool Cord::StartsWith(const Cord& rhs) const { + if (contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline bool Cord::StartsWith(absl::string_view rhs) const { + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) + : bytes_remaining_(cord->size()) { + if (cord->empty()) return; + if (cord->contents_.is_tree()) { + stack_of_right_children_.push_back(cord->contents_.tree()); + operator++(); + } else { + current_chunk_ = absl::string_view(cord->contents_.data(), cord->size()); + } +} + +inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { + ChunkIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const { + return bytes_remaining_ == other.bytes_remaining_; +} + +inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { + return !(*this == other); +} + +inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); + return current_chunk_; +} + +inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); + return ¤t_chunk_; +} + +inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { + assert(n < current_chunk_.size()); + current_chunk_.remove_prefix(n); + bytes_remaining_ -= n; +} + +inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { + 
RemoveChunkPrefix(n); + } else if (n != 0) { + AdvanceBytesSlowPath(n); + } +} + +inline Cord::ChunkIterator Cord::chunk_begin() const { + return ChunkIterator(this); +} + +inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); } + +inline Cord::ChunkIterator Cord::ChunkRange::begin() const { + return cord_->chunk_begin(); +} + +inline Cord::ChunkIterator Cord::ChunkRange::end() const { + return cord_->chunk_end(); +} + +inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); } + +inline Cord::CharIterator& Cord::CharIterator::operator++() { + if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) { + chunk_iterator_.RemoveChunkPrefix(1); + } else { + ++chunk_iterator_; + } + return *this; +} + +inline Cord::CharIterator Cord::CharIterator::operator++(int) { + CharIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::CharIterator::operator==(const CharIterator& other) const { + return chunk_iterator_ == other.chunk_iterator_; +} + +inline bool Cord::CharIterator::operator!=(const CharIterator& other) const { + return !(*this == other); +} + +inline Cord::CharIterator::reference Cord::CharIterator::operator*() const { + return *chunk_iterator_->data(); +} + +inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const { + return chunk_iterator_->data(); +} + +inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes); +} + +inline void Cord::Advance(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + it->chunk_iterator_.AdvanceBytes(n_bytes); +} + +inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) { + return *it.chunk_iterator_; +} + +inline Cord::CharIterator Cord::char_begin() const { + return CharIterator(this); +} + +inline Cord::CharIterator Cord::char_end() const { return CharIterator(); } + +inline Cord::CharIterator Cord::CharRange::begin() const { + return 
cord_->char_begin(); +} + +inline Cord::CharIterator Cord::CharRange::end() const { + return cord_->char_end(); +} + +inline Cord::CharRange Cord::Chars() const { return CharRange(this); } + +inline void Cord::ForEachChunk( + absl::FunctionRef<void(absl::string_view)> callback) const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + callback(absl::string_view(contents_.data(), contents_.size())); + } else { + return ForEachChunkAux(rep, callback); + } +} + +// Nonmember Cord-to-Cord relational operarators. +inline bool operator==(const Cord& lhs, const Cord& rhs) { + if (lhs.contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (lhs.size() != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, const Cord& y) { + return x.Compare(y) < 0; +} +inline bool operator>(const Cord& x, const Cord& y) { + return x.Compare(y) > 0; +} +inline bool operator<=(const Cord& x, const Cord& y) { + return x.Compare(y) <= 0; +} +inline bool operator>=(const Cord& x, const Cord& y) { + return x.Compare(y) >= 0; +} + +// Nonmember Cord-to-absl::string_view relational operators. +// +// Due to implicit conversions, these also enable comparisons of Cord with +// with std::string, ::string, and const char*. 
+inline bool operator==(const Cord& lhs, absl::string_view rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator==(absl::string_view x, const Cord& y) { return y == x; } +inline bool operator!=(const Cord& x, absl::string_view y) { return !(x == y); } +inline bool operator!=(absl::string_view x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, absl::string_view y) { + return x.Compare(y) < 0; +} +inline bool operator<(absl::string_view x, const Cord& y) { + return y.Compare(x) > 0; +} +inline bool operator>(const Cord& x, absl::string_view y) { return y < x; } +inline bool operator>(absl::string_view x, const Cord& y) { return y < x; } +inline bool operator<=(const Cord& x, absl::string_view y) { return !(y < x); } +inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } +inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } +inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } + +// Some internals exposed to test code. +namespace strings_internal { +class CordTestAccess { + public: + static size_t FlatOverhead(); + static size_t MaxFlatLength(); + static size_t SizeofCordRepConcat(); + static size_t SizeofCordRepExternal(); + static size_t SizeofCordRepSubstring(); + static size_t FlatTagToLength(uint8_t tag); + static uint8_t LengthToTag(size_t s); +}; +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_H_ diff --git a/third_party/abseil_cpp/absl/strings/cord_test.cc b/third_party/abseil_cpp/absl/strings/cord_test.cc new file mode 100644 index 000000000000..7942bfc03c49 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/cord_test.cc @@ -0,0 +1,1711 @@ +// Copyright 2020 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord.h" + +#include <algorithm> +#include <climits> +#include <cstdio> +#include <iterator> +#include <map> +#include <numeric> +#include <random> +#include <sstream> +#include <type_traits> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/container/fixed_array.h" +#include "absl/strings/cord_test_helpers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" + +typedef std::mt19937_64 RandomEngine; + +static std::string RandomLowercaseString(RandomEngine* rng); +static std::string RandomLowercaseString(RandomEngine* rng, size_t length); + +static int GetUniformRandomUpTo(RandomEngine* rng, int upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<int> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static size_t GetUniformRandomUpTo(RandomEngine* rng, size_t upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<size_t> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static int32_t GenerateSkewedRandom(RandomEngine* rng, int max_log) { + const uint32_t base = (*rng)() % (max_log + 
1); + const uint32_t mask = ((base < 32) ? (1u << base) : 0u) - 1u; + return (*rng)() & mask; +} + +static std::string RandomLowercaseString(RandomEngine* rng) { + int length; + std::bernoulli_distribution one_in_1k(0.001); + std::bernoulli_distribution one_in_10k(0.0001); + // With low probability, make a large fragment + if (one_in_10k(*rng)) { + length = GetUniformRandomUpTo(rng, 1048576); + } else if (one_in_1k(*rng)) { + length = GetUniformRandomUpTo(rng, 10000); + } else { + length = GenerateSkewedRandom(rng, 10); + } + return RandomLowercaseString(rng, length); +} + +static std::string RandomLowercaseString(RandomEngine* rng, size_t length) { + std::string result(length, '\0'); + std::uniform_int_distribution<int> chars('a', 'z'); + std::generate(result.begin(), result.end(), + [&]() { return static_cast<char>(chars(*rng)); }); + return result; +} + +static void DoNothing(absl::string_view /* data */, void* /* arg */) {} + +static void DeleteExternalString(absl::string_view data, void* arg) { + std::string* s = reinterpret_cast<std::string*>(arg); + EXPECT_EQ(data, *s); + delete s; +} + +// Add "s" to *dst via `MakeCordFromExternal` +static void AddExternalMemory(absl::string_view s, absl::Cord* dst) { + std::string* str = new std::string(s.data(), s.size()); + dst->Append(absl::MakeCordFromExternal(*str, [str](absl::string_view data) { + DeleteExternalString(data, str); + })); +} + +static void DumpGrowth() { + absl::Cord str; + for (int i = 0; i < 1000; i++) { + char c = 'a' + i % 26; + str.Append(absl::string_view(&c, 1)); + } +} + +// Make a Cord with some number of fragments. Return the size (in bytes) +// of the smallest fragment. +static size_t AppendWithFragments(const std::string& s, RandomEngine* rng, + absl::Cord* cord) { + size_t j = 0; + const size_t max_size = s.size() / 5; // Make approx. 
10 fragments + size_t min_size = max_size; // size of smallest fragment + while (j < s.size()) { + size_t N = 1 + GetUniformRandomUpTo(rng, max_size); + if (N > (s.size() - j)) { + N = s.size() - j; + } + if (N < min_size) { + min_size = N; + } + + std::bernoulli_distribution coin_flip(0.5); + if (coin_flip(*rng)) { + // Grow by adding an external-memory. + AddExternalMemory(absl::string_view(s.data() + j, N), cord); + } else { + cord->Append(absl::string_view(s.data() + j, N)); + } + j += N; + } + return min_size; +} + +// Add an external memory that contains the specified std::string to cord +static void AddNewStringBlock(const std::string& str, absl::Cord* dst) { + char* data = new char[str.size()]; + memcpy(data, str.data(), str.size()); + dst->Append(absl::MakeCordFromExternal( + absl::string_view(data, str.size()), + [](absl::string_view s) { delete[] s.data(); })); +} + +// Make a Cord out of many different types of nodes. +static absl::Cord MakeComposite() { + absl::Cord cord; + cord.Append("the"); + AddExternalMemory(" quick brown", &cord); + AddExternalMemory(" fox jumped", &cord); + + absl::Cord full(" over"); + AddExternalMemory(" the lazy", &full); + AddNewStringBlock(" dog slept the whole day away", &full); + absl::Cord substring = full.Subcord(0, 18); + + // Make substring long enough to defeat the copying fast path in Append. 
+ substring.Append(std::string(1000, '.')); + cord.Append(substring); + cord = cord.Subcord(0, cord.size() - 998); // Remove most of extra junk + + return cord; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class CordTestPeer { + public: + static void ForEachChunk( + const Cord& c, absl::FunctionRef<void(absl::string_view)> callback) { + c.ForEachChunk(callback); + } + + static bool IsTree(const Cord& c) { return c.contents_.is_tree(); } +}; + +ABSL_NAMESPACE_END +} // namespace absl + +TEST(Cord, AllFlatSizes) { + using absl::strings_internal::CordTestAccess; + + for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { + // Make a string of length s. + std::string src; + while (src.size() < s) { + src.push_back('a' + (src.size() % 26)); + } + + absl::Cord dst(src); + EXPECT_EQ(std::string(dst), src) << s; + } +} + +// We create a Cord at least 128GB in size using the fact that Cords can +// internally reference-count; thus the Cord is enormous without actually +// consuming very much memory. +TEST(GigabyteCord, FromExternal) { + const size_t one_gig = 1024U * 1024U * 1024U; + size_t max_size = 2 * one_gig; + if (sizeof(max_size) > 4) max_size = 128 * one_gig; + + size_t length = 128 * 1024; + char* data = new char[length]; + absl::Cord from = absl::MakeCordFromExternal( + absl::string_view(data, length), + [](absl::string_view sv) { delete[] sv.data(); }); + + // This loop may seem odd due to its combination of exponential doubling of + // size and incremental size increases. We do it incrementally to be sure the + // Cord will need rebalancing and will exercise code that, in the past, has + // caused crashes in production. We grow exponentially so that the code will + // execute in a reasonable amount of time. 
+ absl::Cord c; + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + c.Append(from); + while (c.size() < max_size) { + c.Append(c); + c.Append(from); + c.Append(from); + c.Append(from); + c.Append(from); + } + + for (int i = 0; i < 1024; ++i) { + c.Append(from); + } + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + // Note: on a 32-bit build, this comes out to 2,818,048,000 bytes. + // Note: on a 64-bit build, this comes out to 171,932,385,280 bytes. +} + +static absl::Cord MakeExternalCord(int size) { + char* buffer = new char[size]; + memset(buffer, 'x', size); + absl::Cord cord; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(buffer, size), + [](absl::string_view s) { delete[] s.data(); })); + return cord; +} + +// Extern to fool clang that this is not constant. Needed to suppress +// a warning of unsafe code we want to test. +extern bool my_unique_true_boolean; +bool my_unique_true_boolean = true; + +TEST(Cord, Assignment) { + absl::Cord x(absl::string_view("hi there")); + absl::Cord y(x); + ASSERT_EQ(std::string(x), "hi there"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x == y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y <= x); + + x = absl::string_view("foo"); + ASSERT_EQ(std::string(x), "foo"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x < y); + ASSERT_TRUE(y > x); + ASSERT_TRUE(x != y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y >= x); + + x = "foo"; + ASSERT_EQ(x, "foo"); + + // Test that going from inline rep to tree we don't leak memory. 
+ std::vector<std::pair<absl::string_view, absl::string_view>> + test_string_pairs = {{"hi there", "foo"}, + {"loooooong coooooord", "short cord"}, + {"short cord", "loooooong coooooord"}, + {"loooooong coooooord1", "loooooong coooooord2"}}; + for (std::pair<absl::string_view, absl::string_view> test_strings : + test_string_pairs) { + absl::Cord tmp(test_strings.first); + absl::Cord z(std::move(tmp)); + ASSERT_EQ(std::string(z), test_strings.first); + tmp = test_strings.second; + z = std::move(tmp); + ASSERT_EQ(std::string(z), test_strings.second); + } + { + // Test that self-move assignment doesn't crash/leak. + // Do not write such code! + absl::Cord my_small_cord("foo"); + absl::Cord my_big_cord("loooooong coooooord"); + // Bypass clang's warning on self move-assignment. + absl::Cord* my_small_alias = + my_unique_true_boolean ? &my_small_cord : &my_big_cord; + absl::Cord* my_big_alias = + !my_unique_true_boolean ? &my_small_cord : &my_big_cord; + + *my_small_alias = std::move(my_small_cord); + *my_big_alias = std::move(my_big_cord); + // my_small_cord and my_big_cord are in an unspecified but valid + // state, and will be correctly destroyed here. 
+ } +} + +TEST(Cord, StartsEndsWith) { + absl::Cord x(absl::string_view("abcde")); + absl::Cord empty(""); + + ASSERT_TRUE(x.StartsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.StartsWith(absl::Cord("abc"))); + ASSERT_TRUE(x.StartsWith(absl::Cord(""))); + ASSERT_TRUE(empty.StartsWith(absl::Cord(""))); + ASSERT_TRUE(x.EndsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord("cde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord(""))); + ASSERT_TRUE(empty.EndsWith(absl::Cord(""))); + + ASSERT_TRUE(!x.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!x.EndsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.EndsWith(absl::Cord("xyz"))); + + ASSERT_TRUE(x.StartsWith("abcde")); + ASSERT_TRUE(x.StartsWith("abc")); + ASSERT_TRUE(x.StartsWith("")); + ASSERT_TRUE(empty.StartsWith("")); + ASSERT_TRUE(x.EndsWith("abcde")); + ASSERT_TRUE(x.EndsWith("cde")); + ASSERT_TRUE(x.EndsWith("")); + ASSERT_TRUE(empty.EndsWith("")); + + ASSERT_TRUE(!x.StartsWith("xyz")); + ASSERT_TRUE(!empty.StartsWith("xyz")); + ASSERT_TRUE(!x.EndsWith("xyz")); + ASSERT_TRUE(!empty.EndsWith("xyz")); +} + +TEST(Cord, Subcord) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + const std::string s = RandomLowercaseString(&rng, 1024); + + absl::Cord a; + AppendWithFragments(s, &rng, &a); + ASSERT_EQ(s.size(), a.size()); + + // Check subcords of a, from a variety of interesting points. 
+ std::set<size_t> positions; + for (int i = 0; i <= 32; ++i) { + positions.insert(i); + positions.insert(i * 32 - 1); + positions.insert(i * 32); + positions.insert(i * 32 + 1); + positions.insert(a.size() - i); + } + positions.insert(237); + positions.insert(732); + for (size_t pos : positions) { + if (pos > a.size()) continue; + for (size_t end_pos : positions) { + if (end_pos < pos || end_pos > a.size()) continue; + absl::Cord sa = a.Subcord(pos, end_pos - pos); + EXPECT_EQ(absl::string_view(s).substr(pos, end_pos - pos), + std::string(sa)) + << a; + } + } + + // Do the same thing for an inline cord. + const std::string sh = "short"; + absl::Cord c(sh); + for (size_t pos = 0; pos <= sh.size(); ++pos) { + for (size_t n = 0; n <= sh.size() - pos; ++n) { + absl::Cord sc = c.Subcord(pos, n); + EXPECT_EQ(sh.substr(pos, n), std::string(sc)) << c; + } + } + + // Check subcords of subcords. + absl::Cord sa = a.Subcord(0, a.size()); + std::string ss = s.substr(0, s.size()); + while (sa.size() > 1) { + sa = sa.Subcord(1, sa.size() - 2); + ss = ss.substr(1, ss.size() - 2); + EXPECT_EQ(ss, std::string(sa)) << a; + if (HasFailure()) break; // halt cascade + } + + // It is OK to ask for too much. + sa = a.Subcord(0, a.size() + 1); + EXPECT_EQ(s, std::string(sa)); + + // It is OK to ask for something beyond the end. 
+ sa = a.Subcord(a.size() + 1, 0); + EXPECT_TRUE(sa.empty()); + sa = a.Subcord(a.size() + 1, 1); + EXPECT_TRUE(sa.empty()); +} + +TEST(Cord, Swap) { + absl::string_view a("Dexter"); + absl::string_view b("Mandark"); + absl::Cord x(a); + absl::Cord y(b); + swap(x, y); + ASSERT_EQ(x, absl::Cord(b)); + ASSERT_EQ(y, absl::Cord(a)); + x.swap(y); + ASSERT_EQ(x, absl::Cord(a)); + ASSERT_EQ(y, absl::Cord(b)); +} + +static void VerifyCopyToString(const absl::Cord& cord) { + std::string initially_empty; + absl::CopyCordToString(cord, &initially_empty); + EXPECT_EQ(initially_empty, cord); + + constexpr size_t kInitialLength = 1024; + std::string has_initial_contents(kInitialLength, 'x'); + const char* address_before_copy = has_initial_contents.data(); + absl::CopyCordToString(cord, &has_initial_contents); + EXPECT_EQ(has_initial_contents, cord); + + if (cord.size() <= kInitialLength) { + EXPECT_EQ(has_initial_contents.data(), address_before_copy) + << "CopyCordToString allocated new string storage; " + "has_initial_contents = \"" + << has_initial_contents << "\""; + } +} + +TEST(Cord, CopyToString) { + VerifyCopyToString(absl::Cord()); + VerifyCopyToString(absl::Cord("small cord")); + VerifyCopyToString( + absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", + "copying ", "to ", "a ", "string."})); +} + +TEST(TryFlat, Empty) { + absl::Cord c; + EXPECT_EQ(c.TryFlat(), ""); +} + +TEST(TryFlat, Flat) { + absl::Cord c("hello"); + EXPECT_EQ(c.TryFlat(), "hello"); +} + +TEST(TryFlat, SubstrInlined) { + absl::Cord c("hello"); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), "ello"); +} + +TEST(TryFlat, SubstrFlat) { + absl::Cord c("longer than 15 bytes"); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), "onger than 15 bytes"); +} + +TEST(TryFlat, Concat) { + absl::Cord c = absl::MakeFragmentedCord({"hel", "lo"}); + EXPECT_EQ(c.TryFlat(), absl::nullopt); +} + +TEST(TryFlat, External) { + absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + 
// Test data
namespace {
// Collection of strings with a spread of lengths: every length from 0 to 29,
// then 21 lengths centred on kHalf and 21 lengths centred on kMaxFlatLength.
class TestData {
 private:
  std::vector<std::string> data_;

  // Return a std::string of the specified length, built by repeating the tag
  // "(<length>)" and truncating. A non-positive length yields an empty string.
  static std::string MakeString(int length) {
    // Work with an unsigned target so the loop bound is never a negative int
    // silently converted to a huge unsigned value.
    const size_t target = length > 0 ? static_cast<size_t>(length) : 0;
    char tag[30];
    snprintf(tag, sizeof(tag), "(%d)", length);

    std::string result;
    result.reserve(target + sizeof(tag));
    while (result.size() < target) {
      result += tag;
    }
    result.resize(target);
    return result;
  }

 public:
  TestData() {
    // NOTE(review): 4096 - 9 presumably mirrors the library's maximum flat
    // node payload -- confirm against CordTestAccess::MaxFlatLength().
    constexpr int kMaxFlatLength = 4096 - 9;
    constexpr int kHalf = kMaxFlatLength / 2;
    data_.reserve(30 + 21 + 21);

    // short strings increasing in length by one
    for (int i = 0; i < 30; i++) {
      data_.push_back(MakeString(i));
    }

    // strings around half kMaxFlatLength
    for (int i = -10; i <= +10; i++) {
      data_.push_back(MakeString(kHalf + i));
    }

    // strings around kMaxFlatLength
    for (int i = -10; i <= +10; i++) {
      data_.push_back(MakeString(kMaxFlatLength + i));
    }
  }

  // Number of test strings.
  size_t size() const { return data_.size(); }
  // The i-th test string; `i` must be less than size().
  const std::string& data(size_t i) const { return data_[i]; }
};
}  // namespace
<< "' + '" << b << "'"; + } + + { // Cord::Append(absl::string_view) + absl::Cord x(a); + x.Append(b); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Prepend(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Prepend(y); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + + { // Cord::Prepend(absl::string_view) + absl::Cord x(a); + x.Prepend(b); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + } + } +} + +namespace { + +TEST(Cord, RemoveSuffixWithExternalOrSubstring) { + absl::Cord cord = absl::MakeCordFromExternal( + "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); + + EXPECT_EQ("foo bar baz", std::string(cord)); + + // This RemoveSuffix() will wrap the EXTERNAL node in a SUBSTRING node. + cord.RemoveSuffix(4); + EXPECT_EQ("foo bar", std::string(cord)); + + // This RemoveSuffix() will adjust the SUBSTRING node in-place. + cord.RemoveSuffix(4); + EXPECT_EQ("foo", std::string(cord)); +} + +TEST(Cord, RemoveSuffixMakesZeroLengthNode) { + absl::Cord c; + c.Append(absl::Cord(std::string(100, 'x'))); + absl::Cord other_ref = c; // Prevent inplace appends + c.Append(absl::Cord(std::string(200, 'y'))); + c.RemoveSuffix(200); + EXPECT_EQ(std::string(100, 'x'), std::string(c)); +} + +} // namespace + +// CordSpliceTest contributed by hendrie. +namespace { + +// Create a cord with an external memory block filled with 'z' +absl::Cord CordWithZedBlock(size_t size) { + char* data = new char[size]; + if (size > 0) { + memset(data, 'z', size); + } + absl::Cord cord = absl::MakeCordFromExternal( + absl::string_view(data, size), + [](absl::string_view s) { delete[] s.data(); }); + return cord; +} + +// Establish that ZedBlock does what we think it does. 
+TEST(CordSpliceTest, ZedBlock) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("zzzzzzzzzz", s); +} + +TEST(CordSpliceTest, ZedBlock0) { + absl::Cord blob = CordWithZedBlock(0); + EXPECT_EQ(0, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("", s); +} + +TEST(CordSpliceTest, ZedBlockSuffix1) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(9); + EXPECT_EQ(1, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("z", s); +} + +// Remove all of a prefix block +TEST(CordSpliceTest, ZedBlockSuffix0) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(10); + EXPECT_EQ(0, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("", s); +} + +absl::Cord BigCord(size_t len, char v) { + std::string s(len, v); + return absl::Cord(s); +} + +// Splice block into cord. +absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, + const absl::Cord& block) { + ABSL_RAW_CHECK(offset >= 0, ""); + ABSL_RAW_CHECK(offset + block.size() <= blob.size(), ""); + absl::Cord result(blob); + result.RemoveSuffix(blob.size() - offset); + result.Append(block); + absl::Cord suffix(blob); + suffix.RemovePrefix(offset + block.size()); + result.Append(suffix); + ABSL_RAW_CHECK(blob.size() == result.size(), ""); + return result; +} + +// Taking an empty suffix of a block breaks appending. 
// Maps any int to -1, 0 or +1 according to its sign.
const auto sign = [](int x) { return (x > 0) - (x < 0); };
concat2.Append("cccccccccccDDDDDDDDDDDDDD"); + concat2.Append("DD"); + + std::vector<CordCompareTestCase> test_cases = {{ + // Inline cords + {"abcdef", "abcdef"}, + {"abcdef", "abcdee"}, + {"abcdef", "abcdeg"}, + {"bbcdef", "abcdef"}, + {"bbcdef", "abcdeg"}, + {"abcdefa", "abcdef"}, + {"abcdef", "abcdefa"}, + + // Small flat cords + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD"}, + {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX"}, + {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa"}, + + // Subcords + {subcord, subcord}, + {subcord, "aaBBBBBccc"}, + {subcord, "aaBBBBBccd"}, + {subcord, "aaBBBBBccb"}, + {subcord, "aaBBBBBxcb"}, + {subcord, "aaBBBBBccca"}, + {subcord, "aaBBBBBcc"}, + + // Concats + {concat, concat}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe"}, + + {concat, concat2}, + }}; + + for (const auto& tc : test_cases) { + VerifyComparison(tc); + } +} + +TEST(Cord, CompareAfterAssign) { + absl::Cord a("aaaaaa1111111"); + absl::Cord b("aaaaaa2222222"); + a = "cccccc"; + b = "cccccc"; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); + + a = "aaaa"; + b = "bbbbb"; + a = ""; + b = ""; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); +} + +// Test CompareTo() and ComparePrefix() against string and substring +// comparison methods from basic_string. 
+static void TestCompare(const absl::Cord& c, const absl::Cord& d, + RandomEngine* rng) { + typedef std::basic_string<uint8_t> ustring; + ustring cs(reinterpret_cast<const uint8_t*>(std::string(c).data()), c.size()); + ustring ds(reinterpret_cast<const uint8_t*>(std::string(d).data()), d.size()); + // ustring comparison is ideal because we expect Cord comparisons to be + // based on unsigned byte comparisons regardless of whether char is signed. + int expected = sign(cs.compare(ds)); + EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; +} + +TEST(Compare, ComparisonIsUnsigned) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); + char x = static_cast<char>(uniform_uint8(rng)); + TestCompare( + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x)), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x ^ 0x80)), &rng); +} + +TEST(Compare, RandomComparisons) { + const int kIters = 5000; + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + int n = GetUniformRandomUpTo(&rng, 5000); + absl::Cord a[] = {MakeExternalCord(n), + absl::Cord("ant"), + absl::Cord("elephant"), + absl::Cord("giraffe"), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), + GetUniformRandomUpTo(&rng, 100))), + absl::Cord(""), + absl::Cord("x"), + absl::Cord("A"), + absl::Cord("B"), + absl::Cord("C")}; + for (int i = 0; i < kIters; i++) { + absl::Cord c, d; + for (int j = 0; j < (i % 7) + 1; j++) { + c.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + d.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + } + std::bernoulli_distribution coin_flip(0.5); + TestCompare(coin_flip(rng) ? c : absl::Cord(std::string(c)), + coin_flip(rng) ? d : absl::Cord(std::string(d)), &rng); + } +} + +template <typename T1, typename T2> +void CompareOperators() { + const T1 a("a"); + const T2 b("b"); + + EXPECT_TRUE(a == a); + // For pointer type (i.e. 
`const char*`), operator== compares the address + // instead of the string, so `a == const char*("a")` isn't necessarily true. + EXPECT_TRUE(std::is_pointer<T1>::value || a == T1("a")); + EXPECT_TRUE(std::is_pointer<T2>::value || a == T2("a")); + EXPECT_FALSE(a == b); + + EXPECT_TRUE(a != b); + EXPECT_FALSE(a != a); + + EXPECT_TRUE(a < b); + EXPECT_FALSE(b < a); + + EXPECT_TRUE(b > a); + EXPECT_FALSE(a > b); + + EXPECT_TRUE(a >= a); + EXPECT_TRUE(b >= a); + EXPECT_FALSE(a >= b); + + EXPECT_TRUE(a <= a); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(b <= a); +} + +TEST(ComparisonOperators, Cord_Cord) { + CompareOperators<absl::Cord, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_StringPiece) { + CompareOperators<absl::Cord, absl::string_view>(); +} + +TEST(ComparisonOperators, StringPiece_Cord) { + CompareOperators<absl::string_view, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_string) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, string_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, stdstring_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_stdstring) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, charstar_Cord) { + CompareOperators<const char*, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_charstar) { + CompareOperators<absl::Cord, const char*>(); +} + +TEST(ConstructFromExternal, ReleaserInvoked) { + // Empty external memory means the releaser should be called immediately. + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal("", releaser); + EXPECT_TRUE(invoked); + } + } + + // If the size of the data is small enough, a future constructor + // implementation may copy the bytes and immediately invoke the releaser + // instead of creating an external node. 
We make a large dummy std::string to + // make this test independent of such an optimization. + std::string large_dummy(2048, 'c'); + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } + + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + absl::Cord copy; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + copy = c; + EXPECT_FALSE(invoked); + } + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } +} + +TEST(ConstructFromExternal, CompareContents) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + for (int length = 1; length <= 2048; length *= 2) { + std::string data = RandomLowercaseString(&rng, length); + auto* external = new std::string(data); + auto cord = + absl::MakeCordFromExternal(*external, [external](absl::string_view sv) { + EXPECT_EQ(external->data(), sv.data()); + EXPECT_EQ(external->size(), sv.size()); + delete external; + }); + EXPECT_EQ(data, cord); + } +} + +TEST(ConstructFromExternal, LargeReleaser) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + constexpr size_t kLength = 256; + std::string data = RandomLowercaseString(&rng, kLength); + std::array<char, kLength> data_array; + for (size_t i = 0; i < kLength; ++i) data_array[i] = data[i]; + bool invoked = false; + auto releaser = [data_array, &invoked](absl::string_view data) { + EXPECT_EQ(data, absl::string_view(data_array.data(), data_array.size())); + invoked = true; + }; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, FunctionPointerReleaser) { + static absl::string_view data("hello world"); + static bool invoked; + auto* releaser = + static_cast<void (*)(absl::string_view)>([](absl::string_view sv) { + EXPECT_EQ(data, sv); + invoked = true; + }); + invoked = false; + 
(void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); + + invoked = false; + (void)absl::MakeCordFromExternal(data, *releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, MoveOnlyReleaser) { + struct Releaser { + explicit Releaser(bool* invoked) : invoked(invoked) {} + Releaser(Releaser&& other) noexcept : invoked(other.invoked) {} + void operator()(absl::string_view) const { *invoked = true; } + + bool* invoked; + }; + + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", Releaser(&invoked)); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, NoArgLambda) { + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; }); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, StringViewArgLambda) { + bool invoked = false; + (void)absl::MakeCordFromExternal( + "dummy", [&invoked](absl::string_view) { invoked = true; }); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { + struct Releaser { + explicit Releaser(bool* destroyed) : destroyed(destroyed) {} + ~Releaser() { *destroyed = true; } + void operator()(absl::string_view) const {} + + bool* destroyed; + }; + + bool destroyed = false; + Releaser releaser(&destroyed); + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_TRUE(destroyed); +} + +TEST(ConstructFromExternal, ReferenceQualifierOverloads) { + struct Releaser { + void operator()(absl::string_view) & { *lvalue_invoked = true; } + void operator()(absl::string_view) && { *rvalue_invoked = true; } + + bool* lvalue_invoked; + bool* rvalue_invoked; + }; + + bool lvalue_invoked = false; + bool rvalue_invoked = false; + Releaser releaser = {&lvalue_invoked, &rvalue_invoked}; + (void)absl::MakeCordFromExternal("", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_FALSE(lvalue_invoked); + 
// Checks that RemovePrefix() followed by RemoveSuffix() on a composite cord
// yields exactly the corresponding std::string substring.
TEST(ExternalMemory, RemovePrefixSuffix) {
  // Exhaustively try all sub-strings.
  const absl::Cord cord = MakeComposite();
  const std::string s = std::string(cord);
  // Indices are size_t so they compare against s.size() without a
  // signed/unsigned conversion.
  for (size_t offset = 0; offset <= s.size(); offset++) {
    const size_t max_length = s.size() - offset;
    for (size_t length = 0; length <= max_length; length++) {
      absl::Cord result(cord);
      result.RemovePrefix(offset);
      result.RemoveSuffix(result.size() - length);
      EXPECT_EQ(s.substr(offset, length), std::string(result))
          << offset << " " << length;
    }
  }
}
+ +TEST(CordMemoryUsage, Empty) { + EXPECT_EQ(sizeof(absl::Cord), absl::Cord().EstimatedMemoryUsage()); +} + +TEST(CordMemoryUsage, Embedded) { + absl::Cord a("hello"); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, EmbeddedAppend) { + absl::Cord a("a"); + absl::Cord b("bcd"); + EXPECT_EQ(b.EstimatedMemoryUsage(), sizeof(absl::Cord)); + a.Append(b); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, ExternalMemory) { + static const int kLength = 1000; + absl::Cord cord; + AddExternalMemory(std::string(kLength, 'x'), &cord); + EXPECT_GT(cord.EstimatedMemoryUsage(), kLength); + EXPECT_LE(cord.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, Flat) { + static const int kLength = 125; + absl::Cord a(std::string(kLength, 'a')); + EXPECT_GT(a.EstimatedMemoryUsage(), kLength); + EXPECT_LE(a.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, AppendFlat) { + using absl::strings_internal::CordTestAccess; + absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); + size_t length = a.EstimatedMemoryUsage(); + a.Append(std::string(CordTestAccess::MaxFlatLength(), 'b')); + size_t delta = a.EstimatedMemoryUsage() - length; + EXPECT_GT(delta, CordTestAccess::MaxFlatLength()); + EXPECT_LE(delta, CordTestAccess::MaxFlatLength() * 1.5); +} + +// Regtest for a change that had to be rolled back because it expanded out +// of the InlineRep too soon, which was observable through MemoryUsage(). 
+TEST(CordMemoryUsage, InlineRep) {
+  constexpr size_t kMaxInline = 15;  // Cord::InlineRep::N
+  const std::string small_string(kMaxInline, 'x');
+  absl::Cord c1(small_string);
+
+  // Building the same contents through Append() must report the same usage
+  // as constructing from the string directly.
+  absl::Cord c2;
+  c2.Append(small_string);
+  EXPECT_EQ(c1, c2);
+  EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage());
+}
+
+}  // namespace
+
+// Regtest for 7510292 (fix a bug introduced by 7465150)
+TEST(Cord, Concat_Append) {
+  // Create a rep of type CONCAT
+  absl::Cord s1("foobarbarbarbarbar");
+  s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg");
+  size_t size = s1.size();
+
+  // Create a copy of s1 and append to it.
+  absl::Cord s2 = s1;
+  s2.Append("x");
+
+  // 7465150 modifies s1 when it shouldn't.
+  EXPECT_EQ(s1.size(), size);
+  EXPECT_EQ(s2.size(), size + 1);
+}
+
+// MakeFragmentedCord() from a braced list: the cord compares equal to the
+// concatenation and exposes exactly one chunk per input string, in order.
+TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) {
+  absl::Cord fragmented =
+      absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"});
+
+  EXPECT_EQ("A fragmented Cord", fragmented);
+
+  auto chunk_it = fragmented.chunk_begin();
+
+  ASSERT_TRUE(chunk_it != fragmented.chunk_end());
+  EXPECT_EQ("A ", *chunk_it);
+
+  ASSERT_TRUE(++chunk_it != fragmented.chunk_end());
+  EXPECT_EQ("fragmented ", *chunk_it);
+
+  ASSERT_TRUE(++chunk_it != fragmented.chunk_end());
+  EXPECT_EQ("Cord", *chunk_it);
+
+  ASSERT_TRUE(++chunk_it == fragmented.chunk_end());
+}
+
+// Same expectations as above, with the pieces supplied through a std::vector.
+TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) {
+  std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"};
+  absl::Cord fragmented = absl::MakeFragmentedCord(chunks);
+
+  EXPECT_EQ("A fragmented Cord", fragmented);
+
+  auto chunk_it = fragmented.chunk_begin();
+
+  ASSERT_TRUE(chunk_it != fragmented.chunk_end());
+  EXPECT_EQ("A ", *chunk_it);
+
+  ASSERT_TRUE(++chunk_it != fragmented.chunk_end());
+  EXPECT_EQ("fragmented ", *chunk_it);
+
+  ASSERT_TRUE(++chunk_it != fragmented.chunk_end());
+  EXPECT_EQ("Cord", *chunk_it);
+
+  ASSERT_TRUE(++chunk_it == fragmented.chunk_end());
+}
+
+// Compile-time checks of ChunkIterator's copy/move properties and of the
+// member types published through std::iterator_traits.
+TEST(CordChunkIterator, Traits) {
+  static_assert(std::is_copy_constructible<absl::Cord::ChunkIterator>::value,
+                "");
+  static_assert(std::is_copy_assignable<absl::Cord::ChunkIterator>::value, "");
+
+  // Move semantics to satisfy swappable via std::swap
+  static_assert(std::is_move_constructible<absl::Cord::ChunkIterator>::value,
+                "");
+  static_assert(std::is_move_assignable<absl::Cord::ChunkIterator>::value, "");
+
+  static_assert(
+      std::is_same<
+          std::iterator_traits<absl::Cord::ChunkIterator>::iterator_category,
+          std::input_iterator_tag>::value,
+      "");
+  static_assert(
+      std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::value_type,
+                   absl::string_view>::value,
+      "");
+  static_assert(
+      std::is_same<
+          std::iterator_traits<absl::Cord::ChunkIterator>::difference_type,
+          ptrdiff_t>::value,
+      "");
+  static_assert(
+      std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::pointer,
+                   const absl::string_view*>::value,
+      "");
+  static_assert(
+      std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::reference,
+                   absl::string_view>::value,
+      "");
+}
+
+// Walks `cord` chunk by chunk with two iterators (one advanced with
+// pre-increment, one with post-increment) and checks that:
+//   * begin == end exactly when the cord is empty,
+//   * every chunk is non-empty and matches the flattened contents at the
+//     running offset,
+//   * operator-> and operator* agree,
+//   * exactly one iterator position in the range compares equal to the
+//     current one,
+//   * the number of chunks visited equals `expected_chunks`.
+static void VerifyChunkIterator(const absl::Cord& cord,
+                                size_t expected_chunks) {
+  EXPECT_EQ(cord.chunk_begin() == cord.chunk_end(), cord.empty()) << cord;
+  EXPECT_EQ(cord.chunk_begin() != cord.chunk_end(), !cord.empty());
+
+  absl::Cord::ChunkRange range = cord.Chunks();
+  EXPECT_EQ(range.begin() == range.end(), cord.empty());
+  EXPECT_EQ(range.begin() != range.end(), !cord.empty());
+
+  std::string content(cord);
+  size_t pos = 0;
+  auto pre_iter = cord.chunk_begin(), post_iter = cord.chunk_begin();
+  size_t n_chunks = 0;
+  while (pre_iter != cord.chunk_end() && post_iter != cord.chunk_end()) {
+    EXPECT_FALSE(pre_iter == cord.chunk_end());   // NOLINT: explicitly test ==
+    EXPECT_FALSE(post_iter == cord.chunk_end());  // NOLINT
+
+    EXPECT_EQ(pre_iter, post_iter);
+    EXPECT_EQ(*pre_iter, *post_iter);
+
+    EXPECT_EQ(pre_iter->data(), (*pre_iter).data());
+    EXPECT_EQ(pre_iter->size(), (*pre_iter).size());
+
+    absl::string_view chunk = *pre_iter;
+    EXPECT_FALSE(chunk.empty());
+    EXPECT_LE(pos + chunk.size(), content.size());
+    EXPECT_EQ(absl::string_view(content.c_str() + pos, chunk.size()), chunk);
+
+    // Scan the whole range: only the current position may equal pre_iter.
+    int n_equal_iterators = 0;
+    for (absl::Cord::ChunkIterator it = range.begin(); it != range.end();
+         ++it) {
+      n_equal_iterators += static_cast<int>(it == pre_iter);
+    }
+    EXPECT_EQ(n_equal_iterators, 1);
+
+    ++pre_iter;
+    EXPECT_EQ(*post_iter++, chunk);
+
+    pos += chunk.size();
+    ++n_chunks;
+  }
+  EXPECT_EQ(expected_chunks, n_chunks);
+  EXPECT_EQ(pos, content.size());
+  EXPECT_TRUE(pre_iter == cord.chunk_end());   // NOLINT: explicitly test ==
+  EXPECT_TRUE(post_iter == cord.chunk_end());  // NOLINT
+}
+
+// Runs VerifyChunkIterator over empty, short, longer, fragmented,
+// self-prepended and subcord-built cords with their expected chunk counts.
+TEST(CordChunkIterator, Operations) {
+  absl::Cord empty_cord;
+  VerifyChunkIterator(empty_cord, 0);
+
+  absl::Cord small_buffer_cord("small cord");
+  VerifyChunkIterator(small_buffer_cord, 1);
+
+  absl::Cord flat_node_cord("larger than small buffer optimization");
+  VerifyChunkIterator(flat_node_cord, 1);
+
+  VerifyChunkIterator(
+      absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ",
+                                "testing ", "chunk ", "iterations."}),
+      8);
+
+  absl::Cord reused_nodes_cord(std::string(40, 'c'));
+  reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'b')));
+  reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'a')));
+  size_t expected_chunks = 3;
+  // Each self-prepend doubles the expected chunk count.
+  for (int i = 0; i < 8; ++i) {
+    reused_nodes_cord.Prepend(reused_nodes_cord);
+    expected_chunks *= 2;
+    VerifyChunkIterator(reused_nodes_cord, expected_chunks);
+  }
+
+  RandomEngine rng(testing::GTEST_FLAG(random_seed));
+  absl::Cord flat_cord(RandomLowercaseString(&rng, 256));
+  absl::Cord subcords;
+  for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128));
+  VerifyChunkIterator(subcords, 128);
+}
+
+// Compile-time checks of CharIterator's copy/move properties and of the
+// member types published through std::iterator_traits.
+TEST(CordCharIterator, Traits) {
+  static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value,
+                "");
+  static_assert(std::is_copy_assignable<absl::Cord::CharIterator>::value, "");
+
+  // Move semantics to satisfy swappable via std::swap
+  static_assert(std::is_move_constructible<absl::Cord::CharIterator>::value,
+                "");
+  static_assert(std::is_move_assignable<absl::Cord::CharIterator>::value, "");
+
+  static_assert(
+      std::is_same<
+          std::iterator_traits<absl::Cord::CharIterator>::iterator_category,
+          std::input_iterator_tag>::value,
+      "");
+  static_assert(
+      std::is_same<std::iterator_traits<absl::Cord::CharIterator>::value_type,
+                   char>::value,
+      "");
+  static_assert(
+      std::is_same<
+          std::iterator_traits<absl::Cord::CharIterator>::difference_type,
+          ptrdiff_t>::value,
+      "");
+  static_assert(
+      std::is_same<std::iterator_traits<absl::Cord::CharIterator>::pointer,
+                   const char*>::value,
+      "");
+  static_assert(
+      std::is_same<std::iterator_traits<absl::Cord::CharIterator>::reference,
+                   const char&>::value,
+      "");
+}
+
+// Walks `cord` character by character with a pre-incremented and a
+// post-incremented iterator and checks, at every index i, that:
+//   * the dereferenced character matches the flattened contents,
+//   * operator-> and operator* yield the same address, and that address is
+//     unchanged by incrementing a copy of the iterator,
+//   * Advance()/AdvanceAndRead() from begin by i reach the current position
+//     and, from the current position by size() - i, reach end,
+//   * exactly one position in the range compares equal to the current one.
+// Finally checks Advance(end, 0) and ChunkRemaining() against Chunks().
+static void VerifyCharIterator(const absl::Cord& cord) {
+  EXPECT_EQ(cord.char_begin() == cord.char_end(), cord.empty());
+  EXPECT_EQ(cord.char_begin() != cord.char_end(), !cord.empty());
+
+  absl::Cord::CharRange range = cord.Chars();
+  EXPECT_EQ(range.begin() == range.end(), cord.empty());
+  EXPECT_EQ(range.begin() != range.end(), !cord.empty());
+
+  size_t i = 0;
+  absl::Cord::CharIterator pre_iter = cord.char_begin();
+  absl::Cord::CharIterator post_iter = cord.char_begin();
+  std::string content(cord);
+  while (pre_iter != cord.char_end() && post_iter != cord.char_end()) {
+    EXPECT_FALSE(pre_iter == cord.char_end());   // NOLINT: explicitly test ==
+    EXPECT_FALSE(post_iter == cord.char_end());  // NOLINT
+
+    EXPECT_LT(i, cord.size());
+    EXPECT_EQ(content[i], *pre_iter);
+
+    EXPECT_EQ(pre_iter, post_iter);
+    EXPECT_EQ(*pre_iter, *post_iter);
+    EXPECT_EQ(&*pre_iter, &*post_iter);
+
+    EXPECT_EQ(&*pre_iter, pre_iter.operator->());
+
+    // Advancing a copy must not change what the original refers to.
+    const char* character_address = &*pre_iter;
+    absl::Cord::CharIterator copy = pre_iter;
+    ++copy;
+    EXPECT_EQ(character_address, &*pre_iter);
+
+    int n_equal_iterators = 0;
+    for (absl::Cord::CharIterator it = range.begin(); it != range.end(); ++it) {
+      n_equal_iterators += static_cast<int>(it == pre_iter);
+    }
+    EXPECT_EQ(n_equal_iterators, 1);
+
+    absl::Cord::CharIterator advance_iter = range.begin();
+    absl::Cord::Advance(&advance_iter, i);
+    EXPECT_EQ(pre_iter, advance_iter);
+
+    advance_iter = range.begin();
+    EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, i), cord.Subcord(0, i));
+    EXPECT_EQ(pre_iter, advance_iter);
+
+    advance_iter = pre_iter;
+    absl::Cord::Advance(&advance_iter, cord.size() - i);
+    EXPECT_EQ(range.end(), advance_iter);
+
+    advance_iter = pre_iter;
+    EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, cord.size() - i),
+              cord.Subcord(i, cord.size() - i));
+    EXPECT_EQ(range.end(), advance_iter);
+
+    ++i;
+    ++pre_iter;
+    post_iter++;
+  }
+  EXPECT_EQ(i, cord.size());
+  EXPECT_TRUE(pre_iter == cord.char_end());   // NOLINT: explicitly test ==
+  EXPECT_TRUE(post_iter == cord.char_end());  // NOLINT
+
+  absl::Cord::CharIterator zero_advanced_end = cord.char_end();
+  absl::Cord::Advance(&zero_advanced_end, 0);
+  EXPECT_EQ(zero_advanced_end, cord.char_end());
+
+  // ChunkRemaining(it) must equal the not-yet-consumed part of each chunk.
+  absl::Cord::CharIterator it = cord.char_begin();
+  for (absl::string_view chunk : cord.Chunks()) {
+    while (!chunk.empty()) {
+      EXPECT_EQ(absl::Cord::ChunkRemaining(it), chunk);
+      chunk.remove_prefix(1);
+      ++it;
+    }
+  }
+}
+
+// Runs VerifyCharIterator over empty, short, longer, fragmented,
+// self-prepended and subcord-built cords.
+TEST(CordCharIterator, Operations) {
+  absl::Cord empty_cord;
+  VerifyCharIterator(empty_cord);
+
+  absl::Cord small_buffer_cord("small cord");
+  VerifyCharIterator(small_buffer_cord);
+
+  absl::Cord flat_node_cord("larger than small buffer optimization");
+  VerifyCharIterator(flat_node_cord);
+
+  VerifyCharIterator(
+      absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ",
+                                "testing ", "character ", "iteration."}));
+
+  absl::Cord reused_nodes_cord("ghi");
+  reused_nodes_cord.Prepend(absl::Cord("def"));
+  reused_nodes_cord.Prepend(absl::Cord("abc"));
+  for (int i = 0; i < 4; ++i) {
+    reused_nodes_cord.Prepend(reused_nodes_cord);
+    VerifyCharIterator(reused_nodes_cord);
+  }
+
+  RandomEngine rng(testing::GTEST_FLAG(random_seed));
+  absl::Cord flat_cord(RandomLowercaseString(&rng, 256));
+  absl::Cord subcords;
+  for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128));
+  VerifyCharIterator(subcords);
+}
+
+// operator<< on a fragmented cord writes the concatenated contents.
+TEST(Cord, StreamingOutput) {
+  absl::Cord c =
+      absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."});
+  std::stringstream output;
+  output << c;
+  EXPECT_EQ("A small fragmented Cord.", output.str());
+}
+
+// CordTestPeer::ForEachChunk must visit the same chunks, in the same order,
+// that were passed to MakeFragmentedCord, for 1, 10 and 200 pieces.
+TEST(Cord, ForEachChunk) {
+  for (int num_elements : {1, 10, 200}) {
+    SCOPED_TRACE(num_elements);
+    std::vector<std::string> cord_chunks;
+    for (int i = 0; i < num_elements; ++i) {
+      cord_chunks.push_back(absl::StrCat("[", i, "]"));
+    }
+    absl::Cord c = absl::MakeFragmentedCord(cord_chunks);
+
+    std::vector<std::string> iterated_chunks;
+    absl::CordTestPeer::ForEachChunk(c,
+                                     [&iterated_chunks](absl::string_view sv) {
+                                       iterated_chunks.emplace_back(sv);
+                                     });
+    EXPECT_EQ(iterated_chunks, cord_chunks);
+  }
+}
+
+// Assigning to a cord from a string_view returned by its own Flatten() must
+// produce the right substring for every (pos, count) of a 15-character cord.
+TEST(Cord, SmallBufferAssignFromOwnData) {
+  constexpr size_t kMaxInline = 15;
+  std::string contents = "small buff cord";
+  EXPECT_EQ(contents.size(), kMaxInline);
+  for (size_t pos = 0; pos < contents.size(); ++pos) {
+    for (size_t count = contents.size() - pos; count > 0; --count) {
+      absl::Cord c(contents);
+      absl::string_view flat = c.Flatten();
+      c = flat.substr(pos, count);
+      EXPECT_EQ(c, contents.substr(pos, count))
+          << "pos = " << pos << "; count = " << count;
+    }
+  }
+}
+
+// absl::Format() into a Cord appends: the second call extends the first
+// call's output rather than replacing it.
+TEST(Cord, Format) {
+  absl::Cord c;
+  absl::Format(&c, "There were %04d little %s.", 3, "pigs");
+  EXPECT_EQ(c, "There were 0003 little pigs.");
+  absl::Format(&c, "And %-3llx bad wolf!", 1);
+  EXPECT_EQ(c, "There were 0003 little pigs.And 1 bad wolf!");
+}
+
+TEST(CordDeathTest, Hardening) {
+  absl::Cord cord("hello");
+  // These statements should abort the program in all build modes.
+  EXPECT_DEATH_IF_SUPPORTED(cord.RemovePrefix(6), "");
+  EXPECT_DEATH_IF_SUPPORTED(cord.RemoveSuffix(6), "");
+
+  // Detect whether ABSL_HARDENING_ASSERT evaluates its argument; the checks
+  // after the early return only run when it does.
+  bool test_hardening = false;
+  ABSL_HARDENING_ASSERT([&]() {
+    // This only runs when ABSL_HARDENING_ASSERT is active.
+    test_hardening = true;
+    return true;
+  }());
+  if (!test_hardening) return;
+
+  EXPECT_DEATH_IF_SUPPORTED(cord[5], "");
+  EXPECT_DEATH_IF_SUPPORTED(*cord.chunk_end(), "");
+  EXPECT_DEATH_IF_SUPPORTED(static_cast<void>(cord.chunk_end()->empty()), "");
+  EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), "");
+}
+
+// Remembers a cord and its expected contents via Set(); the destructor
+// checks that the cord still compares equal to them.
+// NOTE(review): cord_ has no initializer, so the destructor relies on Set()
+// having been called first.
+class AfterExitCordTester {
+ public:
+  // Records the cord to check and the contents it must have. Always returns
+  // true so that it can be used to initialize a static bool.
+  bool Set(absl::Cord* cord, absl::string_view expected) {
+    cord_ = cord;
+    expected_ = expected;
+    return true;
+  }
+
+  ~AfterExitCordTester() {
+    EXPECT_EQ(*cord_, expected_);
+  }
+ private:
+  absl::Cord* cord_;
+  absl::string_view expected_;
+};
+
+// Exercises a constant-initialized static Cord built from the string
+// constant type `Str`: equality with Str::value, copying, appending to
+// copies, the tree-vs-inline representation choice, and repeated
+// construction of temporaries from the same constant.
+template <typename Str>
+void TestConstinitConstructor(Str) {
+  const auto expected = Str::value;
+  // Defined before `cord` to be destroyed after it.
+  static AfterExitCordTester exit_tester;  // NOLINT
+  ABSL_CONST_INIT static absl::Cord cord(Str{});  // NOLINT
+  static bool init_exit_tester = exit_tester.Set(&cord, expected);
+  (void)init_exit_tester;
+
+  EXPECT_EQ(cord, expected);
+  // Copy the object and test the copy, and the original.
+  {
+    absl::Cord copy = cord;
+    EXPECT_EQ(copy, expected);
+  }
+  // The original still works
+  EXPECT_EQ(cord, expected);
+
+  // Try adding more structure to the tree.
+  {
+    absl::Cord copy = cord;
+    std::string expected_copy(expected);
+    for (int i = 0; i < 10; ++i) {
+      copy.Append(cord);
+      absl::StrAppend(&expected_copy, expected);
+      EXPECT_EQ(copy, expected_copy);
+    }
+  }
+
+  // Make sure we are using the right branch during constant evaluation.
+  EXPECT_EQ(absl::CordTestPeer::IsTree(cord), cord.size() >= 16);
+
+  for (int i = 0; i < 10; ++i) {
+    // Make a few more Cords from the same global rep.
+    // This tests what happens when the refcount for it gets below 1.
+    EXPECT_EQ(expected, absl::Cord(Str{}));
+  }
+}
+
+// Recursive strlen that can be evaluated in a constant expression.
+constexpr int SimpleStrlen(const char* p) {
+  return *p ? 1 + SimpleStrlen(p + 1) : 0;
+}
+
+// Callable yielding a 10-character view, used as the short test string.
+struct ShortView {
+  constexpr absl::string_view operator()() const {
+    return absl::string_view("SSO string", SimpleStrlen("SSO string"));
+  }
+};
+
+// Callable yielding a 29-character view, used as the long test string.
+struct LongView {
+  constexpr absl::string_view operator()() const {
+    return absl::string_view("String that does not fit SSO.",
+                             SimpleStrlen("String that does not fit SSO."));
+  }
+};
+
+
+// Runs TestConstinitConstructor for both the short and the long constant.
+TEST(Cord, ConstinitConstructor) {
+  TestConstinitConstructor(
+      absl::strings_internal::MakeStringConstant(ShortView{}));
+  TestConstinitConstructor(
+      absl::strings_internal::MakeStringConstant(LongView{}));
+}
diff --git a/third_party/abseil_cpp/absl/strings/cord_test_helpers.h b/third_party/abseil_cpp/absl/strings/cord_test_helpers.h
new file mode 100644
index 000000000000..f1036e3b1388
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/cord_test_helpers.h
@@ -0,0 +1,60 @@
+//
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_
+#define ABSL_STRINGS_CORD_TEST_HELPERS_H_
+
+#include "absl/strings/cord.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+
+// Creates a multi-segment Cord from an iterable container of strings. The
+// resulting Cord is guaranteed to have one segment for every string in the
+// container. This allows code to be unit tested with multi-segment Cord
+// inputs.
+// +// Example: +// +// absl::Cord c = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); +// EXPECT_FALSE(c.GetFlat(&unused)); +// +// The mechanism by which this Cord is created is an implementation detail. Any +// implementation that produces a multi-segment Cord may produce a flat Cord in +// the future as new optimizations are added to the Cord class. +// MakeFragmentedCord will, however, always be updated to return a multi-segment +// Cord. +template <typename Container> +Cord MakeFragmentedCord(const Container& c) { + Cord result; + for (const auto& s : c) { + auto* external = new std::string(s); + Cord tmp = absl::MakeCordFromExternal( + *external, [external](absl::string_view) { delete external; }); + tmp.Prepend(result); + result = tmp; + } + return result; +} + +inline Cord MakeFragmentedCord(std::initializer_list<absl::string_view> list) { + return MakeFragmentedCord<std::initializer_list<absl::string_view>>(list); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_ diff --git a/third_party/abseil_cpp/absl/strings/escaping.cc b/third_party/abseil_cpp/absl/strings/escaping.cc new file mode 100644 index 000000000000..18b20b83fd36 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/escaping.cc @@ -0,0 +1,949 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/escaping.h" + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <string> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/unaligned_access.h" +#include "absl/strings/internal/char_map.h" +#include "absl/strings/internal/escaping.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/internal/utf8.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +// These are used for the leave_nulls_escaped argument to CUnescapeInternal(). +constexpr bool kUnescapeNulls = false; + +inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); } + +inline int hex_digit_to_int(char c) { + static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, + "Character set must be ASCII."); + assert(absl::ascii_isxdigit(c)); + int x = static_cast<unsigned char>(c); + if (x > '9') { + x += 9; + } + return x & 0xf; +} + +inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) { + if (c >= 0xD800 && c <= 0xDFFF) { + if (error) { + *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\", + src); + } + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// CUnescapeInternal() +// Implements both CUnescape() and CUnescapeForNullTerminatedString(). +// +// Unescapes C escape sequences and is the reverse of CEscape(). +// +// If 'source' is valid, stores the unescaped string and its size in +// 'dest' and 'dest_len' respectively, and returns true. Otherwise +// returns false and optionally stores the error description in +// 'error'. Set 'error' to nullptr to disable error reporting. +// +// 'dest' should point to a buffer that is at least as big as 'source'. 
+// 'source' and 'dest' may be the same.
+//
+// Numeric escapes (octal and hex) must denote a value no larger than 0xff.
+// A hex escape may contain arbitrarily many digits; a value that exceeds
+// 0xff is rejected no matter how many digits follow (the accumulator is
+// never allowed to wrap around to a small value).
+//
+// When 'leave_nulls_escaped' is true, an escape sequence that denotes the
+// NUL character is copied to the output verbatim instead of being decoded.
+//
+// NOTE: any changes to this function must also be reflected in the older
+// UnescapeCEscapeSequences().
+// ----------------------------------------------------------------------
+bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
+                       char* dest, ptrdiff_t* dest_len, std::string* error) {
+  char* d = dest;
+  const char* p = source.data();
+  const char* end = p + source.size();
+  const char* last_byte = end - 1;
+
+  // Small optimization for case where source = dest and there's no escaping
+  while (p == d && p < end && *p != '\\') p++, d++;
+
+  while (p < end) {
+    if (*p != '\\') {
+      *d++ = *p++;
+    } else {
+      if (++p > last_byte) {  // skip past the '\\'
+        if (error) *error = "String cannot end with \\";
+        return false;
+      }
+      switch (*p) {
+        case 'a': *d++ = '\a'; break;
+        case 'b': *d++ = '\b'; break;
+        case 'f': *d++ = '\f'; break;
+        case 'n': *d++ = '\n'; break;
+        case 'r': *d++ = '\r'; break;
+        case 't': *d++ = '\t'; break;
+        case 'v': *d++ = '\v'; break;
+        case '\\': *d++ = '\\'; break;
+        case '?': *d++ = '\?'; break;  // \? Who knew?
+        case '\'': *d++ = '\''; break;
+        case '"': *d++ = '\"'; break;
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7': {
+          // octal digit: 1 to 3 digits
+          const char* octal_start = p;
+          unsigned int ch = *p - '0';
+          if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
+          if (p < last_byte && is_octal_digit(p[1]))
+            ch = ch * 8 + *++p - '0';  // now points at last digit
+          if (ch > 0xff) {
+            if (error) {
+              *error = "Value of \\" +
+                       std::string(octal_start, p + 1 - octal_start) +
+                       " exceeds 0xff";
+            }
+            return false;
+          }
+          if ((ch == 0) && leave_nulls_escaped) {
+            // Copy the escape sequence for the null character
+            const ptrdiff_t octal_size = p + 1 - octal_start;
+            *d++ = '\\';
+            memmove(d, octal_start, octal_size);
+            d += octal_size;
+            break;
+          }
+          *d++ = ch;
+          break;
+        }
+        case 'x':
+        case 'X': {
+          if (p >= last_byte) {
+            if (error) *error = "String cannot end with \\x";
+            return false;
+          } else if (!absl::ascii_isxdigit(p[1])) {
+            if (error) *error = "\\x cannot be followed by a non-hex digit";
+            return false;
+          }
+          unsigned int ch = 0;
+          const char* hex_start = p;
+          while (p < last_byte && absl::ascii_isxdigit(p[1])) {
+            // Arbitrarily many hex digits are consumed. Once the value has
+            // left the valid range it is no longer updated: shifting further
+            // could push the high bits out of the unsigned int and wrap the
+            // value back to something <= 0xFF, hiding the error below.
+            const unsigned int digit = hex_digit_to_int(*++p);
+            if (ch <= 0xFF) ch = (ch << 4) + digit;
+          }
+          if (ch > 0xFF) {
+            if (error) {
+              *error = "Value of \\" +
+                       std::string(hex_start, p + 1 - hex_start) +
+                       " exceeds 0xff";
+            }
+            return false;
+          }
+          if ((ch == 0) && leave_nulls_escaped) {
+            // Copy the escape sequence for the null character
+            const ptrdiff_t hex_size = p + 1 - hex_start;
+            *d++ = '\\';
+            memmove(d, hex_start, hex_size);
+            d += hex_size;
+            break;
+          }
+          *d++ = ch;
+          break;
+        }
+        case 'u': {
+          // \uhhhh => convert 4 hex digits to UTF-8
+          char32_t rune = 0;
+          const char* hex_start = p;
+          if (p + 4 >= end) {
+            if (error) {
+              *error = "\\u must be followed by 4 hex digits: \\" +
+                       std::string(hex_start, p + 1 - hex_start);
+            }
+            return false;
+          }
+          for (int i = 0; i < 4; ++i) {
+            // Look one char ahead.
+            if (absl::ascii_isxdigit(p[1])) {
+              rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
+            } else {
+              if (error) {
+                *error = "\\u must be followed by 4 hex digits: \\" +
+                         std::string(hex_start, p + 1 - hex_start);
+              }
+              return false;
+            }
+          }
+          if ((rune == 0) && leave_nulls_escaped) {
+            // Copy the escape sequence for the null character
+            *d++ = '\\';
+            memmove(d, hex_start, 5);  // u0000
+            d += 5;
+            break;
+          }
+          if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
+            return false;
+          }
+          d += strings_internal::EncodeUTF8Char(d, rune);
+          break;
+        }
+        case 'U': {
+          // \Uhhhhhhhh => convert 8 hex digits to UTF-8
+          char32_t rune = 0;
+          const char* hex_start = p;
+          if (p + 8 >= end) {
+            if (error) {
+              *error = "\\U must be followed by 8 hex digits: \\" +
+                       std::string(hex_start, p + 1 - hex_start);
+            }
+            return false;
+          }
+          for (int i = 0; i < 8; ++i) {
+            // Look one char ahead.
+            if (absl::ascii_isxdigit(p[1])) {
+              // Don't change rune until we're sure this
+              // is within the Unicode limit, but do advance p.
+              uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
+              if (newrune > 0x10FFFF) {
+                if (error) {
+                  *error = "Value of \\" +
+                           std::string(hex_start, p + 1 - hex_start) +
+                           " exceeds Unicode limit (0x10FFFF)";
+                }
+                return false;
+              } else {
+                rune = newrune;
+              }
+            } else {
+              if (error) {
+                *error = "\\U must be followed by 8 hex digits: \\" +
+                         std::string(hex_start, p + 1 - hex_start);
+              }
+              return false;
+            }
+          }
+          if ((rune == 0) && leave_nulls_escaped) {
+            // Copy the escape sequence for the null character
+            *d++ = '\\';
+            memmove(d, hex_start, 9);  // U00000000
+            d += 9;
+            break;
+          }
+          if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
+            return false;
+          }
+          d += strings_internal::EncodeUTF8Char(d, rune);
+          break;
+        }
+        default: {
+          if (error) *error = std::string("Unknown escape sequence: \\") + *p;
+          return false;
+        }
+      }
+      p++;  // read past letter we escaped
+    }
+  }
+  *dest_len = d - dest;
+  return true;
+}
+
+// ----------------------------------------------------------------------
+// CUnescapeInternal()
+//
+//    Same as above but uses a std::string for output. 'source' and 'dest'
+//    may be the same.
+//
+//    'dest' is first resized to source.size() and, on success, truncated to
+//    the unescaped length. On failure it is left at the resized length.
+// ----------------------------------------------------------------------
+bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
+                       std::string* dest, std::string* error) {
+  strings_internal::STLStringResizeUninitialized(dest, source.size());
+
+  ptrdiff_t dest_size;
+  if (!CUnescapeInternal(source,
+                         leave_nulls_escaped,
+                         &(*dest)[0],
+                         &dest_size,
+                         error)) {
+    return false;
+  }
+  dest->erase(dest_size);
+  return true;
+}
+
+// ----------------------------------------------------------------------
+// CEscape()
+// CHexEscape()
+// Utf8SafeCEscape()
+// Utf8SafeCHexEscape()
+//    Escapes 'src' using C-style escape sequences. This is useful for
+//    preparing query flags. The 'Hex' version uses hexadecimal rather than
+//    octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes.
+//
+//    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
+//
+//    'use_hex' selects \xNN output instead of three-digit octal output for
+//    bytes that have no dedicated escape. When 'utf8_safe' is true, bytes
+//    >= 0x80 are copied through unchanged.
+// ----------------------------------------------------------------------
+std::string CEscapeInternal(absl::string_view src, bool use_hex,
+                            bool utf8_safe) {
+  std::string dest;
+  bool last_hex_escape = false;  // true if last output char was \xNN.
+
+  for (unsigned char c : src) {
+    bool is_hex_escape = false;
+    switch (c) {
+      case '\n': dest.append("\\" "n"); break;
+      case '\r': dest.append("\\" "r"); break;
+      case '\t': dest.append("\\" "t"); break;
+      case '\"': dest.append("\\" "\""); break;
+      case '\'': dest.append("\\" "'"); break;
+      case '\\': dest.append("\\" "\\"); break;
+      default:
+        // Note that if we emit \xNN and the src character after that is a hex
+        // digit then that digit must be escaped too to prevent it being
+        // interpreted as part of the character code by C.
+        if ((!utf8_safe || c < 0x80) &&
+            (!absl::ascii_isprint(c) ||
+             (last_hex_escape && absl::ascii_isxdigit(c)))) {
+          if (use_hex) {
+            dest.append("\\" "x");
+            dest.push_back(numbers_internal::kHexChar[c / 16]);
+            dest.push_back(numbers_internal::kHexChar[c % 16]);
+            is_hex_escape = true;
+          } else {
+            // Three octal digits. The indices used here are all in 0..7;
+            // presumably kHexChar maps those to '0'..'7' (defined elsewhere).
+            dest.append("\\");
+            dest.push_back(numbers_internal::kHexChar[c / 64]);
+            dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]);
+            dest.push_back(numbers_internal::kHexChar[c % 8]);
+          }
+        } else {
+          dest.push_back(c);
+          break;
+        }
+    }
+    last_hex_escape = is_hex_escape;
+  }
+
+  return dest;
+}
+
+// Length of the octal-escaped form of each byte value, indexed by the byte:
+// 1 for bytes copied as-is, 2 for the two-character escapes, and 4 for a
+// backslash followed by three octal digits.
+/* clang-format off */
+constexpr char c_escaped_len[256] = {
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+};
+/* clang-format on */
+
+// Calculates the length of the C-style escaped version of 'src'.
+// Assumes that non-printable characters are escaped using octal sequences, and
+// that UTF-8 bytes are not handled specially.
+inline size_t CEscapedLength(absl::string_view src) {
+  size_t escaped_len = 0;
+  for (unsigned char c : src) escaped_len += c_escaped_len[c];
+  return escaped_len;
+}
+
+// Appends the octal-style C-escaped form of 'src' to '*dest'.
+// The escaped length is computed up front so that '*dest' is resized once
+// and then filled in place. If nothing needs escaping, 'src' is appended
+// unchanged.
+void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
+  size_t escaped_len = CEscapedLength(src);
+  if (escaped_len == src.size()) {
+    dest->append(src.data(), src.size());
+    return;
+  }
+
+  size_t cur_dest_len = dest->size();
+  strings_internal::STLStringResizeUninitialized(dest,
+                                                 cur_dest_len + escaped_len);
+  char* append_ptr = &(*dest)[cur_dest_len];
+
+  for (unsigned char c : src) {
+    // The table entry decides which of the three output forms is used.
+    int char_len = c_escaped_len[c];
+    if (char_len == 1) {
+      *append_ptr++ = c;
+    } else if (char_len == 2) {
+      switch (c) {
+        case '\n':
+          *append_ptr++ = '\\';
+          *append_ptr++ = 'n';
+          break;
+        case '\r':
+          *append_ptr++ = '\\';
+          *append_ptr++ = 'r';
+          break;
+        case '\t':
+          *append_ptr++ = '\\';
+          *append_ptr++ = 't';
+          break;
+        case '\"':
+          *append_ptr++ = '\\';
+          *append_ptr++ = '\"';
+          break;
+        case '\'':
+          *append_ptr++ = '\\';
+          *append_ptr++ = '\'';
+          break;
+        case '\\':
+          *append_ptr++ = '\\';
+          *append_ptr++ = '\\';
+          break;
+      }
+    } else {
+      // Backslash followed by exactly three octal digits.
+      *append_ptr++ = '\\';
+      *append_ptr++ = '0' + c / 64;
+      *append_ptr++ = '0' + (c % 64) / 8;
+      *append_ptr++ = '0' + c % 8;
+    }
+  }
+}
+
+// Decodes base64 text.
+//
+//   src_param, szsrc  input bytes and their count
+//   dest, szdest      output buffer and its capacity; when 'dest' is null
+//                     the input is only validated and its decoded size
+//                     counted, nothing is written
+//   unbase64          256-entry table mapping an input byte to its 6-bit
+//                     value, with a negative entry for non-data bytes
+//   len               receives the decoded length on success
+//
+// Returns true on success. Returns false, leaving '*len' untouched, when the
+// input contains an illegal character, when the output does not fit in
+// 'szdest', when a single data character is left over, or when the number
+// of pad characters is neither zero nor the expected count. Whitespace in
+// the input is skipped, and both '=' and '.' are accepted as pad characters.
+bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
+                            size_t szdest, const signed char* unbase64,
+                            size_t* len) {
+  static const char kPad64Equals = '=';
+  static const char kPad64Dot = '.';
+
+  size_t destidx = 0;
+  int decode = 0;
+  int state = 0;
+  unsigned int ch = 0;
+  unsigned int temp = 0;
+
+  // If "char" is signed by default, using *src as an array index results in
+  // accessing negative array elements. Treat the input as a pointer to
+  // unsigned char to avoid this.
+  const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
+
+  // The GET_INPUT macro gets the next input character, skipping
+  // over any whitespace, and stopping when we reach the end of the
+  // string or when we read any non-data character. The arguments are
+  // an arbitrary identifier (used as a label for goto) and the number
+  // of data bytes that must remain in the input to avoid aborting the
+  // loop.
+#define GET_INPUT(label, remain)                                \
+  label:                                                        \
+  --szsrc;                                                      \
+  ch = *src++;                                                  \
+  decode = unbase64[ch];                                        \
+  if (decode < 0) {                                             \
+    if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
+    state = 4 - remain;                                         \
+    break;                                                      \
+  }
+
+  // if dest is null, we're just checking to see if it's legal input
+  // rather than producing output. (I suspect this could just be done
+  // with a regexp...). We duplicate the loop so this test can be
+  // outside it instead of in every iteration.
+
+  if (dest) {
+    // This loop consumes 4 input bytes and produces 3 output bytes
+    // per iteration. We can't know at the start that there is enough
+    // data left in the string for a full iteration, so the loop may
+    // break out in the middle; if so 'state' will be set to the
+    // number of input bytes read.
+
+    while (szsrc >= 4) {
+      // We'll start by optimistically assuming that the next four
+      // bytes of the string (src[0..3]) are four good data bytes
+      // (that is, no nulls, whitespace, padding chars, or illegal
+      // chars). We need to test src[0..2] for nulls individually
+      // before constructing temp to preserve the property that we
+      // never read past a null in the string (no matter how long
+      // szsrc claims the string is).
+
+      if (!src[0] || !src[1] || !src[2] ||
+          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
+                    (unsigned(unbase64[src[1]]) << 12) |
+                    (unsigned(unbase64[src[2]]) << 6) |
+                    (unsigned(unbase64[src[3]])))) &
+           0x80000000)) {
+        // Iff any of those four characters was bad (null, illegal,
+        // whitespace, padding), then temp's high bit will be set
+        // (because unbase64[] is -1 for all bad characters).
+        //
+        // We'll back up and resort to the slower decoder, which knows
+        // how to handle those cases.
+
+        GET_INPUT(first, 4);
+        temp = decode;
+        GET_INPUT(second, 3);
+        temp = (temp << 6) | decode;
+        GET_INPUT(third, 2);
+        temp = (temp << 6) | decode;
+        GET_INPUT(fourth, 1);
+        temp = (temp << 6) | decode;
+      } else {
+        // We really did have four good data bytes, so advance four
+        // characters in the string.
+
+        szsrc -= 4;
+        src += 4;
+      }
+
+      // temp has 24 bits of input, so write that out as three bytes.
+
+      if (destidx + 3 > szdest) return false;
+      dest[destidx + 2] = temp;
+      temp >>= 8;
+      dest[destidx + 1] = temp;
+      temp >>= 8;
+      dest[destidx] = temp;
+      destidx += 3;
+    }
+  } else {
+    // Validation-only twin of the loop above: same input handling, but
+    // the decoded bytes are only counted.
+    while (szsrc >= 4) {
+      if (!src[0] || !src[1] || !src[2] ||
+          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
+                    (unsigned(unbase64[src[1]]) << 12) |
+                    (unsigned(unbase64[src[2]]) << 6) |
+                    (unsigned(unbase64[src[3]])))) &
+           0x80000000)) {
+        GET_INPUT(first_no_dest, 4);
+        GET_INPUT(second_no_dest, 3);
+        GET_INPUT(third_no_dest, 2);
+        GET_INPUT(fourth_no_dest, 1);
+      } else {
+        szsrc -= 4;
+        src += 4;
+      }
+      destidx += 3;
+    }
+  }
+
+#undef GET_INPUT
+
+  // if the loop terminated because we read a bad character, return
+  // now.
+  if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
+      !absl::ascii_isspace(ch))
+    return false;
+
+  if (ch == kPad64Equals || ch == kPad64Dot) {
+    // if we stopped by hitting an '=' or '.', un-read that character -- we'll
+    // look at it again when we count to check for the proper number of
+    // equals signs at the end.
+    ++szsrc;
+    --src;
+  } else {
+    // This loop consumes 1 input byte per iteration. It's used to
+    // clean up the 0-3 input bytes remaining when the first, faster
+    // loop finishes. 'temp' contains the data from 'state' input
+    // characters read by the first loop.
+    while (szsrc > 0) {
+      --szsrc;
+      ch = *src++;
+      decode = unbase64[ch];
+      if (decode < 0) {
+        if (absl::ascii_isspace(ch)) {
+          continue;
+        } else if (ch == kPad64Equals || ch == kPad64Dot) {
+          // back up one character; we'll read it again when we check
+          // for the correct number of pad characters at the end.
+          ++szsrc;
+          --src;
+          break;
+        } else {
+          return false;
+        }
+      }
+
+      // Each input character gives us six bits of output.
+      temp = (temp << 6) | decode;
+      ++state;
+      if (state == 4) {
+        // If we've accumulated 24 bits of output, write that out as
+        // three bytes.
+        if (dest) {
+          if (destidx + 3 > szdest) return false;
+          dest[destidx + 2] = temp;
+          temp >>= 8;
+          dest[destidx + 1] = temp;
+          temp >>= 8;
+          dest[destidx] = temp;
+        }
+        destidx += 3;
+        state = 0;
+        temp = 0;
+      }
+    }
+  }
+
+  // Process the leftover data contained in 'temp' at the end of the input.
+  int expected_equals = 0;
+  switch (state) {
+    case 0:
+      // Nothing left over; output is a multiple of 3 bytes.
+      break;
+
+    case 1:
+      // Bad input; we have 6 bits left over.
+      return false;
+
+    case 2:
+      // Produce one more output byte from the 12 input bits we have left.
+      if (dest) {
+        if (destidx + 1 > szdest) return false;
+        temp >>= 4;
+        dest[destidx] = temp;
+      }
+      ++destidx;
+      expected_equals = 2;
+      break;
+
+    case 3:
+      // Produce two more output bytes from the 18 input bits we have left.
+      if (dest) {
+        if (destidx + 2 > szdest) return false;
+        temp >>= 2;
+        dest[destidx + 1] = temp;
+        temp >>= 8;
+        dest[destidx] = temp;
+      }
+      destidx += 2;
+      expected_equals = 1;
+      break;
+
+    default:
+      // state should have no other values at this point.
+      ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
+                   state);
+  }
+
+  // The remainder of the string should be all whitespace, mixed with
+  // exactly 0 equals signs, or exactly 'expected_equals' equals
+  // signs. (Always accepting 0 equals signs is an Abseil extension
+  // not covered in the RFC, as is accepting dot as the pad character.)
+
+  int equals = 0;
+  while (szsrc > 0) {
+    if (*src == kPad64Equals || *src == kPad64Dot)
+      ++equals;
+    else if (!absl::ascii_isspace(*src))
+      return false;
+    --szsrc;
+    ++src;
+  }
+
+  const bool ok = (equals == 0 || equals == expected_equals);
+  if (ok) *len = destidx;
+  return ok;
+}
+
+// The arrays below were generated by the following code
+// #include <sys/time.h>
+// #include <stdlib.h>
+// #include <string.h>
+// main()
+// {
+//   static const char Base64[] =
+//     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+//   char* pos;
+//   int idx, i, j;
+//   printf(" ");
+//   for (i = 0; i < 255; i += 8) {
+//     for (j = i; j < i + 8; j++) {
+//       pos = strchr(Base64, j);
+//       if ((pos == nullptr) || (j == 0))
+//         idx = -1;
+//       else
+//         idx = pos - Base64;
+//       if (idx == -1)
+//         printf(" %2d, ", idx);
+//       else
+//         printf(" %2d/*%c*/,", idx, j);
+//     }
+//     printf("\n ");
+//   }
+// }
+//
+// where the value of "Base64[]" was replaced by one of the base-64 conversion
+// tables from the functions below.
+/* clang-format off */ +constexpr signed char kUnBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; + +constexpr signed char kUnWebSafeBase64[] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 62/*-*/, -1, -1, + 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, + 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, + -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, + 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 15/*P*/, 16/*Q*/, 
17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, + 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/, + -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, + 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, + 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, + 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 +}; +/* clang-format on */ + +constexpr char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +template <typename String> +bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, + const signed char* unbase64) { + // Determine the size of the output string. Base64 encodes every 3 bytes into + // 4 characters. any leftover chars are added directly for good measure. + // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 + const size_t dest_len = 3 * (slen / 4) + (slen % 4); + + strings_internal::STLStringResizeUninitialized(dest, dest_len); + + // We are getting the destination buffer by getting the beginning of the + // string and converting it into a char *. 
+ size_t len; + const bool ok = + Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); + if (!ok) { + dest->clear(); + return false; + } + + // could be shorter if there was padding + assert(len <= dest_len); + dest->erase(len); + + return true; +} + +/* clang-format off */ +constexpr char kHexValueLenient[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9' + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* clang-format on */ + +// This is a templated function so that T can be either a char* +// or a string. This works because we use the [] operator to access +// individual characters at a time. +template <typename T> +void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { + for (int i = 0; i < num; i++) { + to[i] = (kHexValueLenient[from[i * 2] & 0xFF] << 4) + + (kHexValueLenient[from[i * 2 + 1] & 0xFF]); + } +} + +// This is a templated function so that T can be either a char* or a +// std::string. 
+template <typename T> +void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { + auto dest_ptr = &dest[0]; + for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { + const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; + std::copy(hex_p, hex_p + 2, dest_ptr); + } +} + +} // namespace + +// ---------------------------------------------------------------------- +// CUnescape() +// +// See CUnescapeInternal() for implementation details. +// ---------------------------------------------------------------------- +bool CUnescape(absl::string_view source, std::string* dest, + std::string* error) { + return CUnescapeInternal(source, kUnescapeNulls, dest, error); +} + +std::string CEscape(absl::string_view src) { + std::string dest; + CEscapeAndAppendInternal(src, &dest); + return dest; +} + +std::string CHexEscape(absl::string_view src) { + return CEscapeInternal(src, true, false); +} + +std::string Utf8SafeCEscape(absl::string_view src) { + return CEscapeInternal(src, false, true); +} + +std::string Utf8SafeCHexEscape(absl::string_view src) { + return CEscapeInternal(src, true, true); +} + +// ---------------------------------------------------------------------- +// Base64Unescape() - base64 decoder +// Base64Escape() - base64 encoder +// WebSafeBase64Unescape() - Google's variation of base64 decoder +// WebSafeBase64Escape() - Google's variation of base64 encoder +// +// Check out +// http://tools.ietf.org/html/rfc2045 for formal description, but what we +// care about is that... +// Take the encoded stuff in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). 
+// Arrange the 6 digit binary numbers into three bytes as such: +// aaaaaabb bbbbcccc ccdddddd +// Equals signs (one or two) are used at the end of the encoded block to +// indicate that the text was not an integer multiple of three bytes long. +// ---------------------------------------------------------------------- + +bool Base64Unescape(absl::string_view src, std::string* dest) { + return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); +} + +bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { + return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); +} + +void Base64Escape(absl::string_view src, std::string* dest) { + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + true, strings_internal::kBase64Chars); +} + +void WebSafeBase64Escape(absl::string_view src, std::string* dest) { + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + false, kWebSafeBase64Chars); +} + +std::string Base64Escape(absl::string_view src) { + std::string dest; + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + true, strings_internal::kBase64Chars); + return dest; +} + +std::string WebSafeBase64Escape(absl::string_view src) { + std::string dest; + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + false, kWebSafeBase64Chars); + return dest; +} + +std::string HexStringToBytes(absl::string_view from) { + std::string result; + const auto num = from.size() / 2; + strings_internal::STLStringResizeUninitialized(&result, num); + absl::HexStringToBytesInternal<std::string&>(from.data(), result, num); + return result; +} + +std::string BytesToHexString(absl::string_view from) { + std::string result; + strings_internal::STLStringResizeUninitialized(&result, 2 * from.size()); + 
absl::BytesToHexStringInternal<std::string&>( + reinterpret_cast<const unsigned char*>(from.data()), result, from.size()); + return result; +} + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/escaping.h b/third_party/abseil_cpp/absl/strings/escaping.h new file mode 100644 index 000000000000..f5ca26c5dadb --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/escaping.h @@ -0,0 +1,164 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: escaping.h +// ----------------------------------------------------------------------------- +// +// This header file contains string utilities involved in escaping and +// unescaping strings in various ways. + +#ifndef ABSL_STRINGS_ESCAPING_H_ +#define ABSL_STRINGS_ESCAPING_H_ + +#include <cstddef> +#include <string> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// CUnescape() +// +// Unescapes a `source` string and copies it into `dest`, rewriting C-style +// escape sequences (https://en.cppreference.com/w/cpp/language/escape) into +// their proper code point equivalents, returning `true` if successful. 
+// +// The following unescape sequences can be handled: +// +// * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents +// * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must +// resolve to a single byte or an error will occur. E.g. values greater than +// 0xff will produce an error. +// * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary +// number of following digits are allowed, the unescaped value must resolve +// to a single byte or an error will occur. E.g. '\x0045' is equivalent to +// '\x45', but '\x1234' will produce an error. +// * Unicode escape sequences ('\unnnn' for exactly four hex digits or +// '\Unnnnnnnn' for exactly eight hex digits, which will be encoded in +// UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and +// 0x99). +// +// If any errors are encountered, this function returns `false`, leaving the +// `dest` output parameter in an unspecified state, and stores the first +// encountered error in `error`. To disable error reporting, set `error` to +// `nullptr` or use the overload with no error reporting below. +// +// Example: +// +// std::string s = "foo\\rbar\\nbaz\\t"; +// std::string unescaped_s; +// if (!absl::CUnescape(s, &unescaped_s) { +// ... +// } +// EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t"); +bool CUnescape(absl::string_view source, std::string* dest, std::string* error); + +// Overload of `CUnescape()` with no error reporting. +inline bool CUnescape(absl::string_view source, std::string* dest) { + return CUnescape(source, dest, nullptr); +} + +// CEscape() +// +// Escapes a 'src' string using C-style escapes sequences +// (https://en.cppreference.com/w/cpp/language/escape), escaping other +// non-printable/non-whitespace bytes as octal sequences (e.g. "\377"). 
+// +// Example: +// +// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; +// std::string escaped_s = absl::CEscape(s); +// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n"); +std::string CEscape(absl::string_view src); + +// CHexEscape() +// +// Escapes a 'src' string using C-style escape sequences, escaping +// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g. +// "\xFF"). +// +// Example: +// +// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n"; +// std::string escaped_s = absl::CHexEscape(s); +// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n"); +std::string CHexEscape(absl::string_view src); + +// Utf8SafeCEscape() +// +// Escapes a 'src' string using C-style escape sequences, escaping bytes as +// octal sequences, and passing through UTF-8 characters without conversion. +// I.e., when encountering any bytes with their high bit set, this function +// will not escape those values, whether or not they are valid UTF-8. +std::string Utf8SafeCEscape(absl::string_view src); + +// Utf8SafeCHexEscape() +// +// Escapes a 'src' string using C-style escape sequences, escaping bytes as +// hexadecimal sequences, and passing through UTF-8 characters without +// conversion. +std::string Utf8SafeCHexEscape(absl::string_view src); + +// Base64Unescape() +// +// Converts a `src` string encoded in Base64 to its binary equivalent, writing +// it to a `dest` buffer, returning `true` on success. If `src` contains invalid +// characters, `dest` is cleared and returns `false`. +bool Base64Unescape(absl::string_view src, std::string* dest); + +// WebSafeBase64Unescape() +// +// Converts a `src` string encoded in Base64 to its binary equivalent, writing +// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'. +// If `src` contains invalid characters, `dest` is cleared and returns `false`. 
bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);

// Base64Escape()
//
// Encodes a `src` string into a base64-encoded string, with padding characters.
// This function conforms with RFC 4648 section 4 (base64).
//
// The first overload hands the result back through `dest`; the second returns
// it by value.
void Base64Escape(absl::string_view src, std::string* dest);
std::string Base64Escape(absl::string_view src);

// WebSafeBase64Escape()
//
// Encodes a `src` string into a base64-like string, using '-' instead of '+'
// and '_' instead of '/', and without padding. This function conforms with RFC
// 4648 section 5 (base64url).
//
// The first overload hands the result back through `dest`; the second returns
// it by value.
void WebSafeBase64Escape(absl::string_view src, std::string* dest);
std::string WebSafeBase64Escape(absl::string_view src);

// HexStringToBytes()
//
// Converts an ASCII hex string into bytes, returning binary data of length
// `from.size()/2`.
//
// The conversion is lenient and never reports an error: the length is rounded
// down, so a trailing unpaired character is ignored, and any character that
// is not a hex digit (0-9, A-F, a-f) is treated as the digit 0.
std::string HexStringToBytes(absl::string_view from);

// BytesToHexString()
//
// Converts binary data into an ASCII text string, returning a string of size
// `2*from.size()`.
std::string BytesToHexString(absl::string_view from);

ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_STRINGS_ESCAPING_H_
diff --git a/third_party/abseil_cpp/absl/strings/escaping_benchmark.cc b/third_party/abseil_cpp/absl/strings/escaping_benchmark.cc new file mode 100644 index 000000000000..10d5b033c520 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/escaping_benchmark.cc @@ -0,0 +1,94 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/escaping.h" + +#include <cstdio> +#include <cstring> +#include <random> + +#include "benchmark/benchmark.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/escaping_test_common.h" + +namespace { + +void BM_CUnescapeHexString(benchmark::State& state) { + std::string src; + for (int i = 0; i < 50; i++) { + src += "\\x55"; + } + std::string dest; + for (auto _ : state) { + absl::CUnescape(src, &dest); + } +} +BENCHMARK(BM_CUnescapeHexString); + +void BM_WebSafeBase64Escape_string(benchmark::State& state) { + std::string raw; + for (int i = 0; i < 10; ++i) { + for (const auto& test_set : absl::strings_internal::base64_strings()) { + raw += std::string(test_set.plaintext); + } + } + + // The actual benchmark loop is tiny... + std::string escaped; + for (auto _ : state) { + absl::WebSafeBase64Escape(raw, &escaped); + } + + // We want to be sure the compiler doesn't throw away the loop above, + // and the easiest way to ensure that is to round-trip the results and verify + // them. 
+ std::string round_trip; + absl::WebSafeBase64Unescape(escaped, &round_trip); + ABSL_RAW_CHECK(round_trip == raw, ""); +} +BENCHMARK(BM_WebSafeBase64Escape_string); + +// Used for the CEscape benchmarks +const char kStringValueNoEscape[] = "1234567890"; +const char kStringValueSomeEscaped[] = "123\n56789\xA1"; +const char kStringValueMostEscaped[] = "\xA1\xA2\ny\xA4\xA5\xA6z\b\r"; + +void CEscapeBenchmarkHelper(benchmark::State& state, const char* string_value, + int max_len) { + std::string src; + while (src.size() < max_len) { + absl::StrAppend(&src, string_value); + } + + for (auto _ : state) { + absl::CEscape(src); + } +} + +void BM_CEscape_NoEscape(benchmark::State& state) { + CEscapeBenchmarkHelper(state, kStringValueNoEscape, state.range(0)); +} +BENCHMARK(BM_CEscape_NoEscape)->Range(1, 1 << 14); + +void BM_CEscape_SomeEscaped(benchmark::State& state) { + CEscapeBenchmarkHelper(state, kStringValueSomeEscaped, state.range(0)); +} +BENCHMARK(BM_CEscape_SomeEscaped)->Range(1, 1 << 14); + +void BM_CEscape_MostEscaped(benchmark::State& state) { + CEscapeBenchmarkHelper(state, kStringValueMostEscaped, state.range(0)); +} +BENCHMARK(BM_CEscape_MostEscaped)->Range(1, 1 << 14); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/escaping_test.cc b/third_party/abseil_cpp/absl/strings/escaping_test.cc new file mode 100644 index 000000000000..45671a0ed598 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/escaping_test.cc @@ -0,0 +1,664 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/escaping.h" + +#include <array> +#include <cstdio> +#include <cstring> +#include <memory> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/fixed_array.h" +#include "absl/strings/str_cat.h" + +#include "absl/strings/internal/escaping_test_common.h" + +namespace { + +struct epair { + std::string escaped; + std::string unescaped; +}; + +TEST(CEscape, EscapeAndUnescape) { + const std::string inputs[] = { + std::string("foo\nxx\r\b\0023"), + std::string(""), + std::string("abc"), + std::string("\1chad_rules"), + std::string("\1arnar_drools"), + std::string("xxxx\r\t'\"\\"), + std::string("\0xx\0", 4), + std::string("\x01\x31"), + std::string("abc\xb\x42\141bc"), + std::string("123\1\x31\x32\x33"), + std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"), + std::string( + "\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"), + }; + // Do this twice, once for octal escapes and once for hex escapes. + for (int kind = 0; kind < 4; kind++) { + for (const std::string& original : inputs) { + std::string escaped; + switch (kind) { + case 0: + escaped = absl::CEscape(original); + break; + case 1: + escaped = absl::CHexEscape(original); + break; + case 2: + escaped = absl::Utf8SafeCEscape(original); + break; + case 3: + escaped = absl::Utf8SafeCHexEscape(original); + break; + } + std::string unescaped_str; + EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str)); + EXPECT_EQ(unescaped_str, original); + + unescaped_str.erase(); + std::string error; + EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str, &error)); + EXPECT_EQ(error, ""); + + // Check in-place unescaping + std::string s = escaped; + EXPECT_TRUE(absl::CUnescape(s, &s)); + ASSERT_EQ(s, original); + } + } + // Check that all possible two character strings can be escaped then + // unescaped successfully. 
+ for (int char0 = 0; char0 < 256; char0++) { + for (int char1 = 0; char1 < 256; char1++) { + char chars[2]; + chars[0] = char0; + chars[1] = char1; + std::string s(chars, 2); + std::string escaped = absl::CHexEscape(s); + std::string unescaped; + EXPECT_TRUE(absl::CUnescape(escaped, &unescaped)); + EXPECT_EQ(s, unescaped); + } + } +} + +TEST(CEscape, BasicEscaping) { + epair oct_values[] = { + {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"}, + {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'", + "'full of \"sound\" and \"fury\"'"}, + {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"}, + {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"} + }; + epair hex_values[] = { + {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"}, + {"I\\\'ve just seen a \\\"face\\\"", + "I've just seen a \"face\""}, + {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"}, + {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"} + }; + epair utf8_oct_values[] = { + {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t", + "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"}, + {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name", + "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"}, + {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\", + "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"}, + {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r", + "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"} + }; + epair utf8_hex_values[] = { + {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n", + "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"}, + {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"", + "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""}, + {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\", + "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"}, + {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r", + "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"} + }; + + for (const epair& val : oct_values) { + std::string escaped = 
absl::CEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } + for (const epair& val : hex_values) { + std::string escaped = absl::CHexEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } + for (const epair& val : utf8_oct_values) { + std::string escaped = absl::Utf8SafeCEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } + for (const epair& val : utf8_hex_values) { + std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped); + EXPECT_EQ(escaped, val.escaped); + } +} + +TEST(Unescape, BasicFunction) { + epair tests[] = + {{"", ""}, + {"\\u0030", "0"}, + {"\\u00A3", "\xC2\xA3"}, + {"\\u22FD", "\xE2\x8B\xBD"}, + {"\\U00010000", "\xF0\x90\x80\x80"}, + {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}}; + for (const epair& val : tests) { + std::string out; + EXPECT_TRUE(absl::CUnescape(val.escaped, &out)); + EXPECT_EQ(out, val.unescaped); + } + std::string bad[] = {"\\u1", // too short + "\\U1", // too short + "\\Uffffff", // exceeds 0x10ffff (largest Unicode) + "\\U00110000", // exceeds 0x10ffff (largest Unicode) + "\\uD835", // surrogate character (D800-DFFF) + "\\U0000DD04", // surrogate character (D800-DFFF) + "\\777", // exceeds 0xff + "\\xABCD"}; // exceeds 0xff + for (const std::string& e : bad) { + std::string error; + std::string out; + EXPECT_FALSE(absl::CUnescape(e, &out, &error)); + EXPECT_FALSE(error.empty()); + + out.erase(); + EXPECT_FALSE(absl::CUnescape(e, &out)); + } +} + +class CUnescapeTest : public testing::Test { + protected: + static const char kStringWithMultipleOctalNulls[]; + static const char kStringWithMultipleHexNulls[]; + static const char kStringWithMultipleUnicodeNulls[]; + + std::string result_string_; +}; + +const char CUnescapeTest::kStringWithMultipleOctalNulls[] = + "\\0\\n" // null escape \0 plus newline + "0\\n" // just a number 0 (not a null escape) plus newline + "\\00\\12" // null escape \00 plus octal newline code + "\\000"; // null escape \000 + +// This has the same ingredients as 
kStringWithMultipleOctalNulls +// but with \x hex escapes instead of octal escapes. +const char CUnescapeTest::kStringWithMultipleHexNulls[] = + "\\x0\\n" + "0\\n" + "\\x00\\xa" + "\\x000"; + +const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] = + "\\u0000\\n" // short-form (4-digit) null escape plus newline + "0\\n" // just a number 0 (not a null escape) plus newline + "\\U00000000"; // long-form (8-digit) null escape + +TEST_F(CUnescapeTest, Unescapes1CharOctalNull) { + std::string original_string = "\\0"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes2CharOctalNull) { + std::string original_string = "\\00"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes3CharOctalNull) { + std::string original_string = "\\000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes1CharHexNull) { + std::string original_string = "\\x0"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes2CharHexNull) { + std::string original_string = "\\x00"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes3CharHexNull) { + std::string original_string = "\\x000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) { + std::string original_string = "\\u0000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) { + std::string 
original_string = "\\U00000000"; + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0", 1), result_string_); +} + +TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) { + std::string original_string(kStringWithMultipleOctalNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + // All escapes, including newlines and null escapes, should have been + // converted to the equivalent characters. + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0\n" + "\0", + 7), + result_string_); +} + + +TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) { + std::string original_string(kStringWithMultipleHexNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0\n" + "\0", + 7), + result_string_); +} + +TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) { + std::string original_string(kStringWithMultipleUnicodeNulls); + EXPECT_TRUE(absl::CUnescape(original_string, &result_string_)); + EXPECT_EQ(std::string("\0\n" + "0\n" + "\0", + 5), + result_string_); +} + +static struct { + absl::string_view plaintext; + absl::string_view cyphertext; +} const base64_tests[] = { + // Empty string. + {{"", 0}, {"", 0}}, + {{nullptr, 0}, + {"", 0}}, // if length is zero, plaintext ptr must be ignored! + + // Basic bit patterns; + // values obtained with "echo -n '...' 
| uuencode -m test" + + {{"\000", 1}, "AA=="}, + {{"\001", 1}, "AQ=="}, + {{"\002", 1}, "Ag=="}, + {{"\004", 1}, "BA=="}, + {{"\010", 1}, "CA=="}, + {{"\020", 1}, "EA=="}, + {{"\040", 1}, "IA=="}, + {{"\100", 1}, "QA=="}, + {{"\200", 1}, "gA=="}, + + {{"\377", 1}, "/w=="}, + {{"\376", 1}, "/g=="}, + {{"\375", 1}, "/Q=="}, + {{"\373", 1}, "+w=="}, + {{"\367", 1}, "9w=="}, + {{"\357", 1}, "7w=="}, + {{"\337", 1}, "3w=="}, + {{"\277", 1}, "vw=="}, + {{"\177", 1}, "fw=="}, + {{"\000\000", 2}, "AAA="}, + {{"\000\001", 2}, "AAE="}, + {{"\000\002", 2}, "AAI="}, + {{"\000\004", 2}, "AAQ="}, + {{"\000\010", 2}, "AAg="}, + {{"\000\020", 2}, "ABA="}, + {{"\000\040", 2}, "ACA="}, + {{"\000\100", 2}, "AEA="}, + {{"\000\200", 2}, "AIA="}, + {{"\001\000", 2}, "AQA="}, + {{"\002\000", 2}, "AgA="}, + {{"\004\000", 2}, "BAA="}, + {{"\010\000", 2}, "CAA="}, + {{"\020\000", 2}, "EAA="}, + {{"\040\000", 2}, "IAA="}, + {{"\100\000", 2}, "QAA="}, + {{"\200\000", 2}, "gAA="}, + + {{"\377\377", 2}, "//8="}, + {{"\377\376", 2}, "//4="}, + {{"\377\375", 2}, "//0="}, + {{"\377\373", 2}, "//s="}, + {{"\377\367", 2}, "//c="}, + {{"\377\357", 2}, "/+8="}, + {{"\377\337", 2}, "/98="}, + {{"\377\277", 2}, "/78="}, + {{"\377\177", 2}, "/38="}, + {{"\376\377", 2}, "/v8="}, + {{"\375\377", 2}, "/f8="}, + {{"\373\377", 2}, "+/8="}, + {{"\367\377", 2}, "9/8="}, + {{"\357\377", 2}, "7/8="}, + {{"\337\377", 2}, "3/8="}, + {{"\277\377", 2}, "v/8="}, + {{"\177\377", 2}, "f/8="}, + + {{"\000\000\000", 3}, "AAAA"}, + {{"\000\000\001", 3}, "AAAB"}, + {{"\000\000\002", 3}, "AAAC"}, + {{"\000\000\004", 3}, "AAAE"}, + {{"\000\000\010", 3}, "AAAI"}, + {{"\000\000\020", 3}, "AAAQ"}, + {{"\000\000\040", 3}, "AAAg"}, + {{"\000\000\100", 3}, "AABA"}, + {{"\000\000\200", 3}, "AACA"}, + {{"\000\001\000", 3}, "AAEA"}, + {{"\000\002\000", 3}, "AAIA"}, + {{"\000\004\000", 3}, "AAQA"}, + {{"\000\010\000", 3}, "AAgA"}, + {{"\000\020\000", 3}, "ABAA"}, + {{"\000\040\000", 3}, "ACAA"}, + {{"\000\100\000", 3}, "AEAA"}, + 
{{"\000\200\000", 3}, "AIAA"}, + {{"\001\000\000", 3}, "AQAA"}, + {{"\002\000\000", 3}, "AgAA"}, + {{"\004\000\000", 3}, "BAAA"}, + {{"\010\000\000", 3}, "CAAA"}, + {{"\020\000\000", 3}, "EAAA"}, + {{"\040\000\000", 3}, "IAAA"}, + {{"\100\000\000", 3}, "QAAA"}, + {{"\200\000\000", 3}, "gAAA"}, + + {{"\377\377\377", 3}, "////"}, + {{"\377\377\376", 3}, "///+"}, + {{"\377\377\375", 3}, "///9"}, + {{"\377\377\373", 3}, "///7"}, + {{"\377\377\367", 3}, "///3"}, + {{"\377\377\357", 3}, "///v"}, + {{"\377\377\337", 3}, "///f"}, + {{"\377\377\277", 3}, "//+/"}, + {{"\377\377\177", 3}, "//9/"}, + {{"\377\376\377", 3}, "//7/"}, + {{"\377\375\377", 3}, "//3/"}, + {{"\377\373\377", 3}, "//v/"}, + {{"\377\367\377", 3}, "//f/"}, + {{"\377\357\377", 3}, "/+//"}, + {{"\377\337\377", 3}, "/9//"}, + {{"\377\277\377", 3}, "/7//"}, + {{"\377\177\377", 3}, "/3//"}, + {{"\376\377\377", 3}, "/v//"}, + {{"\375\377\377", 3}, "/f//"}, + {{"\373\377\377", 3}, "+///"}, + {{"\367\377\377", 3}, "9///"}, + {{"\357\377\377", 3}, "7///"}, + {{"\337\377\377", 3}, "3///"}, + {{"\277\377\377", 3}, "v///"}, + {{"\177\377\377", 3}, "f///"}, + + // Random numbers: values obtained with + // + // #! 
/bin/bash + // dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random + // od -N $1 -t o1 /tmp/bar.random + // uuencode -m test < /tmp/bar.random + // + // where $1 is the number of bytes (2, 3) + + {{"\243\361", 2}, "o/E="}, + {{"\024\167", 2}, "FHc="}, + {{"\313\252", 2}, "y6o="}, + {{"\046\041", 2}, "JiE="}, + {{"\145\236", 2}, "ZZ4="}, + {{"\254\325", 2}, "rNU="}, + {{"\061\330", 2}, "Mdg="}, + {{"\245\032", 2}, "pRo="}, + {{"\006\000", 2}, "BgA="}, + {{"\375\131", 2}, "/Vk="}, + {{"\303\210", 2}, "w4g="}, + {{"\040\037", 2}, "IB8="}, + {{"\261\372", 2}, "sfo="}, + {{"\335\014", 2}, "3Qw="}, + {{"\233\217", 2}, "m48="}, + {{"\373\056", 2}, "+y4="}, + {{"\247\232", 2}, "p5o="}, + {{"\107\053", 2}, "Rys="}, + {{"\204\077", 2}, "hD8="}, + {{"\276\211", 2}, "vok="}, + {{"\313\110", 2}, "y0g="}, + {{"\363\376", 2}, "8/4="}, + {{"\251\234", 2}, "qZw="}, + {{"\103\262", 2}, "Q7I="}, + {{"\142\312", 2}, "Yso="}, + {{"\067\211", 2}, "N4k="}, + {{"\220\001", 2}, "kAE="}, + {{"\152\240", 2}, "aqA="}, + {{"\367\061", 2}, "9zE="}, + {{"\133\255", 2}, "W60="}, + {{"\176\035", 2}, "fh0="}, + {{"\032\231", 2}, "Gpk="}, + + {{"\013\007\144", 3}, "Cwdk"}, + {{"\030\112\106", 3}, "GEpG"}, + {{"\047\325\046", 3}, "J9Um"}, + {{"\310\160\022", 3}, "yHAS"}, + {{"\131\100\237", 3}, "WUCf"}, + {{"\064\342\134", 3}, "NOJc"}, + {{"\010\177\004", 3}, "CH8E"}, + {{"\345\147\205", 3}, "5WeF"}, + {{"\300\343\360", 3}, "wOPw"}, + {{"\061\240\201", 3}, "MaCB"}, + {{"\225\333\044", 3}, "ldsk"}, + {{"\215\137\352", 3}, "jV/q"}, + {{"\371\147\160", 3}, "+Wdw"}, + {{"\030\320\051", 3}, "GNAp"}, + {{"\044\174\241", 3}, "JHyh"}, + {{"\260\127\037", 3}, "sFcf"}, + {{"\111\045\033", 3}, "SSUb"}, + {{"\202\114\107", 3}, "gkxH"}, + {{"\057\371\042", 3}, "L/ki"}, + {{"\223\247\244", 3}, "k6ek"}, + {{"\047\216\144", 3}, "J45k"}, + {{"\203\070\327", 3}, "gzjX"}, + {{"\247\140\072", 3}, "p2A6"}, + {{"\124\115\116", 3}, "VE1O"}, + {{"\157\162\050", 3}, "b3Io"}, + {{"\357\223\004", 3}, "75ME"}, + 
{{"\052\117\156", 3}, "Kk9u"}, + {{"\347\154\000", 3}, "52wA"}, + {{"\303\012\142", 3}, "wwpi"}, + {{"\060\035\362", 3}, "MB3y"}, + {{"\130\226\361", 3}, "WJbx"}, + {{"\173\013\071", 3}, "ews5"}, + {{"\336\004\027", 3}, "3gQX"}, + {{"\357\366\234", 3}, "7/ac"}, + {{"\353\304\111", 3}, "68RJ"}, + {{"\024\264\131", 3}, "FLRZ"}, + {{"\075\114\251", 3}, "PUyp"}, + {{"\315\031\225", 3}, "zRmV"}, + {{"\154\201\276", 3}, "bIG+"}, + {{"\200\066\072", 3}, "gDY6"}, + {{"\142\350\267", 3}, "Yui3"}, + {{"\033\000\166", 3}, "GwB2"}, + {{"\210\055\077", 3}, "iC0/"}, + {{"\341\037\124", 3}, "4R9U"}, + {{"\161\103\152", 3}, "cUNq"}, + {{"\270\142\131", 3}, "uGJZ"}, + {{"\337\076\074", 3}, "3z48"}, + {{"\375\106\362", 3}, "/Uby"}, + {{"\227\301\127", 3}, "l8FX"}, + {{"\340\002\234", 3}, "4AKc"}, + {{"\121\064\033", 3}, "UTQb"}, + {{"\157\134\143", 3}, "b1xj"}, + {{"\247\055\327", 3}, "py3X"}, + {{"\340\142\005", 3}, "4GIF"}, + {{"\060\260\143", 3}, "MLBj"}, + {{"\075\203\170", 3}, "PYN4"}, + {{"\143\160\016", 3}, "Y3AO"}, + {{"\313\013\063", 3}, "ywsz"}, + {{"\174\236\135", 3}, "fJ5d"}, + {{"\103\047\026", 3}, "QycW"}, + {{"\365\005\343", 3}, "9QXj"}, + {{"\271\160\223", 3}, "uXCT"}, + {{"\362\255\172", 3}, "8q16"}, + {{"\113\012\015", 3}, "SwoN"}, + + // various lengths, generated by this python script: + // + // from std::string import lowercase as lc + // for i in range(27): + // print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i), + // lc[:i].encode('base64').strip()) + + {{"", 0}, {"", 0}}, + {"a", "YQ=="}, + {"ab", "YWI="}, + {"abc", "YWJj"}, + {"abcd", "YWJjZA=="}, + {"abcde", "YWJjZGU="}, + {"abcdef", "YWJjZGVm"}, + {"abcdefg", "YWJjZGVmZw=="}, + {"abcdefgh", "YWJjZGVmZ2g="}, + {"abcdefghi", "YWJjZGVmZ2hp"}, + {"abcdefghij", "YWJjZGVmZ2hpag=="}, + {"abcdefghijk", "YWJjZGVmZ2hpams="}, + {"abcdefghijkl", "YWJjZGVmZ2hpamts"}, + {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="}, + {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="}, + {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"}, + 
{"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="}, + {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="}, + {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"}, + {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="}, + {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="}, + {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"}, + {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="}, + {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="}, + {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"}, + {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="}, + {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="}, +}; + +template <typename StringType> +void TestEscapeAndUnescape() { + // Check the short strings; this tests the math (and boundaries) + for (const auto& tc : base64_tests) { + StringType encoded("this junk should be ignored"); + absl::Base64Escape(tc.plaintext, &encoded); + EXPECT_EQ(encoded, tc.cyphertext); + EXPECT_EQ(absl::Base64Escape(tc.plaintext), tc.cyphertext); + + StringType decoded("this junk should be ignored"); + EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded)); + EXPECT_EQ(decoded, tc.plaintext); + + StringType websafe(tc.cyphertext); + for (int c = 0; c < websafe.size(); ++c) { + if ('+' == websafe[c]) websafe[c] = '-'; + if ('/' == websafe[c]) websafe[c] = '_'; + if ('=' == websafe[c]) { + websafe.resize(c); + break; + } + } + + encoded = "this junk should be ignored"; + absl::WebSafeBase64Escape(tc.plaintext, &encoded); + EXPECT_EQ(encoded, websafe); + EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe); + + // Let's try the string version of the decoder + decoded = "this junk should be ignored"; + EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); + EXPECT_EQ(decoded, tc.plaintext); + } + + // Now try the long strings, this tests the streaming + for (const auto& tc : absl::strings_internal::base64_strings()) { + StringType buffer; + 
absl::WebSafeBase64Escape(tc.plaintext, &buffer); + EXPECT_EQ(tc.cyphertext, buffer); + EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), tc.cyphertext); + } + + // Verify the behavior when decoding bad data + { + absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4), + absl::string_view("abc.\0", 5)}; + for (absl::string_view bad_data : data_set) { + StringType buf; + EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf)); + EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf)); + EXPECT_TRUE(buf.empty()); + } + } +} + +TEST(Base64, EscapeAndUnescape) { + TestEscapeAndUnescape<std::string>(); +} + +TEST(Base64, DISABLED_HugeData) { + const size_t kSize = size_t(3) * 1000 * 1000 * 1000; + static_assert(kSize % 3 == 0, "kSize must be divisible by 3"); + const std::string huge(kSize, 'x'); + + std::string escaped; + absl::Base64Escape(huge, &escaped); + + // Generates the string that should match a base64 encoded "xxx..." string. + // "xxx" in base64 is "eHh4". + std::string expected_encoding; + expected_encoding.reserve(kSize / 3 * 4); + for (size_t i = 0; i < kSize / 3; ++i) { + expected_encoding.append("eHh4"); + } + EXPECT_EQ(expected_encoding, escaped); + + std::string unescaped; + EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped)); + EXPECT_EQ(huge, unescaped); +} + +TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) { + std::string hex_mixed = "0123456789abcdefABCDEF"; + std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF"; + std::string hex_only_lower = "0123456789abcdefabcdef"; + + std::string bytes_result = absl::HexStringToBytes(hex_mixed); + EXPECT_EQ(bytes_expected, bytes_result); + + std::string prefix_valid = hex_mixed + "?"; + std::string prefix_valid_result = absl::HexStringToBytes( + absl::string_view(prefix_valid.data(), prefix_valid.size() - 1)); + EXPECT_EQ(bytes_expected, prefix_valid_result); + + std::string infix_valid = "?" 
+ hex_mixed + "???"; + std::string infix_valid_result = absl::HexStringToBytes( + absl::string_view(infix_valid.data() + 1, hex_mixed.size())); + EXPECT_EQ(bytes_expected, infix_valid_result); + + std::string hex_result = absl::BytesToHexString(bytes_expected); + EXPECT_EQ(hex_only_lower, hex_result); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/char_map.h b/third_party/abseil_cpp/absl/strings/internal/char_map.h new file mode 100644 index 000000000000..61484de0b795 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/char_map.h @@ -0,0 +1,156 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Character Map Class +// +// A fast, bit-vector map for 8-bit unsigned characters. +// This class is useful for non-character purposes as well. + +#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ +#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ + +#include <cstddef> +#include <cstdint> +#include <cstring> + +#include "absl/base/macros.h" +#include "absl/base/port.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +class Charmap { + public: + constexpr Charmap() : m_() {} + + // Initializes with a given char*. Note that NUL is not treated as + // a terminator, but rather a char to be flicked. + Charmap(const char* str, int len) : m_() { + while (len--) SetChar(*str++); + } + + // Initializes with a given char*. 
NUL is treated as a terminator + // and will not be in the charmap. + explicit Charmap(const char* str) : m_() { + while (*str) SetChar(*str++); + } + + constexpr bool contains(unsigned char c) const { + return (m_[c / 64] >> (c % 64)) & 0x1; + } + + // Returns true if and only if a character exists in both maps. + bool IntersectsWith(const Charmap& c) const { + for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) { + if ((m_[i] & c.m_[i]) != 0) return true; + } + return false; + } + + bool IsZero() const { + for (uint64_t c : m_) { + if (c != 0) return false; + } + return true; + } + + // Containing only a single specified char. + static constexpr Charmap Char(char x) { + return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1), + CharMaskForWord(x, 2), CharMaskForWord(x, 3)); + } + + // Containing all the chars in the C-string 's'. + // Note that this is expensively recursive because of the C++11 constexpr + // formulation. Use only in constexpr initializers. + static constexpr Charmap FromString(const char* s) { + return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1)); + } + + // Containing all the chars in the closed interval [lo,hi]. 
+ static constexpr Charmap Range(char lo, char hi) { + return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), + RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); + } + + friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], + a.m_[3] & b.m_[3]); + } + + friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) { + return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], + a.m_[3] | b.m_[3]); + } + + friend constexpr Charmap operator~(const Charmap& a) { + return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); + } + + private: + constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) + : m_{b0, b1, b2, b3} {} + + static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi, + uint64_t word) { + return OpenRangeFromZeroForWord(hi + 1, word) & + ~OpenRangeFromZeroForWord(lo, word); + } + + // All the chars in the specified word of the range [0, upper). + static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, + uint64_t word) { + return (upper <= 64 * word) + ? 0 + : (upper >= 64 * (word + 1)) + ? ~static_cast<uint64_t>(0) + : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); + } + + static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) { + return (x / 64 == word) ? 
(static_cast<uint64_t>(1) << (x % 64)) : 0; + } + + private: + void SetChar(unsigned char c) { + m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); + } + + uint64_t m_[4]; +}; + +// Mirror the char-classifying predicates in <cctype> +constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); } +constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); } +constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); } +constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); } +constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); } +constexpr Charmap XDigitCharmap() { + return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f'); +} +constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); } +constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); } +constexpr Charmap CntrlCharmap() { + return Charmap::Range(0, 0x7f) & ~PrintCharmap(); +} +constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); } +constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } +constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/char_map_benchmark.cc b/third_party/abseil_cpp/absl/strings/internal/char_map_benchmark.cc new file mode 100644 index 000000000000..5cef967b3087 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/char_map_benchmark.cc @@ -0,0 +1,61 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/char_map.h" + +#include <cstdint> + +#include "benchmark/benchmark.h" + +namespace { + +absl::strings_internal::Charmap MakeBenchmarkMap() { + absl::strings_internal::Charmap m; + uint32_t x[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc}; + for (uint32_t& t : x) t *= static_cast<uint32_t>(0x11111111UL); + for (uint32_t i = 0; i < 256; ++i) { + if ((x[i / 32] >> (i % 32)) & 1) + m = m | absl::strings_internal::Charmap::Char(i); + } + return m; +} + +// Micro-benchmark for Charmap::contains. +void BM_Contains(benchmark::State& state) { + // Loop-body replicated 10 times to increase time per iteration. + // Argument continuously changed to avoid generating common subexpressions. + const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap(); + unsigned char c = 0; + int ops = 0; + for (auto _ : state) { + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + ops += benchmark_map.contains(c++); + } + benchmark::DoNotOptimize(ops); +} +BENCHMARK(BM_Contains); + +// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith; +// their running time is data-dependent and it is not worth characterizing +// "typical" data. 
+ +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/char_map_test.cc b/third_party/abseil_cpp/absl/strings/internal/char_map_test.cc new file mode 100644 index 000000000000..d3306241a404 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/char_map_test.cc @@ -0,0 +1,172 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/char_map.h" + +#include <cctype> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +constexpr absl::strings_internal::Charmap everything_map = + ~absl::strings_internal::Charmap(); +constexpr absl::strings_internal::Charmap nothing_map{}; + +TEST(Charmap, AllTests) { + const absl::strings_internal::Charmap also_nothing_map("", 0); + ASSERT_TRUE(everything_map.contains('\0')); + ASSERT_TRUE(!nothing_map.contains('\0')); + ASSERT_TRUE(!also_nothing_map.contains('\0')); + for (unsigned char ch = 1; ch != 0; ++ch) { + ASSERT_TRUE(everything_map.contains(ch)); + ASSERT_TRUE(!nothing_map.contains(ch)); + ASSERT_TRUE(!also_nothing_map.contains(ch)); + } + + const absl::strings_internal::Charmap symbols("&@#@^!@?", 5); + ASSERT_TRUE(symbols.contains('&')); + ASSERT_TRUE(symbols.contains('@')); + ASSERT_TRUE(symbols.contains('#')); + ASSERT_TRUE(symbols.contains('^')); + ASSERT_TRUE(!symbols.contains('!')); + ASSERT_TRUE(!symbols.contains('?')); + int cnt = 0; + for (unsigned char 
ch = 1; ch != 0; ++ch) + cnt += symbols.contains(ch); + ASSERT_EQ(cnt, 4); + + const absl::strings_internal::Charmap lets("^abcde", 3); + const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10); + const absl::strings_internal::Charmap lets3("fghij\0klmnop"); + ASSERT_TRUE(lets2.contains('k')); + ASSERT_TRUE(!lets3.contains('k')); + + ASSERT_TRUE(symbols.IntersectsWith(lets)); + ASSERT_TRUE(!lets2.IntersectsWith(lets)); + ASSERT_TRUE(lets.IntersectsWith(symbols)); + ASSERT_TRUE(!lets.IntersectsWith(lets2)); + + ASSERT_TRUE(nothing_map.IsZero()); + ASSERT_TRUE(!lets.IsZero()); +} + +namespace { +std::string Members(const absl::strings_internal::Charmap& m) { + std::string r; + for (size_t i = 0; i < 256; ++i) + if (m.contains(i)) r.push_back(i); + return r; +} + +std::string ClosedRangeString(unsigned char lo, unsigned char hi) { + // Don't depend on lo<hi. Just increment until lo==hi. + std::string s; + while (true) { + s.push_back(lo); + if (lo == hi) break; + ++lo; + } + return s; +} + +} // namespace + +TEST(Charmap, Constexpr) { + constexpr absl::strings_internal::Charmap kEmpty = nothing_map; + EXPECT_THAT(Members(kEmpty), ""); + constexpr absl::strings_internal::Charmap kA = + absl::strings_internal::Charmap::Char('A'); + EXPECT_THAT(Members(kA), "A"); + constexpr absl::strings_internal::Charmap kAZ = + absl::strings_internal::Charmap::Range('A', 'Z'); + EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + constexpr absl::strings_internal::Charmap kIdentifier = + absl::strings_internal::Charmap::Range('0', '9') | + absl::strings_internal::Charmap::Range('A', 'Z') | + absl::strings_internal::Charmap::Range('a', 'z') | + absl::strings_internal::Charmap::Char('_'); + EXPECT_THAT(Members(kIdentifier), + "0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "_" + "abcdefghijklmnopqrstuvwxyz"); + constexpr absl::strings_internal::Charmap kAll = everything_map; + for (size_t i = 0; i < 256; ++i) { + EXPECT_TRUE(kAll.contains(i)) << i; + } + constexpr 
absl::strings_internal::Charmap kHello = + absl::strings_internal::Charmap::FromString("Hello, world!"); + EXPECT_THAT(Members(kHello), " !,Hdelorw"); + + // test negation and intersection + constexpr absl::strings_internal::Charmap kABC = + absl::strings_internal::Charmap::Range('A', 'Z') & + ~absl::strings_internal::Charmap::Range('D', 'Z'); + EXPECT_THAT(Members(kABC), "ABC"); +} + +TEST(Charmap, Range) { + // Exhaustive testing takes too long, so test some of the boundaries that + // are perhaps going to cause trouble. + std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15, + 16, 17, 30, 31, 32, 33, 63, 64, 65, + 127, 128, 129, 223, 224, 225, 254, 255}; + for (auto lo = poi.begin(); lo != poi.end(); ++lo) { + SCOPED_TRACE(*lo); + for (auto hi = lo; hi != poi.end(); ++hi) { + SCOPED_TRACE(*hi); + EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)), + ClosedRangeString(*lo, *hi)); + } + } +} + +bool AsBool(int x) { return static_cast<bool>(x); } + +TEST(CharmapCtype, Match) { + for (int c = 0; c < 256; ++c) { + SCOPED_TRACE(c); + SCOPED_TRACE(static_cast<char>(c)); + EXPECT_EQ(AsBool(std::isupper(c)), + absl::strings_internal::UpperCharmap().contains(c)); + EXPECT_EQ(AsBool(std::islower(c)), + absl::strings_internal::LowerCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isdigit(c)), + absl::strings_internal::DigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalpha(c)), + absl::strings_internal::AlphaCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isalnum(c)), + absl::strings_internal::AlnumCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isxdigit(c)), + absl::strings_internal::XDigitCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isprint(c)), + absl::strings_internal::PrintCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isspace(c)), + absl::strings_internal::SpaceCharmap().contains(c)); + EXPECT_EQ(AsBool(std::iscntrl(c)), + absl::strings_internal::CntrlCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isblank(c)), + 
absl::strings_internal::BlankCharmap().contains(c)); + EXPECT_EQ(AsBool(std::isgraph(c)), + absl::strings_internal::GraphCharmap().contains(c)); + EXPECT_EQ(AsBool(std::ispunct(c)), + absl::strings_internal::PunctCharmap().contains(c)); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.cc new file mode 100644 index 000000000000..ebf8c0791af9 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.cc @@ -0,0 +1,359 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_bigint.h" + +#include <algorithm> +#include <cassert> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +namespace { + +// Table containing some large powers of 5, for fast computation. + +// Constant step size for entries in the kLargePowersOfFive table. Each entry +// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep +// (or 5**27). +// +// In other words, the Nth entry in the table is 5**(27*N). +// +// 5**27 is the largest power of 5 that fits in 64 bits. +constexpr int kLargePowerOfFiveStep = 27; + +// The largest legal index into the kLargePowersOfFive table. +// +// In other words, the largest precomputed power of 5 is 5**(27*20). 
+constexpr int kLargestPowerOfFiveIndex = 20; + +// Table of powers of (5**27), up to (5**27)**20 == 5**540. +// +// Used to generate large powers of 5 while limiting the number of repeated +// multiplications required. +// +// clang-format off +const uint32_t kLargePowersOfFive[] = { +// 5**27 (i=1), start=0, end=2 + 0xfa10079dU, 0x6765c793U, +// 5**54 (i=2), start=2, end=6 + 0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U, +// 5**81 (i=3), start=6, end=12 + 0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U, +// 5**108 (i=4), start=12, end=20 + 0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU, + 0x20d3846fU, 0x06d00f73U, +// 5**135 (i=5), start=20, end=30 + 0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U, + 0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU, +// 5**162 (i=6), start=30, end=42 + 0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU, + 0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU, +// 5**189 (i=7), start=42, end=56 + 0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U, + 0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U, + 0x94151217U, 0x0072e9f7U, +// 5**216 (i=8), start=56, end=72 + 0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU, + 0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU, + 0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U, +// 5**243 (i=9), start=72, end=90 + 0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U, + 0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U, + 0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U, +// 5**270 (i=10), start=90, end=110 + 0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U, + 0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U, + 
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U, + 0x0d39e796U, 0x00079250U, +// 5**297 (i=11), start=110, end=132 + 0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U, + 0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U, + 0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U, + 0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U, +// 5**324 (i=12), start=132, end=156 + 0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U, + 0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU, + 0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U, + 0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U, +// 5**351 (i=13), start=156, end=182 + 0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU, + 0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U, + 0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U, + 0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU, + 0x859a4940U, 0x00007fb6U, +// 5**378 (i=14), start=182, end=210 + 0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U, + 0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU, + 0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU, + 0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U, + 0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U, +// 5**405 (i=15), start=210, end=240 + 0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U, + 0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U, + 0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U, + 0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU, + 0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 
0x4faafe66U, 0x8af13c3bU, 0x000014d5U, +// 5**432 (i=16), start=240, end=272 + 0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU, + 0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U, + 0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U, + 0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU, + 0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U, + 0x3364ea62U, 0x0000086aU, +// 5**459 (i=17), start=272, end=306 + 0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU, + 0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U, + 0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U, + 0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U, + 0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU, + 0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U, +// 5**486 (i=18), start=306, end=342 + 0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU, + 0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U, + 0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU, + 0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U, + 0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU, + 0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU, +// 5**513 (i=19), start=342, end=380 + 0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U, + 0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU, + 0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU, + 0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U, + 0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U, + 0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 
0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU, + 0xf0046d27U, 0x0000008dU, +// 5**540 (i=20), start=380, end=420 + 0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU, + 0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U, + 0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U, + 0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U, + 0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U, + 0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU, + 0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U, +}; +// clang-format on + +// Returns a pointer to the big integer data for (5**27)**i. i must be +// between 1 and 20, inclusive. +const uint32_t* LargePowerOfFiveData(int i) { + return kLargePowersOfFive + i * (i - 1); +} + +// Returns the size of the big integer data for (5**27)**i, in words. i must be +// between 1 and 20, inclusive. +int LargePowerOfFiveSize(int i) { return 2 * i; } +} // namespace + +ABSL_DLL const uint32_t kFiveToNth[14] = { + 1, 5, 25, 125, 625, 3125, 15625, + 78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125, +}; + +ABSL_DLL const uint32_t kTenToNth[10] = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, +}; + +template <int max_words> +int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp, + int significant_digits) { + SetToZero(); + assert(fp.type == FloatType::kNumber); + + if (fp.subrange_begin == nullptr) { + // We already exactly parsed the mantissa, so no more work is necessary. 
+ words_[0] = fp.mantissa & 0xffffffffu; + words_[1] = fp.mantissa >> 32; + if (words_[1]) { + size_ = 2; + } else if (words_[0]) { + size_ = 1; + } + return fp.exponent; + } + int exponent_adjust = + ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits); + return fp.literal_exponent + exponent_adjust; +} + +template <int max_words> +int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, + int significant_digits) { + assert(significant_digits <= Digits10() + 1); + SetToZero(); + + bool after_decimal_point = false; + // Discard any leading zeroes before the decimal point + while (begin < end && *begin == '0') { + ++begin; + } + int dropped_digits = 0; + // Discard any trailing zeroes. These may or may not be after the decimal + // point. + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + if (begin < end && *std::prev(end) == '.') { + // If the string ends in '.', either before or after dropping zeroes, then + // drop the decimal point and look for more digits to drop. + dropped_digits = 0; + --end; + while (begin < end && *std::prev(end) == '0') { + --end; + ++dropped_digits; + } + } else if (dropped_digits) { + // We dropped digits, and aren't sure if they're before or after the decimal + // point. Figure that out now. + const char* dp = std::find(begin, end, '.'); + if (dp != end) { + // The dropped trailing digits were after the decimal point, so don't + // count them. + dropped_digits = 0; + } + } + // Any non-fraction digits we dropped need to be accounted for in our exponent + // adjustment. + int exponent_adjust = dropped_digits; + + uint32_t queued = 0; + int digits_queued = 0; + for (; begin != end && significant_digits > 0; ++begin) { + if (*begin == '.') { + after_decimal_point = true; + continue; + } + if (after_decimal_point) { + // For each fractional digit we emit in our parsed integer, adjust our + // decimal exponent to compensate. 
+ --exponent_adjust; + } + int digit = (*begin - '0'); + --significant_digits; + if (significant_digits == 0 && std::next(begin) != end && + (digit == 0 || digit == 5)) { + // If this is the very last significant digit, but insignificant digits + // remain, we know that the last of those remaining significant digits is + // nonzero. (If it wasn't, we would have stripped it before we got here.) + // So if this final digit is a 0 or 5, adjust it upward by 1. + // + // This adjustment is what allows incredibly large mantissas ending in + // 500000...000000000001 to correctly round up, rather than to nearest. + ++digit; + } + queued = 10 * queued + digit; + ++digits_queued; + if (digits_queued == kMaxSmallPowerOfTen) { + MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]); + AddWithCarry(0, queued); + queued = digits_queued = 0; + } + } + // Encode any remaining digits. + if (digits_queued) { + MultiplyBy(kTenToNth[digits_queued]); + AddWithCarry(0, queued); + } + + // If any insignificant digits remain, we will drop them. But if we have not + // yet read the decimal point, then we have to adjust the exponent to account + // for the dropped digits. + if (begin < end && !after_decimal_point) { + // This call to std::find will result in a pointer either to the decimal + // point, or to the end of our buffer if there was none. + // + // Either way, [begin, decimal_point) will contain the set of dropped digits + // that require an exponent adjustment. + const char* decimal_point = std::find(begin, end, '.'); + exponent_adjust += (decimal_point - begin); + } + return exponent_adjust; +} + +template <int max_words> +/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth( + int n) { + BigUnsigned answer(1u); + + // Seed from the table of large powers, if possible. 
+ bool first_pass = true; + while (n >= kLargePowerOfFiveStep) { + int big_power = + std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); + if (first_pass) { + // just copy, rather than multiplying by 1 + std::copy( + LargePowerOfFiveData(big_power), + LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), + answer.words_); + answer.size_ = LargePowerOfFiveSize(big_power); + first_pass = false; + } else { + answer.MultiplyBy(LargePowerOfFiveSize(big_power), + LargePowerOfFiveData(big_power)); + } + n -= kLargePowerOfFiveStep * big_power; + } + answer.MultiplyByFiveToTheNth(n); + return answer; +} + +template <int max_words> +void BigUnsigned<max_words>::MultiplyStep(int original_size, + const uint32_t* other_words, + int other_size, int step) { + int this_i = std::min(original_size - 1, step); + int other_i = step - this_i; + + uint64_t this_word = 0; + uint64_t carry = 0; + for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) { + uint64_t product = words_[this_i]; + product *= other_words[other_i]; + this_word += product; + carry += (this_word >> 32); + this_word &= 0xffffffff; + } + AddWithCarry(step + 1, carry); + words_[step] = this_word & 0xffffffff; + if (this_word > 0 && size_ <= step) { + size_ = step + 1; + } +} + +template <int max_words> +std::string BigUnsigned<max_words>::ToString() const { + BigUnsigned<max_words> copy = *this; + std::string result; + // Build result in reverse order + while (copy.size() > 0) { + int next_digit = copy.DivMod<10>(); + result.push_back('0' + next_digit); + } + if (result.empty()) { + result.push_back('0'); + } + std::reverse(result.begin(), result.end()); + return result; +} + +template class BigUnsigned<4>; +template class BigUnsigned<84>; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.h b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.h new file mode 100644 index 
000000000000..8f702976a80d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint.h @@ -0,0 +1,423 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ + +#include <algorithm> +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/config.h" +#include "absl/strings/ascii.h" +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// The largest power that 5 can be raised to and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfFive = 13; +// The largest power that 10 can be raised to and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfTen = 9; + +ABSL_DLL extern const uint32_t + kFiveToNth[kMaxSmallPowerOfFive + 1]; +ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; + +// Large, fixed-width unsigned integer. +// +// Exact rounding for decimal-to-binary floating point conversion requires very +// large integer math, but a design goal of absl::from_chars is to avoid +// allocating memory. The integer precision needed for decimal-to-binary +// conversions is large but bounded, so a huge fixed-width integer class +// suffices. +// +// This is an intentionally limited big integer class.
Only needed operations +// are implemented. All storage lives in an array data member, and all +// arithmetic is done in-place, to avoid requiring separate storage for operand +// and result. +// +// This is an internal class. Some methods live in the .cc file, and are +// instantiated only for the values of max_words we need. +template <int max_words> +class BigUnsigned { + public: + static_assert(max_words == 4 || max_words == 84, + "unsupported max_words value"); + + BigUnsigned() : size_(0), words_{} {} + explicit constexpr BigUnsigned(uint64_t v) + : size_((v >> 32) ? 2 : v ? 1 : 0), + words_{static_cast<uint32_t>(v & 0xffffffffu), + static_cast<uint32_t>(v >> 32)} {} + + // Constructs a BigUnsigned from the given string_view containing a decimal + // value. If the input string is not a decimal integer, constructs a 0 + // instead. + explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { + // Check for valid input, returning a 0 otherwise. This is reasonable + // behavior only because this constructor is for unit tests. + if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || + sv.empty()) { + return; + } + int exponent_adjust = + ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); + if (exponent_adjust > 0) { + MultiplyByTenToTheNth(exponent_adjust); + } + } + + // Loads the mantissa value of a previously-parsed float. + // + // Returns the associated decimal exponent. The value of the parsed float is + // exactly *this * 10**exponent. + int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); + + // Returns the number of decimal digits of precision this type provides. All + // numbers with this many decimal digits or fewer are representable by this + // type. + // + // Analagous to std::numeric_limits<BigUnsigned>::digits10. + static constexpr int Digits10() { + // 9975007/1035508 is very slightly less than log10(2**32). 
+ return static_cast<uint64_t>(max_words) * 9975007 / 1035508; + } + + // Shifts left by the given number of bits. + void ShiftLeft(int count) { + if (count > 0) { + const int word_shift = count / 32; + if (word_shift >= max_words) { + SetToZero(); + return; + } + size_ = (std::min)(size_ + word_shift, max_words); + count %= 32; + if (count == 0) { + std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); + } else { + for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { + words_[i] = (words_[i - word_shift] << count) | + (words_[i - word_shift - 1] >> (32 - count)); + } + words_[word_shift] = words_[0] << count; + // Grow size_ if necessary. + if (size_ < max_words && words_[size_]) { + ++size_; + } + } + std::fill(words_, words_ + word_shift, 0u); + } + } + + + // Multiplies by v in-place. + void MultiplyBy(uint32_t v) { + if (size_ == 0 || v == 1) { + return; + } + if (v == 0) { + SetToZero(); + return; + } + const uint64_t factor = v; + uint64_t window = 0; + for (int i = 0; i < size_; ++i) { + window += factor * words_[i]; + words_[i] = window & 0xffffffff; + window >>= 32; + } + // If carry bits remain and there's space for them, grow size_. + if (window && size_ < max_words) { + words_[size_] = window & 0xffffffff; + ++size_; + } + } + + void MultiplyBy(uint64_t v) { + uint32_t words[2]; + words[0] = static_cast<uint32_t>(v); + words[1] = static_cast<uint32_t>(v >> 32); + if (words[1] == 0) { + MultiplyBy(words[0]); + } else { + MultiplyBy(2, words); + } + } + + // Multiplies in place by 5 to the power of n. n must be non-negative. + void MultiplyByFiveToTheNth(int n) { + while (n >= kMaxSmallPowerOfFive) { + MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); + n -= kMaxSmallPowerOfFive; + } + if (n > 0) { + MultiplyBy(kFiveToNth[n]); + } + } + + // Multiplies in place by 10 to the power of n. n must be non-negative. 
+ void MultiplyByTenToTheNth(int n) { + if (n > kMaxSmallPowerOfTen) { + // For large n, raise to a power of 5, then shift left by the same amount. + // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. + MultiplyByFiveToTheNth(n); + ShiftLeft(n); + } else if (n > 0) { + // We can do this more quickly for very small N by using a single + // multiplication. + MultiplyBy(kTenToNth[n]); + } + } + + // Returns the value of 5**n, for non-negative n. This implementation uses + // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling + // MultiplyByFiveToTheNth(). + static BigUnsigned FiveToTheNth(int n); + + // Multiplies by another BigUnsigned, in-place. + template <int M> + void MultiplyBy(const BigUnsigned<M>& other) { + MultiplyBy(other.size(), other.words()); + } + + void SetToZero() { + std::fill(words_, words_ + size_, 0u); + size_ = 0; + } + + // Returns the value of the nth word of this BigUnsigned. This is + // range-checked, and returns 0 on out-of-bounds accesses. + uint32_t GetWord(int index) const { + if (index < 0 || index >= size_) { + return 0; + } + return words_[index]; + } + + // Returns this integer as a decimal string. This is not used in the decimal- + // to-binary conversion; it is intended to aid in testing. + std::string ToString() const; + + int size() const { return size_; } + const uint32_t* words() const { return words_; } + + private: + // Reads the number between [begin, end), possibly containing a decimal point, + // into this BigUnsigned. + // + // Callers are required to ensure [begin, end) contains a valid number, with + // one or more decimal digits and at most one decimal point. This routine + // will behave unpredictably if these preconditions are not met. + // + // Only the first `significant_digits` digits are read. 
Digits beyond this + // limit are "sticky": If the final significant digit is 0 or 5, and if any + // dropped digit is nonzero, then that final significant digit is adjusted up + // to 1 or 6. This adjustment allows for precise rounding. + // + // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to + // account for the decimal point and for dropped significant digits. After + // this function returns, + // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. + int ReadDigits(const char* begin, const char* end, int significant_digits); + + // Performs a step of big integer multiplication. This computes the full + // (64-bit-wide) values that should be added at the given index (step), and + // adds to that location in-place. + // + // Because our math all occurs in place, we must multiply starting from the + // highest word working downward. (This is a bit more expensive due to the + // extra carries involved.) + // + // This must be called in steps, for each word to be calculated, starting from + // the high end and working down to 0. The first value of `step` should be + // `std::min(original_size + other.size_ - 2, max_words - 1)`. + // The reason for this expression is that multiplying the i'th word from one + // multiplicand and the j'th word of another multiplicand creates a + // two-word-wide value to be stored at the (i+j)'th element. The highest + // word indices we will access are `original_size - 1` from this object, and + // `other.size_ - 1` from our operand. Therefore, + // `original_size + other.size_ - 2` is the first step we should calculate, + // but limited on an upper bound by max_words. + + // Working from high-to-low ensures that we do not overwrite the portions of + // the initial value of *this which are still needed for later steps. + // + // Once called with step == 0, *this contains the result of the + // multiplication. 
+ // + // `original_size` is the size_ of *this before the first call to + // MultiplyStep(). `other_words` and `other_size` are the contents of our + // operand. `step` is the step to perform, as described above. + void MultiplyStep(int original_size, const uint32_t* other_words, + int other_size, int step); + + void MultiplyBy(int other_size, const uint32_t* other_words) { + const int original_size = size_; + const int first_step = + (std::min)(original_size + other_size - 2, max_words - 1); + for (int step = first_step; step >= 0; --step) { + MultiplyStep(original_size, other_words, other_size, step); + } + } + + // Adds a 32-bit value to the index'th word, with carry. + void AddWithCarry(int index, uint32_t value) { + if (value) { + while (index < max_words && value > 0) { + words_[index] += value; + // carry if we overflowed in this word: + if (value > words_[index]) { + value = 1; + ++index; + } else { + value = 0; + } + } + size_ = (std::min)(max_words, (std::max)(index + 1, size_)); + } + } + + void AddWithCarry(int index, uint64_t value) { + if (value && index < max_words) { + uint32_t high = value >> 32; + uint32_t low = value & 0xffffffff; + words_[index] += low; + if (words_[index] < low) { + ++high; + if (high == 0) { + // Carry from the low word caused our high word to overflow. + // Short circuit here to do the right thing. + AddWithCarry(index + 2, static_cast<uint32_t>(1)); + return; + } + } + if (high > 0) { + AddWithCarry(index + 1, high); + } else { + // Normally 32-bit AddWithCarry() sets size_, but since we don't call + // it when `high` is 0, do it ourselves here. + size_ = (std::min)(max_words, (std::max)(index + 1, size_)); + } + } + } + + // Divide this in place by a constant divisor. Returns the remainder of the + // division. 
+ template <uint32_t divisor> + uint32_t DivMod() { + uint64_t accumulator = 0; + for (int i = size_ - 1; i >= 0; --i) { + accumulator <<= 32; + accumulator += words_[i]; + // accumulator / divisor will never overflow an int32_t in this loop + words_[i] = static_cast<uint32_t>(accumulator / divisor); + accumulator = accumulator % divisor; + } + while (size_ > 0 && words_[size_ - 1] == 0) { + --size_; + } + return static_cast<uint32_t>(accumulator); + } + + // The number of elements in words_ that may carry significant values. + // All elements beyond this point are 0. + // + // When size_ is 0, this BigUnsigned stores the value 0. + // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is + // nonzero. This can occur due to overflow truncation. + // In particular, x.size_ != y.size_ does *not* imply x != y. + int size_; + uint32_t words_[max_words]; +}; + +// Compares two big integer instances. +// +// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. +template <int N, int M> +int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = (std::max)(lhs.size(), rhs.size()); + for (int i = limit - 1; i >= 0; --i) { + const uint32_t lhs_word = lhs.GetWord(i); + const uint32_t rhs_word = rhs.GetWord(i); + if (lhs_word < rhs_word) { + return -1; + } else if (lhs_word > rhs_word) { + return 1; + } + } + return 0; +} + +template <int N, int M> +bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = (std::max)(lhs.size(), rhs.size()); + for (int i = 0; i < limit; ++i) { + if (lhs.GetWord(i) != rhs.GetWord(i)) { + return false; + } + } + return true; +} + +template <int N, int M> +bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs == rhs); +} + +template <int N, int M> +bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return Compare(lhs, rhs) == -1; +} + +template <int N, int M> +bool operator>(const BigUnsigned<N>& lhs, const 
BigUnsigned<M>& rhs) { + return rhs < lhs; +} +template <int N, int M> +bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(rhs < lhs); +} +template <int N, int M> +bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs < rhs); +} + +// Output operator for BigUnsigned, for testing purposes only. +template <int N> +std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { + return os << num.ToString(); +} + +// Explicit instantiation declarations for the sizes of BigUnsigned that we +// are using. +// +// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is +// still bigger than an int128, and 84 is a large value we will want to use +// in the from_chars implementation. +// +// Comments justifying the use of 84 belong in the from_chars implementation, +// and will be added in a follow-up CL. +extern template class BigUnsigned<4>; +extern template class BigUnsigned<84>; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_bigint_test.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint_test.cc new file mode 100644 index 000000000000..a8b9945829e8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_bigint_test.cc @@ -0,0 +1,260 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_bigint.h" + +#include <string> + +#include "gtest/gtest.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +TEST(BigUnsigned, ShiftLeft) { + { + // Check that 3 * 2**100 is calculated correctly + BigUnsigned<4> num(3u); + num.ShiftLeft(100); + EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128")); + } + { + // Test that overflow is truncated properly. + // 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint. + // Shifting left by 125 bits should truncate off the high bit, so that + // 15 << 125 == 7 << 125 + // after truncation. + BigUnsigned<4> a(15u); + BigUnsigned<4> b(7u); + BigUnsigned<4> c(3u); + a.ShiftLeft(125); + b.ShiftLeft(125); + c.ShiftLeft(125); + EXPECT_EQ(a, b); + EXPECT_NE(a, c); + } + { + // Same test, larger bigint: + BigUnsigned<84> a(15u); + BigUnsigned<84> b(7u); + BigUnsigned<84> c(3u); + a.ShiftLeft(84 * 32 - 3); + b.ShiftLeft(84 * 32 - 3); + c.ShiftLeft(84 * 32 - 3); + EXPECT_EQ(a, b); + EXPECT_NE(a, c); + } + { + // Check that incrementally shifting has the same result as doing it all at + // once (attempting to capture corner cases.) + const std::string seed = "1234567890123456789012345678901234567890"; + BigUnsigned<84> a(seed); + for (int i = 1; i <= 84 * 32; ++i) { + a.ShiftLeft(1); + BigUnsigned<84> b(seed); + b.ShiftLeft(i); + EXPECT_EQ(a, b); + } + // And we should have fully rotated all bits off by now: + EXPECT_EQ(a, BigUnsigned<84>(0u)); + } + { + // Bit shifting large and small numbers by large and small offsets. + // Intended to exercise bounds-checking corner on ShiftLeft() (directly + // and under asan). 
+ + // 2**(32*84)-1 + const BigUnsigned<84> all_bits_one( + "1474444211396924248063325089479706787923460402125687709454567433186613" + "6228083464060749874845919674257665016359189106695900028098437021384227" + "3285029708032466536084583113729486015826557532750465299832071590813090" + "2011853039837649252477307070509704043541368002938784757296893793903797" + "8180292336310543540677175225040919704702800559606097685920595947397024" + "8303316808753252115729411497720357971050627997031988036134171378490368" + "6008000778741115399296162550786288457245180872759047016734959330367829" + "5235612397427686310674725251378116268607113017720538636924549612987647" + "5767411074510311386444547332882472126067840027882117834454260409440463" + "9345147252664893456053258463203120637089916304618696601333953616715125" + "2115882482473279040772264257431663818610405673876655957323083702713344" + "4201105427930770976052393421467136557055"); + const BigUnsigned<84> zero(0u); + const BigUnsigned<84> one(1u); + // in bounds shifts + for (int i = 1; i < 84*32; ++i) { + // shifting all_bits_one to the left should result in a smaller number, + // since the high bits rotate off and the low bits are replaced with + // zeroes. + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(i); + EXPECT_GT(all_bits_one, big_shifted); + // Shifting 1 to the left should instead result in a larger number. + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(i); + EXPECT_LT(one, small_shifted); + } + // Shifting by zero or a negative number has no effect + for (int no_op_shift : {0, -1, -84 * 32, std::numeric_limits<int>::min()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(all_bits_one, big_shifted); + BigUnsigned<84> small_shifted = one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(one, small_shifted); + } + // Shifting by an amount greater than the number of bits should result in + // zero. 
+ for (int out_of_bounds_shift : + {84 * 32, 84 * 32 + 1, std::numeric_limits<int>::max()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, big_shifted); + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, small_shifted); + } + } +} + +TEST(BigUnsigned, MultiplyByUint32) { + const BigUnsigned<84> factorial_100( + "933262154439441526816992388562667004907159682643816214685929638952175999" + "932299156089414639761565182862536979208272237582511852109168640000000000" + "00000000000000"); + BigUnsigned<84> a(1u); + for (uint32_t i = 1; i <= 100; ++i) { + a.MultiplyBy(i); + } + EXPECT_EQ(a, BigUnsigned<84>(factorial_100)); +} + +TEST(BigUnsigned, MultiplyByBigUnsigned) { + { + // Put the terms of factorial_200 into two bigints, and multiply them + // together. + const BigUnsigned<84> factorial_200( + "7886578673647905035523632139321850622951359776871732632947425332443594" + "4996340334292030428401198462390417721213891963883025764279024263710506" + "1926624952829931113462857270763317237396988943922445621451664240254033" + "2918641312274282948532775242424075739032403212574055795686602260319041" + "7032406235170085879617892222278962370389737472000000000000000000000000" + "0000000000000000000000000"); + BigUnsigned<84> evens(1u); + BigUnsigned<84> odds(1u); + for (uint32_t i = 1; i < 200; i += 2) { + odds.MultiplyBy(i); + evens.MultiplyBy(i + 1); + } + evens.MultiplyBy(odds); + EXPECT_EQ(evens, factorial_200); + } + { + // Multiply various powers of 10 together. 
+ for (int a = 0 ; a < 700; a += 25) { + SCOPED_TRACE(a); + BigUnsigned<84> a_value("3" + std::string(a, '0')); + for (int b = 0; b < (700 - a); b += 25) { + SCOPED_TRACE(b); + BigUnsigned<84> b_value("2" + std::string(b, '0')); + BigUnsigned<84> expected_product("6" + std::string(a + b, '0')); + b_value.MultiplyBy(a_value); + EXPECT_EQ(b_value, expected_product); + } + } + } +} + +TEST(BigUnsigned, MultiplyByOverflow) { + { + // Check that multiplcation overflow predictably truncates. + + // A big int with all bits on. + BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455"); + // Modulo 2**128, this is equal to -1. Therefore the square of this, + // modulo 2**128, should be 1. + all_bits_on.MultiplyBy(all_bits_on); + EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u)); + } + { + // Try multiplying a large bigint by 2**50, and compare the result to + // shifting. + BigUnsigned<4> value_1("12345678901234567890123456789012345678"); + BigUnsigned<4> value_2("12345678901234567890123456789012345678"); + BigUnsigned<4> two_to_fiftieth(1u); + two_to_fiftieth.ShiftLeft(50); + + value_1.ShiftLeft(50); + value_2.MultiplyBy(two_to_fiftieth); + EXPECT_EQ(value_1, value_2); + } +} + +TEST(BigUnsigned, FiveToTheNth) { + { + // Sanity check that MultiplyByFiveToTheNth gives consistent answers, up to + // and including overflow. + for (int i = 0; i < 1160; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(123u); + BigUnsigned<84> value_2(123u); + value_1.MultiplyByFiveToTheNth(i); + for (int j = 0; j < i; j++) { + value_2.MultiplyBy(5u); + } + EXPECT_EQ(value_1, value_2); + } + } + { + // Check that the faster, table-lookup-based static method returns the same + // result that multiplying in-place would return, up to and including + // overflow. 
+ for (int i = 0; i < 1160; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(1u); + value_1.MultiplyByFiveToTheNth(i); + BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i); + EXPECT_EQ(value_1, value_2); + } + } +} + +TEST(BigUnsigned, TenToTheNth) { + { + // Sanity check MultiplyByTenToTheNth. + for (int i = 0; i < 800; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(123u); + BigUnsigned<84> value_2(123u); + value_1.MultiplyByTenToTheNth(i); + for (int j = 0; j < i; j++) { + value_2.MultiplyBy(10u); + } + EXPECT_EQ(value_1, value_2); + } + } + { + // Alternate testing approach, taking advantage of the decimal parser. + for (int i = 0; i < 200; ++i) { + SCOPED_TRACE(i); + BigUnsigned<84> value_1(135u); + value_1.MultiplyByTenToTheNth(i); + BigUnsigned<84> value_2("135" + std::string(i, '0')); + EXPECT_EQ(value_1, value_2); + } + } +} + + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_parse.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.cc new file mode 100644 index 000000000000..8b11868c887a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.cc @@ -0,0 +1,504 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/charconv.h" + +#include <cassert> +#include <cstdint> +#include <limits> + +#include "absl/strings/internal/memutil.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +// ParseFloat<10> will read the first 19 significant digits of the mantissa. +// This number was chosen for multiple reasons. +// +// (a) First, for whatever integer type we choose to represent the mantissa, we +// want to choose the largest possible number of decimal digits for that integer +// type. We are using uint64_t, which can express any 19-digit unsigned +// integer. +// +// (b) Second, we need to parse enough digits that the binary value of any +// mantissa we capture has more bits of resolution than the mantissa +// representation in the target float. Our algorithm requires at least 3 bits +// of headway, but 19 decimal digits give a little more than that. +// +// The following static assertions verify the above comments: +constexpr int kDecimalMantissaDigitsMax = 19; + +static_assert(std::numeric_limits<uint64_t>::digits10 == + kDecimalMantissaDigitsMax, + "(a) above"); + +// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. +static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed"); +static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact"); +static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); + +// The lowest valued 19-digit decimal mantissa we can read still contains +// sufficient information to reconstruct a binary mantissa. +static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); + +// ParseFloat<16> will read the first 15 significant digits of the mantissa. +// +// Because a base-16-to-base-2 conversion can be done exactly, we do not need +// to maximize the number of scanned hex digits to improve our conversion. 
What +// is required is to scan two more bits than the mantissa can represent, so that +// we always round correctly. +// +// (One extra bit does not suffice to perform correct rounding, since a number +// exactly halfway between two representable floats has unique rounding rules, +// so we need to differentiate between a "halfway between" number and a "closer +// to the larger value" number.) +constexpr int kHexadecimalMantissaDigitsMax = 15; + +// The minimum number of significant bits that will be read from +// kHexadecimalMantissaDigitsMax hex digits. We must subtract by three, since +// the most significant digit can be a "1", which only contributes a single +// significant bit. +constexpr int kGuaranteedHexadecimalMantissaBitPrecision = + 4 * kHexadecimalMantissaDigitsMax - 3; + +static_assert(kGuaranteedHexadecimalMantissaBitPrecision > + std::numeric_limits<double>::digits + 2, + "kHexadecimalMantissaDigitsMax too small"); + +// We also impose a limit on the number of significant digits we will read from +// an exponent, to avoid having to deal with integer overflow. We use 9 for +// this purpose. +// +// If we read a 9 digit exponent, the end result of the conversion will +// necessarily be infinity or zero, depending on the sign of the exponent. +// Therefore we can just drop extra digits on the floor without any extra +// logic. +constexpr int kDecimalExponentDigitsMax = 9; +static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax, + "int type too small"); + +// To avoid incredibly large inputs causing integer overflow for our exponent, +// we impose an arbitrary but very large limit on the number of significant +// digits we will accept. The implementation refuses to match a string with +// more consecutive significant mantissa digits than this. +constexpr int kDecimalDigitLimit = 50000000; + +// Corresponding limit for hexadecimal digit inputs. 
This is one fourth the +// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires +// a binary exponent adjustment of 4. +constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4; + +// The largest exponent we can read is 999999999 (per +// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get +// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these +// comfortably fits in an integer. +// +// We count kDecimalDigitLimit twice because there are independent limits for +// numbers before and after the decimal point. (In the case where there are no +// significant digits before the decimal point, there are independent limits for +// post-decimal-point leading zeroes and for significant digits.) +static_assert(999999999 + 2 * kDecimalDigitLimit < + std::numeric_limits<int>::max(), + "int type too small"); +static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) < + std::numeric_limits<int>::max(), + "int type too small"); + +// Returns true if the provided bitfield allows parsing an exponent value +// (e.g., "1.5e100"). +bool AllowExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific || !fixed; +} + +// Returns true if the provided bitfield requires an exponent value be present. 
+bool RequireExponent(chars_format flags) { + bool fixed = (flags & chars_format::fixed) == chars_format::fixed; + bool scientific = + (flags & chars_format::scientific) == chars_format::scientific; + return scientific && !fixed; +} + +const int8_t kAsciiToInt[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +// Returns true if `ch` is a digit in the given base +template <int base> +bool IsDigit(char ch); + +// Converts a valid `ch` to its digit value in the given base. +template <int base> +unsigned ToDigit(char ch); + +// Returns true if `ch` is the exponent delimiter for the given base. +template <int base> +bool IsExponentCharacter(char ch); + +// Returns the maximum number of significant digits we will read for a float +// in the given base. +template <int base> +constexpr int MantissaDigitsMax(); + +// Returns the largest consecutive run of digits we will accept when parsing a +// number in the given base. 
+template <int base> +constexpr int DigitLimit(); + +// Returns the amount the exponent must be adjusted by for each dropped digit. +// (For decimal this is 1, since the digits are in base 10 and the exponent base +// is also 10, but for hexadecimal this is 4, since the digits are base 16 but +// the exponent base is 2.) +template <int base> +constexpr int DigitMagnitude(); + +template <> +bool IsDigit<10>(char ch) { + return ch >= '0' && ch <= '9'; +} +template <> +bool IsDigit<16>(char ch) { + return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0; +} + +template <> +unsigned ToDigit<10>(char ch) { + return ch - '0'; +} +template <> +unsigned ToDigit<16>(char ch) { + return kAsciiToInt[static_cast<unsigned char>(ch)]; +} + +template <> +bool IsExponentCharacter<10>(char ch) { + return ch == 'e' || ch == 'E'; +} + +template <> +bool IsExponentCharacter<16>(char ch) { + return ch == 'p' || ch == 'P'; +} + +template <> +constexpr int MantissaDigitsMax<10>() { + return kDecimalMantissaDigitsMax; +} +template <> +constexpr int MantissaDigitsMax<16>() { + return kHexadecimalMantissaDigitsMax; +} + +template <> +constexpr int DigitLimit<10>() { + return kDecimalDigitLimit; +} +template <> +constexpr int DigitLimit<16>() { + return kHexadecimalDigitLimit; +} + +template <> +constexpr int DigitMagnitude<10>() { + return 1; +} +template <> +constexpr int DigitMagnitude<16>() { + return 4; +} + +// Reads decimal digits from [begin, end) into *out. Returns the number of +// digits consumed. +// +// After max_digits has been read, keeps consuming characters, but no longer +// adjusts *out. If a nonzero digit is dropped this way, *dropped_nonzero_digit +// is set; otherwise, it is left unmodified. +// +// If no digits are matched, returns 0 and leaves *out unchanged. +// +// ConsumeDigits does not protect against overflow on *out; max_digits must +// be chosen with respect to type T to avoid the possibility of overflow. 
+template <int base, typename T> +int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out, + bool* dropped_nonzero_digit) { + if (base == 10) { + assert(max_digits <= std::numeric_limits<T>::digits10); + } else if (base == 16) { + assert(max_digits * 4 <= std::numeric_limits<T>::digits); + } + const char* const original_begin = begin; + + // Skip leading zeros, but only if *out is zero. + // They don't cause an overflow so we don't have to count them for + // `max_digits`. + while (!*out && end != begin && *begin == '0') ++begin; + + T accumulator = *out; + const char* significant_digits_end = + (end - begin > max_digits) ? begin + max_digits : end; + while (begin < significant_digits_end && IsDigit<base>(*begin)) { + // Do not guard against *out overflow; max_digits was chosen to avoid this. + // Do assert against it, to detect problems in debug builds. + auto digit = static_cast<T>(ToDigit<base>(*begin)); + assert(accumulator * base >= accumulator); + accumulator *= base; + assert(accumulator + digit >= accumulator); + accumulator += digit; + ++begin; + } + bool dropped_nonzero = false; + while (begin < end && IsDigit<base>(*begin)) { + dropped_nonzero = dropped_nonzero || (*begin != '0'); + ++begin; + } + if (dropped_nonzero && dropped_nonzero_digit != nullptr) { + *dropped_nonzero_digit = true; + } + *out = accumulator; + return static_cast<int>(begin - original_begin); +} + +// Returns true if `v` is one of the chars allowed inside parentheses following +// a NaN. +bool IsNanChar(char v) { + return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') || + (v >= 'A' && v <= 'Z'); +} + +// Checks the range [begin, end) for a strtod()-formatted infinity or NaN. If +// one is found, sets `out` appropriately and returns true. 
+bool ParseInfinityOrNan(const char* begin, const char* end, + strings_internal::ParsedFloat* out) { + if (end - begin < 3) { + return false; + } + switch (*begin) { + case 'i': + case 'I': { + // An infinity string consists of the characters "inf" or "infinity", + // case insensitive. + if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kInfinity; + if (end - begin >= 8 && + strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) { + out->end = begin + 8; + } else { + out->end = begin + 3; + } + return true; + } + case 'n': + case 'N': { + // A NaN consists of the characters "nan", case insensitive, optionally + // followed by a parenthesized sequence of zero or more alphanumeric + // characters and/or underscores. + if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) { + return false; + } + out->type = strings_internal::FloatType::kNan; + out->end = begin + 3; + // NaN is allowed to be followed by a parenthesized string, consisting of + // only the characters [a-zA-Z0-9_]. Match that if it's present. + begin += 3; + if (begin < end && *begin == '(') { + const char* nan_begin = begin + 1; + while (nan_begin < end && IsNanChar(*nan_begin)) { + ++nan_begin; + } + if (nan_begin < end && *nan_begin == ')') { + // We found an extra NaN specifier range + out->subrange_begin = begin + 1; + out->subrange_end = nan_begin; + out->end = nan_begin + 1; + } + } + return true; + } + default: + return false; + } +} +} // namespace + +namespace strings_internal { + +template <int base> +strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, + chars_format format_flags) { + strings_internal::ParsedFloat result; + + // Exit early if we're given an empty range. + if (begin == end) return result; + + // Handle the infinity and NaN cases. 
+ if (ParseInfinityOrNan(begin, end, &result)) { + return result; + } + + const char* const mantissa_begin = begin; + while (begin < end && *begin == '0') { + ++begin; // skip leading zeros + } + uint64_t mantissa = 0; + + int exponent_adjustment = 0; + bool mantissa_is_inexact = false; + int pre_decimal_digits = ConsumeDigits<base>( + begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); + begin += pre_decimal_digits; + int digits_left; + if (pre_decimal_digits >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } else if (pre_decimal_digits > MantissaDigitsMax<base>()) { + // We dropped some non-fraction digits on the floor. Adjust our exponent + // to compensate. + exponent_adjustment = + static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>()); + digits_left = 0; + } else { + digits_left = + static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits); + } + if (begin < end && *begin == '.') { + ++begin; + if (mantissa == 0) { + // If we haven't seen any nonzero digits yet, keep skipping zeros. We + // have to adjust the exponent to reflect the changed place value. + const char* begin_zeros = begin; + while (begin < end && *begin == '0') { + ++begin; + } + int zeros_skipped = static_cast<int>(begin - begin_zeros); + if (zeros_skipped >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } + exponent_adjustment -= static_cast<int>(zeros_skipped); + } + int post_decimal_digits = ConsumeDigits<base>( + begin, end, digits_left, &mantissa, &mantissa_is_inexact); + begin += post_decimal_digits; + + // Since `mantissa` is an integer, each significant digit we read after + // the decimal point requires an adjustment to the exponent. "1.23e0" will + // be stored as `mantissa` == 123 and `exponent` == -2 (that is, + // "123e-2"). 
+ if (post_decimal_digits >= DigitLimit<base>()) { + // refuse to parse pathological inputs + return result; + } else if (post_decimal_digits > digits_left) { + exponent_adjustment -= digits_left; + } else { + exponent_adjustment -= post_decimal_digits; + } + } + // If we've found no mantissa whatsoever, this isn't a number. + if (mantissa_begin == begin) { + return result; + } + // A bare "." doesn't count as a mantissa either. + if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { + return result; + } + + if (mantissa_is_inexact) { + // We dropped significant digits on the floor. Handle this appropriately. + if (base == 10) { + // If we truncated significant decimal digits, store the full range of the + // mantissa for future big integer math for exact rounding. + result.subrange_begin = mantissa_begin; + result.subrange_end = begin; + } else if (base == 16) { + // If we truncated hex digits, reflect this fact by setting the low + // ("sticky") bit. This allows for correct rounding in all cases. + mantissa |= 1; + } + } + result.mantissa = mantissa; + + const char* const exponent_begin = begin; + result.literal_exponent = 0; + bool found_exponent = false; + if (AllowExponent(format_flags) && begin < end && + IsExponentCharacter<base>(*begin)) { + bool negative_exponent = false; + ++begin; + if (begin < end && *begin == '-') { + negative_exponent = true; + ++begin; + } else if (begin < end && *begin == '+') { + ++begin; + } + const char* const exponent_digits_begin = begin; + // Exponent is always expressed in decimal, even for hexadecimal floats. + begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, + &result.literal_exponent, nullptr); + if (begin == exponent_digits_begin) { + // there were no digits where we expected an exponent. We failed to read + // an exponent and should not consume the 'e' after all. Rewind 'begin'. 
+ found_exponent = false; + begin = exponent_begin; + } else { + found_exponent = true; + if (negative_exponent) { + result.literal_exponent = -result.literal_exponent; + } + } + } + + if (!found_exponent && RequireExponent(format_flags)) { + // Provided flags required an exponent, but none was found. This results + // in a failure to scan. + return result; + } + + // Success! + result.type = strings_internal::FloatType::kNumber; + if (result.mantissa > 0) { + result.exponent = result.literal_exponent + + (DigitMagnitude<base>() * exponent_adjustment); + } else { + result.exponent = 0; + } + result.end = begin; + return result; +} + +template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + chars_format format_flags); +template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + chars_format format_flags); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_parse.h b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.h new file mode 100644 index 000000000000..505998b5394a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_parse.h @@ -0,0 +1,99 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/charconv.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Enum indicating whether a parsed float is a number or special value. +enum class FloatType { kNumber, kInfinity, kNan }; + +// The decomposed parts of a parsed `float` or `double`. +struct ParsedFloat { + // Representation of the parsed mantissa, with the decimal point adjusted to + // make it an integer. + // + // During decimal scanning, this contains 19 significant digits worth of + // mantissa value. If digits beyond this point are found, they + // are truncated, and if any of these dropped digits are nonzero, then + // `mantissa` is inexact, and the full mantissa is stored in [subrange_begin, + // subrange_end). + // + // During hexadecimal scanning, this contains 15 significant hex digits worth + // of mantissa value. Digits beyond this point are sticky -- they are + // truncated, but if any dropped digits are nonzero, the low bit of mantissa + // will be set. (This allows for precise rounding, and avoids the need + // to store the full mantissa in [subrange_begin, subrange_end).) + uint64_t mantissa = 0; + + // Floating point exponent. This reflects any decimal point adjustments and + // any truncated digits from the mantissa. The absolute value of the parsed + // number is represented by mantissa * (base ** exponent), where base==10 for + // decimal floats, and base==2 for hexadecimal floats. + int exponent = 0; + + // The literal exponent value scanned from the input, or 0 if none was + // present. This does not reflect any adjustments applied to mantissa. + int literal_exponent = 0; + + // The type of number scanned. + FloatType type = FloatType::kNumber; + + // When non-null, [subrange_begin, subrange_end) marks a range of characters + // that require further processing. 
The meaning is dependent on float type. + // If type == kNumber and this is set, this is a "wide input": the input + // mantissa contained more than 19 digits. The range contains the full + // mantissa. It plus `literal_exponent` need to be examined to find the best + // floating point match. + // If type == kNan and this is set, the range marks the contents of a + // matched parenthesized character region after the NaN. + const char* subrange_begin = nullptr; + const char* subrange_end = nullptr; + + // One-past-the-end of the successfully parsed region, or nullptr if no + // matching pattern was found. + const char* end = nullptr; +}; + +// Read the floating point number in the provided range, and populate +// ParsedFloat accordingly. +// +// format_flags is a bitmask value specifying what patterns this API will match. +// `scientific` and `fixed` are honored per std::from_chars rules +// ([utility.from.chars], C++17): if exactly one of these bits is set, then an +// exponent is required, or disallowed, respectively. +// +// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is +// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat. 
+template <int base> +ParsedFloat ParseFloat(const char* begin, const char* end, + absl::chars_format format_flags); + +extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end, + absl::chars_format format_flags); +extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end, + absl::chars_format format_flags); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/charconv_parse_test.cc b/third_party/abseil_cpp/absl/strings/internal/charconv_parse_test.cc new file mode 100644 index 000000000000..bc2d11187651 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/charconv_parse_test.cc @@ -0,0 +1,357 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_parse.h" + +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/str_cat.h" + +using absl::chars_format; +using absl::strings_internal::FloatType; +using absl::strings_internal::ParsedFloat; +using absl::strings_internal::ParseFloat; + +namespace { + +// Check that a given string input is parsed to the expected mantissa and +// exponent. +// +// Input string `s` must contain a '$' character. 
It marks the end of the +// characters that should be consumed by the match. It is stripped from the +// input to ParseFloat. +// +// If input string `s` contains '[' and ']' characters, these mark the region +// of characters that should be marked as the "subrange". For NaNs, this is +// the location of the extended NaN string. For numbers, this is the location +// of the full, over-large mantissa. +template <int base> +void ExpectParsedFloat(std::string s, absl::chars_format format_flags, + FloatType expected_type, uint64_t expected_mantissa, + int expected_exponent, + int expected_literal_exponent = -999) { + SCOPED_TRACE(s); + + int begin_subrange = -1; + int end_subrange = -1; + // If s contains '[' and ']', then strip these characters and set the subrange + // indices appropriately. + std::string::size_type open_bracket_pos = s.find('['); + if (open_bracket_pos != std::string::npos) { + begin_subrange = static_cast<int>(open_bracket_pos); + s.replace(open_bracket_pos, 1, ""); + std::string::size_type close_bracket_pos = s.find(']'); + ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos, + "Test input contains [ without matching ]"); + end_subrange = static_cast<int>(close_bracket_pos); + s.replace(close_bracket_pos, 1, ""); + } + const std::string::size_type expected_characters_matched = s.find('$'); + ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, + "Input string must contain $"); + s.replace(expected_characters_matched, 1, ""); + + ParsedFloat parsed = + ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); + + EXPECT_NE(parsed.end, nullptr); + if (parsed.end == nullptr) { + return; // The following tests are not useful if we fully failed to parse + } + EXPECT_EQ(parsed.type, expected_type); + if (begin_subrange == -1) { + EXPECT_EQ(parsed.subrange_begin, nullptr); + EXPECT_EQ(parsed.subrange_end, nullptr); + } else { + EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange); + EXPECT_EQ(parsed.subrange_end, s.data() + 
end_subrange); + } + if (parsed.type == FloatType::kNumber) { + EXPECT_EQ(parsed.mantissa, expected_mantissa); + EXPECT_EQ(parsed.exponent, expected_exponent); + if (expected_literal_exponent != -999) { + EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent); + } + } + auto characters_matched = static_cast<int>(parsed.end - s.data()); + EXPECT_EQ(characters_matched, expected_characters_matched); +} + +// Check that a given string input is parsed to the expected mantissa and +// exponent. +// +// Input string `s` must contain a '$' character. It marks the end of the +// characters that were consumed by the match. +template <int base> +void ExpectNumber(std::string s, absl::chars_format format_flags, + uint64_t expected_mantissa, int expected_exponent, + int expected_literal_exponent = -999) { + ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber, + expected_mantissa, expected_exponent, + expected_literal_exponent); +} + +// Check that a given string input is parsed to the given special value. +// +// This tests against both number bases, since infinities and NaNs have +// identical representations in both modes. +void ExpectSpecial(const std::string& s, absl::chars_format format_flags, + FloatType type) { + ExpectParsedFloat<10>(s, format_flags, type, 0, 0); + ExpectParsedFloat<16>(s, format_flags, type, 0, 0); +} + +// Check that a given input string is not matched by ParseFloat. +template <int base> +void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) { + ParsedFloat parsed = + ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); + EXPECT_EQ(parsed.end, nullptr); +} + +TEST(ParseFloat, SimpleValue) { + // Test that various forms of floating point numbers all parse correctly. 
+ ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3); + ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8); + + // ExpectNumber does not attempt to drop trailing zeroes. + ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900, + -5); + ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general, + 0x1234abcdef000, -20); + + // Ensure non-matching characters after a number are ignored, even when they + // look like potentially matching characters. 
+ ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789, + -3); + ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general, + 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8); + + // Ensure we can read a full resolution mantissa without overflow. + ExpectNumber<10>("9999999999999999999$", chars_format::general, + 9999999999999999999u, 0); + ExpectNumber<16>("fffffffffffffff$", chars_format::general, + 0xfffffffffffffffu, 0); + + // Check that zero is consistently read. 
+ ExpectNumber<10>("0$", chars_format::general, 0, 0); + ExpectNumber<16>("0$", chars_format::general, 0, 0); + ExpectNumber<10>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0.00000000000000000000000000000000e123456$", + chars_format::general, 0, 0); + ExpectNumber<16>("0.00000000000000000000000000000000p123456$", + chars_format::general, 0, 0); +} + +TEST(ParseFloat, LargeDecimalMantissa) { + // After 19 significant decimal digits in the mantissa, ParseFloat will + // truncate additional digits. We need to test that: + // 1) the truncation to 19 digits happens + // 2) the returned exponent reflects the dropped significant digits + // 3) a correct literal_exponent is set + // + // If and only if a significant digit is found after 19 digits, then the + // entirety of the mantissa is marked as a subrange, in case the exact value + // is needed to make a rounding decision. The [ and ] characters below denote + // where such a subregion was marked by ParseFloat. They are not part of the + // input. + + // Mark a capture group only if a dropped digit is significant (nonzero). 
+ ExpectNumber<10>("100000000000000000000000000$", chars_format::general, + 1000000000000000000, + /* adjusted exponent */ 8); + + ExpectNumber<10>("123456789123456789100000000$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8); + + ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + // Leading zeroes should not count towards the 19 significant digit limit + ExpectNumber<10>("[00000000123456789123456789123456789]$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("00000000123456789123456789100000000$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8); + + // Truncated digits after the decimal point should not cause a further + // exponent adjustment. + ExpectNumber<10>("1.234567891234567891e123$", chars_format::general, + 1234567891234567891, 105); + ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 105, + /* literal exponent */ 123); + + // Ensure we truncate, and not round. (The from_chars algorithm we use + // depends on our guess missing low, if it misses, so we need the rounding + // error to be downward.) 
+ ExpectNumber<10>("[1999999999999999999999]$", chars_format::general, + 1999999999999999999, + /* adjusted exponent */ 3, + /* literal exponent */ 0); +} + +TEST(ParseFloat, LargeHexadecimalMantissa) { + // After 15 significant hex digits in the mantissa, ParseFloat will treat + // additional digits as sticky. We need to test that: + // 1) The truncation to 15 digits happens + // 2) The returned exponent reflects the dropped significant digits + // 3) If a nonzero digit is dropped, the low bit of mantissa is set. + + ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general, + 0x123456789abcdef, 60); + + // Leading zeroes should not count towards the 15 significant digit limit + ExpectNumber<16>("000000123456789abcdef123456789abcdef$", + chars_format::general, 0x123456789abcdef, 60); + + // Truncated digits after the radix point should not cause a further + // exponent adjustment. + ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general, + 0x123456789abcdef, 44); + ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", + chars_format::general, 0x123456789abcdef, 44); + + // test sticky digit behavior. The low bit should be set iff any dropped + // digit is nonzero. + ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general, + 0x123456789abcdee, 60); +} + +TEST(ParseFloat, ScientificVsFixed) { + // In fixed mode, an exponent is never matched (but the remainder of the + // number will be matched.) 
+ ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8); + ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3); + ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36); + ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8); + + // In scientific mode, numbers don't match *unless* they have an exponent. + // + // The failing inputs must be otherwise-valid mantissas, so that the missing + // exponent is the only possible reason for the failure. (ParseFloat never + // consumes a sign, so an input with a leading '-' would fail to parse in + // every format and would not exercise this rule.) ExpectFailedParse does + // not strip the '$'; here it is merely an unmatched trailing character. + ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3); + ExpectFailedParse<10>("123456.789$", chars_format::scientific); + ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef, + -8); + ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific); +} + +TEST(ParseFloat, Infinity) { + ExpectFailedParse<10>("in", chars_format::general); + ExpectFailedParse<16>("in", chars_format::general); + ExpectFailedParse<10>("inx", chars_format::general); + ExpectFailedParse<16>("inx", chars_format::general); + ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity); + ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity); +} + +TEST(ParseFloat, NaN) { + ExpectFailedParse<10>("na", chars_format::general); + ExpectFailedParse<16>("na", chars_format::general); + ExpectFailedParse<10>("nah", chars_format::general); + ExpectFailedParse<16>("nah", chars_format::general); + 
ExpectSpecial("nan$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$", chars_format::general, FloatType::kNan); + ExpectSpecial("nAn$", chars_format::general, FloatType::kNan); + ExpectSpecial("NAN$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan); + + // A parenthesized sequence of the 
characters [a-zA-Z0-9_] is allowed to + // appear after an NaN. Check that this is allowed, and that the correct + // characters are grouped. + // + // (The characters [ and ] in the pattern below delimit the expected matched + // subgroup; they are not part of the input passed to ParseFloat.) + ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan); + // If the subgroup contains illegal characters, don't match it at all. + ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan); + // Also cope with a missing close paren. + ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/cord_internal.h b/third_party/abseil_cpp/absl/strings/internal/cord_internal.h new file mode 100644 index 000000000000..aa91a691b949 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/cord_internal.h @@ -0,0 +1,270 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>

#include "absl/base/internal/invoke.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// Wraps std::atomic for reference counting.
//
// The stored value is pre-shifted: one logical reference is worth
// kRefIncrement, and the bit below it (kImmortalTag) marks a count that can
// never reach zero. See the enum at the bottom of the class.
class Refcount {
 public:
  // Starts with exactly one reference.
  constexpr Refcount() : count_{kRefIncrement} {}
  // Tag type used to select the immortal constructor below.
  struct Immortal {};
  // Starts with only the immortal tag bit set, i.e. no counted references.
  explicit constexpr Refcount(Immortal) : count_(kImmortalTag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalTag);
    // Short-circuit: when the loaded value is exactly one reference, return
    // false without executing the fetch_sub at all.
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1.
  // Unlike Decrement, this always performs the fetch_sub.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalTag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  // The immortal tag bit is shifted out of the returned value.
  inline int32_t Get() const {
    return count_.load(std::memory_order_acquire) >> kImmortalShift;
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.
  inline bool IsOne() {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  // Returns true if the immortal tag bit is set. Uses a relaxed load.
  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalTag) != 0;
  }

 private:
  // We reserve the bottom bit to tag a reference count as immortal.
  // By making it `1` we ensure that we never reach `0` when adding/subtracting
  // `2`, thus it never looks as if it should be destroyed.
  // These are used for the StringConstant constructor where we do not increase
  // the refcount at construction time (due to constinit requirements) but we
  // will still decrease it at destruction time to avoid branching on Unref.
  enum {
    kImmortalShift = 1,
    kRefIncrement = 1 << kImmortalShift,
    kImmortalTag = kRefIncrement - 1
  };

  std::atomic<int32_t> count_;
};

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte.  Define the subclasses first so we can provide downcasting helper
// functions in the base class.

struct CordRepConcat;
struct CordRepSubstring;
struct CordRepExternal;

// Various representations that we allow
enum CordRepKind {
  CONCAT = 0,
  EXTERNAL = 1,
  SUBSTRING = 2,

  // We have different tags for different sized flat arrays,
  // starting with FLAT
  FLAT = 3,
};

// Common header shared by every node type below; the concrete type is
// identified by `tag` rather than by a vtable.
struct CordRep {
  CordRep() = default;
  // Constant-initializable constructor: builds a node of length `l` with an
  // immortal refcount. The node is tagged EXTERNAL.
  constexpr CordRep(Refcount::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), data{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size.
  size_t length;
  Refcount refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;
  char data[1];  // Starting point for flat array: MUST BE LAST FIELD of CordRep

  // Downcasting helpers; declared here, defined outside this struct.
  inline CordRepConcat* concat();
  inline const CordRepConcat* concat() const;
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
};

// Node with two children. The depth is stored in the inherited `data[0]`
// byte rather than in a dedicated field.
struct CordRepConcat : public CordRep {
  CordRep* left;
  CordRep* right;

  uint8_t depth() const { return static_cast<uint8_t>(data[0]); }
  void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); }
};

// Node referring to a range of another node.
struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  // Constant-initializable constructor: refers to `str` with an immortal
  // refcount and no releaser (releaser_invoker is null).
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(Refcount::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;
};

// Overload-ranking tags. Rank0 derives from Rank1, so a call passing Rank0{}
// prefers the Rank0 overload when both overloads are viable.
struct Rank1 {};
struct Rank0 : Rank1 {};

// Selected when `releaser` can be invoked with an absl::string_view.
template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
                                 Releaser, absl::string_view>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}

// Selected when `releaser` can be invoked with no arguments; the data view is
// ignored.
template <typename Releaser,
          typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) {
  ::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}

// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  // Runs the stored releaser on the [base, base + length) view.
  ~CordRepExternalImpl() {
    InvokeReleaser(Rank0{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  // Installed as `releaser_invoker`; deletes `rep` as a CordRepExternalImpl,
  // which in turn runs the destructor above.
  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};

// Holds one statically stored CordRepExternal per `Str` type, constructed
// from `Str::value`.
template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
CordRepExternal ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
  // Tag byte & kTreeFlag means we are storing a pointer.
  kTreeFlag = 1 << 4,
  // Tag byte & kProfiledFlag means we are profiling the Cord.
  kProfiledFlag = 1 << 5
};

// If the data has length <= kMaxInline, we store it in `as_chars`, and
// store the size in `tagged_size`.
// Else we store it in a tree and store a pointer to that tree in
// `as_tree.rep` and store a tag in `tagged_size`.
struct AsTree {
  absl::cord_internal::CordRep* rep;
  // Sized so that `tagged_size` is the last byte of a kMaxInline + 1 byte
  // struct (checked by the static_asserts below).
  char padding[kMaxInline + 1 - sizeof(absl::cord_internal::CordRep*) - 1];
  char tagged_size;
};

// Returns data[pos], or '\0' when pos is out of range.
constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

union InlineData {
  constexpr InlineData() : as_chars{} {}
  explicit constexpr InlineData(AsTree tree) : as_tree(tree) {}
  // Copies up to kMaxInline characters (zero-filled past the end of `chars`)
  // and stores chars.size() in the final byte.
  explicit constexpr InlineData(absl::string_view chars)
      : as_chars{GetOrNull(chars, 0),  GetOrNull(chars, 1),
                 GetOrNull(chars, 2),  GetOrNull(chars, 3),
                 GetOrNull(chars, 4),  GetOrNull(chars, 5),
                 GetOrNull(chars, 6),  GetOrNull(chars, 7),
                 GetOrNull(chars, 8),  GetOrNull(chars, 9),
                 GetOrNull(chars, 10), GetOrNull(chars, 11),
                 GetOrNull(chars, 12), GetOrNull(chars, 13),
                 GetOrNull(chars, 14), static_cast<char>(chars.size())} {}

  AsTree as_tree;
  char as_chars[kMaxInline + 1];
};
// Both views of the union occupy the same kMaxInline + 1 bytes, and the tag
// byte of AsTree sits at index kMaxInline.
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
static_assert(sizeof(AsTree) == sizeof(InlineData), "");
static_assert(offsetof(AsTree, tagged_size) == kMaxInline, "");

}  // namespace cord_internal
ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
diff --git a/third_party/abseil_cpp/absl/strings/internal/escaping.cc b/third_party/abseil_cpp/absl/strings/internal/escaping.cc
new file mode 100644
index 000000000000..c5271286ad00
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/internal/escaping.cc
@@ -0,0 +1,180 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/internal/escaping.h"

#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {

// The standard base64 alphabet ('+' and '/' as the last two symbols).
const char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Returns the number of output characters produced for `input_len` input
// bytes: 4 per full 3-byte group, plus 2 or 3 for a 1- or 2-byte tail, plus
// the '=' characters needed to reach 4 when `do_padding` is true.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  // Base64 encodes three bytes of input at a time. If the input is not
  // divisible by three, we pad as appropriate.
  //
  // (from https://tools.ietf.org/html/rfc3548)
  // Special processing is performed if fewer than 24 bits are available
  // at the end of the data being encoded.  A full encoding quantum is
  // always completed at the end of a quantity.  When fewer than 24 input
  // bits are available in an input group, zero bits are added (on the
  // right) to form an integral number of 6-bit groups.  Padding at the
  // end of the data is performed using the '=' character.  Since all base
  // 64 input is an integral number of octets, only the following cases
  // can arise:

  // Base64 encodes each three bytes of input into four bytes of output.
  size_t len = (input_len / 3) * 4;

  if (input_len % 3 == 0) {
    // (from https://tools.ietf.org/html/rfc3548)
    // (1) the final quantum of encoding input is an integral multiple of 24
    // bits; here, the final unit of encoded output will be an integral
    // multiple of 4 characters with no "=" padding,
  } else if (input_len % 3 == 1) {
    // (from https://tools.ietf.org/html/rfc3548)
    // (2) the final quantum of encoding input is exactly 8 bits; here, the
    // final unit of encoded output will be two characters followed by two
    // "=" padding characters, or
    len += 2;
    if (do_padding) {
      len += 2;
    }
  } else {  // (input_len % 3 == 2)
    // (from https://tools.ietf.org/html/rfc3548)
    // (3) the final quantum of encoding input is exactly 16 bits; here, the
    // final unit of encoded output will be three characters followed by one
    // "=" padding character.
    len += 3;
    if (do_padding) {
      len += 1;
    }
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}

// Encodes `szsrc` bytes from `src` into `dest` (capacity `szdest`) using the
// 64-character alphabet `base64`, appending '=' padding when `do_padding` is
// true. Returns the number of characters written, or 0 if `dest` is too small.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
                            size_t szdest, const char* base64,
                            bool do_padding) {
  static const char kPad64 = '=';

  // Quick capacity check for the unpadded output; the tail handling below
  // re-checks the remaining space, including room for padding.
  // NOTE(review): both products are computed in size_t with no overflow
  // guard, so this relies on callers passing sizes well below SIZE_MAX / 4.
  if (szsrc * 4 > szdest * 3) return 0;

  char* cur_dest = dest;
  const unsigned char* cur_src = src;

  char* const limit_dest = dest + szdest;
  const unsigned char* const limit_src = src + szsrc;

  // Three bytes of data encodes to four characters of cyphertext.
  // So we can pump through three-byte chunks atomically.
  if (szsrc >= 3) {  // "limit_src - 3" is UB if szsrc < 3.
    while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
      // Load four bytes and discard the lowest one, leaving a 24-bit group.
      uint32_t in = absl::big_endian::Load32(cur_src) >> 8;

      cur_dest[0] = base64[in >> 18];
      in &= 0x3FFFF;
      cur_dest[1] = base64[in >> 12];
      in &= 0xFFF;
      cur_dest[2] = base64[in >> 6];
      in &= 0x3F;
      cur_dest[3] = base64[in];

      cur_dest += 4;
      cur_src += 3;
    }
  }
  // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
  szdest = limit_dest - cur_dest;
  szsrc = limit_src - cur_src;

  /* now deal with the tail (<=3 bytes) */
  switch (szsrc) {
    case 0:
      // Nothing left; nothing more to do.
      break;
    case 1: {
      // One byte left: this encodes to two characters, and (optionally)
      // two pad characters to round out the four-character cypherblock.
      if (szdest < 2) return 0;
      uint32_t in = cur_src[0];
      cur_dest[0] = base64[in >> 2];
      in &= 0x3;
      cur_dest[1] = base64[in << 4];
      cur_dest += 2;
      szdest -= 2;
      if (do_padding) {
        if (szdest < 2) return 0;
        cur_dest[0] = kPad64;
        cur_dest[1] = kPad64;
        cur_dest += 2;
        szdest -= 2;
      }
      break;
    }
    case 2: {
      // Two bytes left: this encodes to three characters, and (optionally)
      // one pad character to round out the four-character cypherblock.
      if (szdest < 3) return 0;
      uint32_t in = absl::big_endian::Load16(cur_src);
      cur_dest[0] = base64[in >> 10];
      in &= 0x3FF;
      cur_dest[1] = base64[in >> 4];
      in &= 0x00F;
      cur_dest[2] = base64[in << 2];
      cur_dest += 3;
      szdest -= 3;
      if (do_padding) {
        if (szdest < 1) return 0;
        cur_dest[0] = kPad64;
        cur_dest += 1;
        szdest -= 1;
      }
      break;
    }
    case 3: {
      // Three bytes left: same as in the big loop above.  We can't do this in
      // the loop because the loop above always reads 4 bytes, and the fourth
      // byte is past the end of the input.
      if (szdest < 4) return 0;
      uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
      cur_dest[0] = base64[in >> 18];
      in &= 0x3FFFF;
      cur_dest[1] = base64[in >> 12];
      in &= 0xFFF;
      cur_dest[2] = base64[in >> 6];
      in &= 0x3F;
      cur_dest[3] = base64[in];
      cur_dest += 4;
      szdest -= 4;
      break;
    }
    default:
      // Should not be reached: blocks of 4 bytes are handled
      // in the while loop before this switch statement.
      ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
      break;
  }
  return (cur_dest - dest);
}

}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl
diff --git a/third_party/abseil_cpp/absl/strings/internal/escaping.h b/third_party/abseil_cpp/absl/strings/internal/escaping.h
new file mode 100644
index 000000000000..6a9ce602d9ed
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/internal/escaping.h
@@ -0,0 +1,58 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_H_

#include <cassert>

#include "absl/strings/internal/resize_uninitialized.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {

ABSL_CONST_INIT extern const char kBase64Chars[];

// Calculates how long a string will be when it is base64 encoded given its
// length and whether or not the result should be padded.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);

// Base64-encodes `src` using the alphabet provided in `base64` and writes the
// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
// until its length is a multiple of 4. Returns the length of `dest`.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
                            size_t szdest, const char* base64, bool do_padding);

// Base64-encodes `src` using the alphabet provided in `base64` and writes the
// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
// until its length is a multiple of 4.
//
// `dest` is resized to the exact encoded length before encoding, so any
// previous contents are replaced.
template <typename String>
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
                          bool do_padding, const char* base64_chars) {
  const size_t calc_escaped_size =
      CalculateBase64EscapedLenInternal(szsrc, do_padding);
  STLStringResizeUninitialized(dest, calc_escaped_size);

  const size_t escaped_len = Base64EscapeInternal(
      src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
  // The pre-computed size and the number of characters actually written are
  // expected to agree; the erase below trims to the written length either way.
  assert(calc_escaped_size == escaped_len);
  dest->erase(escaped_len);
}

}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_STRINGS_INTERNAL_ESCAPING_H_
diff --git a/third_party/abseil_cpp/absl/strings/internal/escaping_test_common.h b/third_party/abseil_cpp/absl/strings/internal/escaping_test_common.h
new file mode 100644
index 000000000000..7b18017a0890
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/internal/escaping_test_common.h
@@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test contains common things needed by both escaping_test.cc and
// escaping_benchmark.cc.
+ +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ + +#include <array> +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +struct base64_testcase { + absl::string_view plaintext; + absl::string_view cyphertext; +}; + +inline const std::array<base64_testcase, 5>& base64_strings() { + static const std::array<base64_testcase, 5> testcase{{ + // Some google quotes + // Cyphertext created with "uuencode (GNU sharutils) 4.6.3" + // (Note that we're testing the websafe encoding, though, so if + // you add messages, be sure to run "tr -- '+/' '-_'" on the output) + { "I was always good at math and science, and I never realized " + "that was unusual or somehow undesirable. So one of the things " + "I care a lot about is helping to remove that stigma, " + "to show girls that you can be feminine, you can like the things " + "that girls like, but you can also be really good at technology. " + "You can be really good at building things." 
+ " - Marissa Meyer, Newsweek, 2010-12-22" "\n", + + "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg" + "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu" + "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg" + "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo" + "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp" + "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs" + "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy" + "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll" + "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" }, + + { "Typical first year for a new cluster: " + "~0.5 overheating " + "~1 PDU failure " + "~1 rack-move " + "~1 network rewiring " + "~20 rack failures " + "~5 racks go wonky " + "~8 network maintenances " + "~12 router reloads " + "~3 router failures " + "~dozens of minor 30-second blips for dns " + "~1000 individual machine failures " + "~thousands of hard drive failures " + "slow disks, bad memory, misconfigured machines, flaky machines, etc." + " - Jeff Dean, The Joys of Real Hardware" "\n", + + "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92" + "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3" + "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv" + "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk" + "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv" + "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp" + "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg" + "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs" + "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS" + "ZWFsIEhhcmR3YXJlCg" }, + + { "I'm the head of the webspam team at Google. " + "That means that if you type your name into Google and get porn back, " + "it's my fault. Unless you're a porn star, in which case porn is a " + "completely reasonable response." 
+ " - Matt Cutts, Google Plus" "\n", + + "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg" + "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv" + "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz" + "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg" + "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs" + "IEdvb2dsZSBQbHVzCg" }, + + { "It will still be a long time before machines approach human " + "intelligence. " + "But luckily, machines don't actually have to be intelligent; " + "they just have to fake it. Access to a wealth of information, " + "combined with a rudimentary decision-making capacity, " + "can often be almost as useful. Of course, the results are better yet " + "when coupled with intelligence. A reference librarian with access to " + "a good search engine is a formidable tool." + " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" + "\n", + + "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg" + "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj" + "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg" + "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo" + "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg" + "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0" + "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy" + "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl" + "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu" + "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp" + "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw" + "NAo" }, + + // Degenerate edge case + { "", + "" }, + }}; + + return testcase; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil.cc 
b/third_party/abseil_cpp/absl/strings/internal/memutil.cc new file mode 100644 index 000000000000..2519c6881e35 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil.cc @@ -0,0 +1,112 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <cstdlib> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +int memcasecmp(const char* s1, const char* s2, size_t len) { + const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1); + const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2); + + for (size_t i = 0; i < len; i++) { + const int diff = + int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} - + int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))}; + if (diff != 0) return diff; + } + return 0; +} + +char* memdup(const char* s, size_t slen) { + void* copy; + if ((copy = malloc(slen)) == nullptr) return nullptr; + memcpy(copy, s, slen); + return reinterpret_cast<char*>(copy); +} + +char* memrchr(const char* s, int c, size_t slen) { + for (const char* e = s + slen - 1; e >= s; e--) { + if (*e == c) return const_cast<char*>(e); + } + return nullptr; +} + +size_t memspn(const char* s, size_t slen, const char* accept) { + const char* p = s; + const char* spanp; + char c, sc; + +cont: + c = *p++; + if (slen-- == 0) return p - 1 - s; + for (spanp = accept; (sc = *spanp++) != '\0';) + if (sc == 
c) goto cont; + return p - 1 - s; +} + +size_t memcspn(const char* s, size_t slen, const char* reject) { + const char* p = s; + const char* spanp; + char c, sc; + + while (slen-- != 0) { + c = *p++; + for (spanp = reject; (sc = *spanp++) != '\0';) + if (sc == c) return p - 1 - s; + } + return p - s; +} + +char* mempbrk(const char* s, size_t slen, const char* accept) { + const char* scanp; + int sc; + + for (; slen; ++s, --slen) { + for (scanp = accept; (sc = *scanp++) != '\0';) + if (sc == *s) return const_cast<char*>(s); + } + return nullptr; +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ((match = static_cast<const char*>( + memchr(phaystack, pneedle[0], hayend - phaystack)))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil.h b/third_party/abseil_cpp/absl/strings/internal/memutil.h new file mode 100644 index 000000000000..9ad05358086c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil.h @@ -0,0 +1,148 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// These routines provide mem versions of standard C string routines, +// such as strpbrk. They function exactly the same as the str versions, +// so if you wonder what they are, replace the word "mem" by +// "str" and check out the man page. I could return void*, as the +// strutil.h mem*() routines tend to do, but I return char* instead +// since this is by far the most common way these functions are called. +// +// The difference between the mem and str versions is the mem version +// takes a pointer and a length, rather than a '\0'-terminated string. +// The memcase* routines defined here assume the locale is "C" +// (they use absl::ascii_tolower instead of tolower). +// +// These routines are based on the BSD library. +// +// Here's a list of routines from string.h, and their mem analogues. 
+// Functions in lowercase are defined in string.h; those in UPPERCASE +// are defined here: +// +// strlen -- +// strcat strncat MEMCAT +// strcpy strncpy memcpy +// -- memccpy (very cool function, btw) +// -- memmove +// -- memset +// strcmp strncmp memcmp +// strcasecmp strncasecmp MEMCASECMP +// strchr memchr +// strcoll -- +// strxfrm -- +// strdup strndup MEMDUP +// strrchr MEMRCHR +// strspn MEMSPN +// strcspn MEMCSPN +// strpbrk MEMPBRK +// strstr MEMSTR MEMMEM +// (g)strcasestr MEMCASESTR MEMCASEMEM +// strtok -- +// strprefix MEMPREFIX (strprefix is from strutil.h) +// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) +// strsuffix MEMSUFFIX (strsuffix is from strutil.h) +// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) +// -- MEMIS +// -- MEMCASEIS +// strcount MEMCOUNT (strcount is from strutil.h) + +#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ +#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ + +#include <cstddef> +#include <cstring> + +#include "absl/base/port.h" // disable some warnings on Windows +#include "absl/strings/ascii.h" // for absl::ascii_tolower + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +inline char* memcat(char* dest, size_t destlen, const char* src, + size_t srclen) { + return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen)); +} + +int memcasecmp(const char* s1, const char* s2, size_t len); +char* memdup(const char* s, size_t slen); +char* memrchr(const char* s, int c, size_t slen); +size_t memspn(const char* s, size_t slen, const char* accept); +size_t memcspn(const char* s, size_t slen, const char* reject); +char* mempbrk(const char* s, size_t slen, const char* accept); + +// This is for internal use only. 
Don't call this directly +template <bool case_sensitive> +const char* int_memmatch(const char* haystack, size_t haylen, + const char* needle, size_t neelen) { + if (0 == neelen) { + return haystack; // even if haylen is 0 + } + const char* hayend = haystack + haylen; + const char* needlestart = needle; + const char* needleend = needlestart + neelen; + + for (; haystack < hayend; ++haystack) { + char hay = case_sensitive + ? *haystack + : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); + char nee = case_sensitive + ? *needle + : absl::ascii_tolower(static_cast<unsigned char>(*needle)); + if (hay == nee) { + if (++needle == needleend) { + return haystack + 1 - neelen; + } + } else if (needle != needlestart) { + // must back up haystack in case a prefix matched (find "aab" in "aaab") + haystack -= needle - needlestart; // for loop will advance one more + needle = needlestart; + } + } + return nullptr; +} + +// These are the guys you can call directly +inline const char* memstr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memcasestr(const char* phaystack, size_t haylen, + const char* pneedle) { + return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); +} + +inline const char* memmem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); +} + +inline const char* memcasemem(const char* phaystack, size_t haylen, + const char* pneedle, size_t needlelen) { + return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); +} + +// This is significantly faster for case-sensitive matches with very +// few possible matches. See unit test for benchmarks. 
+const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil_benchmark.cc b/third_party/abseil_cpp/absl/strings/internal/memutil_benchmark.cc new file mode 100644 index 000000000000..dc95c3e5e55a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil_benchmark.cc @@ -0,0 +1,323 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/memutil.h" + +#include <algorithm> +#include <cstdlib> + +#include "benchmark/benchmark.h" +#include "absl/strings/ascii.h" + +// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab. +// That gives us: +// - an easy search: 'b' +// - a medium search: 'ab'. That means every letter is a possible match. +// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haystack) +// We benchmark case-sensitive and case-insensitive versions of +// three memmem implementations: +// - memmem() from memutil.h +// - search() from STL +// - memmatch(), a custom implementation using memchr and memcmp.
+// Here are sample results: +// +// Run on (12 X 3800 MHz CPU s) +// CPU Caches: +// L1 Data 32K (x6) +// L1 Instruction 32K (x6) +// L2 Unified 256K (x6) +// L3 Unified 15360K (x1) +// ---------------------------------------------------------------- +// Benchmark Time CPU Iterations +// ---------------------------------------------------------------- +// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s +// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s +// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s +// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s +// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s +// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s +// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s +// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s +// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s +// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s +// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s +// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s +// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s +// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s +// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s +// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s +// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s +// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s +// BM_MemmemStartup 5 ns 5 ns 150821500 +// BM_SearchStartup 5 ns 5 ns 150644203 +// BM_MemmatchStartup 7 ns 7 ns 97068802 +// +// Conclusions: +// +// The following recommendations are based on the sample results above. However, +// we have found that the performance of STL search can vary significantly +// depending on compiler and standard library implementation. We recommend you +// run the benchmarks for yourself on relevant platforms. +// +// If you need case-insensitive, STL search is slightly better than memmem for +// all cases. 
+// +// Case-sensitive is more subtle: +// Custom memmatch is _very_ fast at scanning, so if you have very few possible +// matches in your haystack, that's the way to go. Performance drops +// significantly with more matches. +// +// STL search is slightly faster than memmem in the medium and pathological +// benchmarks. However, the performance of memmem is currently more dependable +// across platforms and build configurations. + +namespace { + +constexpr int kHaystackSize = 10000; +constexpr int64_t kHaystackSize64 = kHaystackSize; +const char* MakeHaystack() { + char* haystack = new char[kHaystackSize]; + for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a'; + haystack[kHaystackSize - 1] = 'b'; + return haystack; +} +const char* const kHaystack = MakeHaystack(); + +void BM_Memmem(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memmem); + +void BM_MemmemMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmemMedium); + +void BM_MemmemPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmem( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmemPathological); + +void BM_Memcasemem(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memcasemem); + +void BM_MemcasememMedium(benchmark::State& state) { 
+ for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasememMedium); + +void BM_MemcasememPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memcasemem( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasememPathological); + +bool case_eq(const char a, const char b) { + return absl::ascii_tolower(a) == absl::ascii_tolower(b); +} + +void BM_Search(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Search); + +void BM_SearchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 2, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchMedium); + +void BM_SearchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystack + kHaystackSize)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchPathological); + +void BM_Searchcase(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Searchcase); + +void 
BM_SearchcaseMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 2, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchcaseMedium); + +void BM_SearchcasePathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystack + kHaystackSize, case_eq)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_SearchcasePathological); + +char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +void BM_Memmatch(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memmatch); + +void BM_MemmatchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmatchMedium); + +void 
BM_MemmatchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmatch( + kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemmatchPathological); + +void BM_Memcasematch(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_Memcasematch); + +void BM_MemcasematchMedium(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasematchMedium); + +void BM_MemcasematchPathological(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, + kHaystack + kHaystackSize / 2, + kHaystackSize - kHaystackSize / 2)); + } + state.SetBytesProcessed(kHaystackSize64 * state.iterations()); +} +BENCHMARK(BM_MemcasematchPathological); + +void BM_MemmemStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmem( + kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); + } +} +BENCHMARK(BM_MemmemStartup); + +void BM_SearchStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize( + std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize, + kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize)); + } +} +BENCHMARK(BM_SearchStartup); + +void BM_MemmatchStartup(benchmark::State& state) { + for (auto _ : state) { + benchmark::DoNotOptimize(absl::strings_internal::memmatch( + kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); + } +} +BENCHMARK(BM_MemmatchStartup); + +} // namespace 
diff --git a/third_party/abseil_cpp/absl/strings/internal/memutil_test.cc b/third_party/abseil_cpp/absl/strings/internal/memutil_test.cc new file mode 100644 index 000000000000..d8681ddf4e3b --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/memutil_test.cc @@ -0,0 +1,179 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Unit test for memutil.cc + +#include "absl/strings/internal/memutil.h" + +#include <cstdlib> + +#include "gtest/gtest.h" +#include "absl/strings/ascii.h" + +namespace { + +static char* memcasechr(const char* s, int c, size_t slen) { + c = absl::ascii_tolower(c); + for (; slen; ++s, --slen) { + if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); + } + return nullptr; +} + +static const char* memcasematch(const char* phaystack, size_t haylen, + const char* pneedle, size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + while ((match = static_cast<char*>( + memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { + if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + +TEST(MemUtilTest, AllTests) { + // check memutil functions + char a[1000]; + absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1); + 
absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1); + + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there", + sizeof("hello there") - 1), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 1), + -1); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf", + sizeof("hello there") - 2), + 0); + EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0); + + char* p = absl::strings_internal::memdup("hello", 5); + free(p); + + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 1); + EXPECT_TRUE(p && p[-1] == 'r'); + p = absl::strings_internal::memrchr("hello there", 'e', + sizeof("hello there") - 2); + EXPECT_TRUE(p && p[-1] == 'h'); + p = absl::strings_internal::memrchr("hello there", 'u', + sizeof("hello there") - 1); + EXPECT_TRUE(p == nullptr); + + int len = absl::strings_internal::memspn("hello there", + sizeof("hello there") - 1, "hole"); + EXPECT_EQ(len, sizeof("hello") - 1); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "u"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + ""); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, + "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 1, "trole h"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memspn("hello there!", + sizeof("hello there!") - 2, "trole h!"); + EXPECT_EQ(len, sizeof("hello there!") - 2); + + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "leho"); + EXPECT_EQ(len, 0); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, "u"); + EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, ""); 
+ EXPECT_EQ(len, sizeof("hello there") - 1); + len = absl::strings_internal::memcspn("hello there", + sizeof("hello there") - 1, " "); + EXPECT_EQ(len, 5); + + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "leho"); + EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l'); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + "nu"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there!", + sizeof("hello there!") - 2, "!"); + EXPECT_TRUE(p == nullptr); + p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, + " t "); + EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't'); + + { + const char kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char kHaystack[] = "aBcDeFgHiJ"; + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) == + nullptr); + } + { + const char 
kHaystack[] = "0123456789"; + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1), + kHaystack); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3), + kHaystack + 7); + EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1), + kHaystack + 9); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) == + nullptr); + EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) == + nullptr); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/numbers_test_common.h b/third_party/abseil_cpp/absl/strings/internal/numbers_test_common.h new file mode 100644 index 000000000000..eaa88a88975b --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/numbers_test_common.h @@ -0,0 +1,184 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file contains common things needed by numbers_test.cc, +// numbers_legacy_test.cc and numbers_benchmark.cc. 
+ +#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ +#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ + +#include <array> +#include <cstdint> +#include <limits> +#include <string> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +template <typename IntType> +inline bool Itoa(IntType value, int base, std::string* destination) { + destination->clear(); + if (base <= 1 || base > 36) { + return false; + } + + if (value == 0) { + destination->push_back('0'); + return true; + } + + bool negative = value < 0; + while (value != 0) { + const IntType next_value = value / base; + // Can't use std::abs here because of problems when IntType is unsigned. + int remainder = + static_cast<int>(value > next_value * base ? value - next_value * base + : next_value * base - value); + char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10; + destination->insert(0, 1, c); + value = next_value; + } + + if (negative) { + destination->insert(0, 1, '-'); + } + return true; +} + +struct uint32_test_case { + const char* str; + bool expect_ok; + int base; // base to pass to the conversion function + uint32_t expected; +}; + +inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() { + static const std::array<uint32_test_case, 27> test_cases{{ + {"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()}, + {"0x34234324", true, 16, 0x34234324}, + {"34234324", true, 16, 0x34234324}, + {"0", true, 16, 0}, + {" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()}, + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + {" \t\n 72717222", true, 8, 072717222}, + {" \t\n 072717222", true, 8, 072717222}, + {" \t\n 072717228", false, 8, 07271722}, + {"0", true, 0, 0}, + + // Base-10 version. 
+ {"34234324", true, 0, 34234324}, + {"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()}, + {"34234324 \n\t", true, 10, 34234324}, + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + // Invalid uints. + {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, // would be valid hex, but prefix is missing + {"34234324a", false, 0, 34234324}, + {"34234.3", false, 0, 34234}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()}, + {"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()}, + {nullptr, false, 0, 0}, + }}; + return test_cases; +} + +struct uint64_test_case { + const char* str; + bool expect_ok; + int base; + uint64_t expected; +}; + +inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { + static const std::array<uint64_test_case, 34> test_cases{{ + {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}}, + {"3423432448783446", true, 16, int64_t{0x3423432448783446}}, + + {"0", true, 16, 0}, + {"000", true, 0, 0}, + {"0", true, 0, 0}, + {" \t\n 0xffffffffffffffff", true, 16, + (std::numeric_limits<uint64_t>::max)()}, + + {"012345670123456701234", true, 8, int64_t{012345670123456701234}}, + {"12345670123456701234", true, 8, int64_t{012345670123456701234}}, + + {"12845670123456701234", false, 8, 0}, + + // Base-10 version. + {"34234324487834466", true, 0, int64_t{34234324487834466}}, + + {" \t\n 18446744073709551615", true, 0, + (std::numeric_limits<uint64_t>::max)()}, + + {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}}, + + {" \f\v 46", true, 10, 46}, // must accept weird whitespace + + // Unusual base + {"0", true, 3, 0}, + {"2", true, 3, 2}, + {"11", true, 3, 4}, + + {"0", true, 0, 0}, + + // Invalid uints. 
+ {"", false, 0, 0}, + {" ", false, 0, 0}, + {"abc", false, 0, 0}, + {"34234324487834466a", false, 0, 0}, + {"34234487834466.3", false, 0, 0}, + {"-1", false, 0, 0}, + {" -123", false, 0, 0}, + {" \t\n -123", false, 0, 0}, + + // Out of bounds. + {"18446744073709551616", false, 10, 0}, + {"18446744073709551616", false, 0, 0}, + {"0x10000000000000000", false, 16, + (std::numeric_limits<uint64_t>::max)()}, + {"0X10000000000000000", false, 16, + (std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x. + {"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()}, + {"0X10000000000000000", false, 0, + (std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x. + + {"0x1234", true, 16, 0x1234}, + + // Base-10 string version. + {"1234", true, 0, 1234}, + {nullptr, false, 0, 0}, + }}; + return test_cases; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream.cc b/third_party/abseil_cpp/absl/strings/internal/ostringstream.cc new file mode 100644 index 000000000000..05324c780c60 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream.cc @@ -0,0 +1,36 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/ostringstream.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +OStringStream::Buf::int_type OStringStream::overflow(int c) { + assert(s_); + if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof())) + s_->push_back(static_cast<char>(c)); + return 1; +} + +std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) { + assert(s_); + s_->append(s, n); + return n; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream.h b/third_party/abseil_cpp/absl/strings/internal/ostringstream.h new file mode 100644 index 000000000000..d25d60473f6d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream.h @@ -0,0 +1,89 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ +#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ + +#include <cassert> +#include <ostream> +#include <streambuf> +#include <string> + +#include "absl/base/port.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// The same as std::ostringstream but appends to a user-specified std::string, +// and is faster. It is ~70% faster to create, ~50% faster to write to, and +// completely free to extract the result std::string. 
+// +// std::string s; +// OStringStream strm(&s); +// strm << 42 << ' ' << 3.14; // appends to `s` +// +// The stream object doesn't have to be named. Starting from C++11 operator<< +// works with rvalues of std::ostream. +// +// std::string s; +// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s` +// +// OStringStream is faster to create than std::ostringstream but it's still +// relatively slow. Avoid creating multiple streams where a single stream will +// do. +// +// Creates unnecessary instances of OStringStream: slow. +// +// std::string s; +// OStringStream(&s) << 42; +// OStringStream(&s) << ' '; +// OStringStream(&s) << 3.14; +// +// Creates a single instance of OStringStream and reuses it: fast. +// +// std::string s; +// OStringStream strm(&s); +// strm << 42; +// strm << ' '; +// strm << 3.14; +// +// Note: flush() has no effect. No reason to call it. +class OStringStream : private std::basic_streambuf<char>, public std::ostream { + public: + // The argument can be null, in which case you'll need to call str(p) with a + // non-null argument before you can write to the stream. + // + // The destructor of OStringStream doesn't use the std::string. It's OK to + // destroy the std::string before the stream. 
+ explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {} + + std::string* str() { return s_; } + const std::string* str() const { return s_; } + void str(std::string* s) { s_ = s; } + + private: + using Buf = std::basic_streambuf<char>; + + Buf::int_type overflow(int c) override; + std::streamsize xsputn(const char* s, std::streamsize n) override; + + std::string* s_; +}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream_benchmark.cc b/third_party/abseil_cpp/absl/strings/internal/ostringstream_benchmark.cc new file mode 100644 index 000000000000..5979f18236e3 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream_benchmark.cc @@ -0,0 +1,106 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/ostringstream.h" + +#include <sstream> +#include <string> + +#include "benchmark/benchmark.h" + +namespace { + +enum StringType { + kNone, + kStdString, +}; + +// Benchmarks for std::ostringstream. 
+template <StringType kOutput> +void BM_StdStream(benchmark::State& state) { + const int num_writes = state.range(0); + const int bytes_per_write = state.range(1); + const std::string payload(bytes_per_write, 'x'); + for (auto _ : state) { + std::ostringstream strm; + benchmark::DoNotOptimize(strm); + for (int i = 0; i != num_writes; ++i) { + strm << payload; + } + switch (kOutput) { + case kNone: { + break; + } + case kStdString: { + std::string s = strm.str(); + benchmark::DoNotOptimize(s); + break; + } + } + } +} + +// Create the stream, optionally write to it, then destroy it. +BENCHMARK_TEMPLATE(BM_StdStream, kNone) + ->ArgPair(0, 0) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); +// Create the stream, write to it, get std::string out, then destroy. +BENCHMARK_TEMPLATE(BM_StdStream, kStdString) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); + +// Benchmarks for OStringStream. +template <StringType kOutput> +void BM_CustomStream(benchmark::State& state) { + const int num_writes = state.range(0); + const int bytes_per_write = state.range(1); + const std::string payload(bytes_per_write, 'x'); + for (auto _ : state) { + std::string out; + absl::strings_internal::OStringStream strm(&out); + benchmark::DoNotOptimize(strm); + for (int i = 0; i != num_writes; ++i) { + strm << payload; + } + switch (kOutput) { + case kNone: { + break; + } + case kStdString: { + std::string s = out; + benchmark::DoNotOptimize(s); + break; + } + } + } +} + +// Create the stream, optionally write to it, then destroy it. +BENCHMARK_TEMPLATE(BM_CustomStream, kNone) + ->ArgPair(0, 0) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); +// Create the stream, write to it, get std::string out, then destroy. 
+// It's not useful in practice to extract std::string from OStringStream; we +// measure it for completeness. +BENCHMARK_TEMPLATE(BM_CustomStream, kStdString) + ->ArgPair(1, 16) // 16 bytes is small enough for SSO + ->ArgPair(1, 256) // 256 bytes requires heap allocation + ->ArgPair(1024, 256); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/ostringstream_test.cc b/third_party/abseil_cpp/absl/strings/internal/ostringstream_test.cc new file mode 100644 index 000000000000..2879e50eb38d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/ostringstream_test.cc @@ -0,0 +1,102 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/ostringstream.h" + +#include <memory> +#include <ostream> +#include <string> +#include <type_traits> + +#include "gtest/gtest.h" + +namespace { + +TEST(OStringStream, IsOStream) { + static_assert( + std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(), + ""); +} + +TEST(OStringStream, ConstructDestroy) { + { + absl::strings_internal::OStringStream strm(nullptr); + EXPECT_EQ(nullptr, strm.str()); + } + { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ(&s, strm.str()); + } + EXPECT_EQ("abc", s); + } + { + std::unique_ptr<std::string> s(new std::string); + absl::strings_internal::OStringStream strm(s.get()); + s.reset(); + } +} + +TEST(OStringStream, Str) { + std::string s1; + absl::strings_internal::OStringStream strm(&s1); + const absl::strings_internal::OStringStream& c_strm(strm); + + static_assert(std::is_same<decltype(strm.str()), std::string*>(), ""); + static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), ""); + + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + strm.str(&s1); + EXPECT_EQ(&s1, strm.str()); + EXPECT_EQ(&s1, c_strm.str()); + + std::string s2; + strm.str(&s2); + EXPECT_EQ(&s2, strm.str()); + EXPECT_EQ(&s2, c_strm.str()); + + strm.str(nullptr); + EXPECT_EQ(nullptr, strm.str()); + EXPECT_EQ(nullptr, c_strm.str()); +} + +TEST(OStreamStream, WriteToLValue) { + std::string s = "abc"; + { + absl::strings_internal::OStringStream strm(&s); + EXPECT_EQ("abc", s); + strm << ""; + EXPECT_EQ("abc", s); + strm << 42; + EXPECT_EQ("abc42", s); + strm << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); + } + EXPECT_EQ("abc42xy", s); +} + +TEST(OStreamStream, WriteToRValue) { + std::string s = "abc"; + absl::strings_internal::OStringStream(&s) << ""; + EXPECT_EQ("abc", s); + absl::strings_internal::OStringStream(&s) << 42; + EXPECT_EQ("abc42", s); + absl::strings_internal::OStringStream(&s) << 'x' << 'y'; + EXPECT_EQ("abc42xy", s); +} + +} // 
namespace {

// The exact value of 1e23 falls precisely halfway between two representable
// doubles. Furthermore, the rounding rules we prefer (break ties by rounding
// to the nearest even) dictate in this case that the number should be rounded
// down, but this is not completely specified for floating-point literals in
// C++. (It just says to use the default rounding mode of the standard
// library.) We ensure the result we want by using a number that has an
// unambiguous correctly rounded answer.
constexpr double k1e23 = 9999999999999999e7;

// Exponent range covered by kPowersOfTen. kPowersOfTen[0] corresponds to
// 10^kMinTableExp (stored as 0.0) and the last entry to 10^kMaxTableExp.
constexpr int kMinTableExp = -324;
constexpr int kMaxTableExp = 308;

constexpr double kPowersOfTen[] = {
    0.0,    1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316,
    1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307,
    1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298,
    1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289,
    1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280,
    1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
    1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262,
    1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253,
    1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244,
    1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235,
    1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226,
    1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217,
    1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208,
    1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199,
    1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190,
    1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
    1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172,
    1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163,
    1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154,
    1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145,
    1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136,
    1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127,
    1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118,
    1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109,
    1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100,
    1e-99,  1e-98,  1e-97,  1e-96,  1e-95,  1e-94,  1e-93,  1e-92,  1e-91,
    1e-90,  1e-89,  1e-88,  1e-87,  1e-86,  1e-85,  1e-84,  1e-83,  1e-82,
    1e-81,  1e-80,  1e-79,  1e-78,  1e-77,  1e-76,  1e-75,  1e-74,  1e-73,
    1e-72,  1e-71,  1e-70,  1e-69,  1e-68,  1e-67,  1e-66,  1e-65,  1e-64,
    1e-63,  1e-62,  1e-61,  1e-60,  1e-59,  1e-58,  1e-57,  1e-56,  1e-55,
    1e-54,  1e-53,  1e-52,  1e-51,  1e-50,  1e-49,  1e-48,  1e-47,  1e-46,
    1e-45,  1e-44,  1e-43,  1e-42,  1e-41,  1e-40,  1e-39,  1e-38,  1e-37,
    1e-36,  1e-35,  1e-34,  1e-33,  1e-32,  1e-31,  1e-30,  1e-29,  1e-28,
    1e-27,  1e-26,  1e-25,  1e-24,  1e-23,  1e-22,  1e-21,  1e-20,  1e-19,
    1e-18,  1e-17,  1e-16,  1e-15,  1e-14,  1e-13,  1e-12,  1e-11,  1e-10,
    1e-9,   1e-8,   1e-7,   1e-6,   1e-5,   1e-4,   1e-3,   1e-2,   1e-1,
    1e+0,   1e+1,   1e+2,   1e+3,   1e+4,   1e+5,   1e+6,   1e+7,   1e+8,
    1e+9,   1e+10,  1e+11,  1e+12,  1e+13,  1e+14,  1e+15,  1e+16,  1e+17,
    1e+18,  1e+19,  1e+20,  1e+21,  1e+22,  k1e23,  1e+24,  1e+25,  1e+26,
    1e+27,  1e+28,  1e+29,  1e+30,  1e+31,  1e+32,  1e+33,  1e+34,  1e+35,
    1e+36,  1e+37,  1e+38,  1e+39,  1e+40,  1e+41,  1e+42,  1e+43,  1e+44,
    1e+45,  1e+46,  1e+47,  1e+48,  1e+49,  1e+50,  1e+51,  1e+52,  1e+53,
    1e+54,  1e+55,  1e+56,  1e+57,  1e+58,  1e+59,  1e+60,  1e+61,  1e+62,
    1e+63,  1e+64,  1e+65,  1e+66,  1e+67,  1e+68,  1e+69,  1e+70,  1e+71,
    1e+72,  1e+73,  1e+74,  1e+75,  1e+76,  1e+77,  1e+78,  1e+79,  1e+80,
    1e+81,  1e+82,  1e+83,  1e+84,  1e+85,  1e+86,  1e+87,  1e+88,  1e+89,
    1e+90,  1e+91,  1e+92,  1e+93,  1e+94,  1e+95,  1e+96,  1e+97,  1e+98,
    1e+99,  1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107,
    1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116,
    1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125,
    1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134,
    1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143,
    1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152,
    1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161,
    1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170,
    1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
    1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188,
    1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197,
    1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206,
    1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215,
    1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224,
    1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233,
    1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242,
    1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251,
    1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260,
    1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
    1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278,
    1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287,
    1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296,
    1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305,
    1e+306, 1e+307, 1e+308,
};

// Pow10() indexes the table with `exp - kMinTableExp`; a missing or duplicated
// entry would silently shift every result, so pin the entry count at compile
// time.
static_assert(sizeof(kPowersOfTen) / sizeof(kPowersOfTen[0]) ==
                  kMaxTableExp - kMinTableExp + 1,
              "kPowersOfTen must hold one entry per exponent in "
              "[kMinTableExp, kMaxTableExp]");

}  // namespace

// Returns the table entry for 10^exp.
//
// Exponents below kMinTableExp yield 0.0 and exponents above kMaxTableExp
// yield INFINITY; everything in between is a direct table lookup.
double Pow10(int exp) {
  if (exp < kMinTableExp) {
    return 0.0;
  }
  if (exp > kMaxTableExp) {
    return INFINITY;
  }
  return kPowersOfTen[exp - kMinTableExp];
}
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This test helper library contains a table of powers of 10, to guarantee +// precise values are computed across the full range of doubles. We can't rely +// on the pow() function, because not all standard libraries ship a version +// that is precise. +#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ +#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ + +#include <vector> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// Computes the precise value of 10^exp. (I.e. the nearest representable +// double to the exact value, rounding to nearest-even in the (single) case of +// being exactly halfway between.) +double Pow10(int exp); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/pow10_helper_test.cc b/third_party/abseil_cpp/absl/strings/internal/pow10_helper_test.cc new file mode 100644 index 000000000000..a4ff76d31e10 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/pow10_helper_test.cc @@ -0,0 +1,122 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// One expectation for Pow10(): the expected double is spelled as
// std::ldexp(significand, radix) so that it does not depend on how the
// compiler rounds a decimal literal.
struct TestCase {
  int power;             // Testing Pow10(power)
  uint64_t significand;  // Raw bits of the expected value
  int radix;             // significand is adjusted by 2^radix
};

// Compares Pow10(power) with std::ldexp(significand, radix) for every row of
// the table below.
TEST(Pow10HelperTest, Works) {
  // The logic in pow10_helper.cc is so simple that theoretically we don't even
  // need a test. However, we're paranoid and believe that there may be
  // compilers that don't round floating-point literals correctly, even though
  // it is specified by the standard. We check various edge cases, just to be
  // sure.
  constexpr TestCase kTestCases[] = {
      // Subnormals
      {-323, 0x2, -1074},
      {-322, 0x14, -1074},
      {-321, 0xca, -1074},
      {-320, 0x7e8, -1074},
      {-319, 0x4f10, -1074},
      {-318, 0x316a2, -1074},
      {-317, 0x1ee257, -1074},
      {-316, 0x134d761, -1074},
      {-315, 0xc1069cd, -1074},
      {-314, 0x78a42205, -1074},
      {-313, 0x4b6695433, -1074},
      {-312, 0x2f201d49fb, -1074},
      {-311, 0x1d74124e3d1, -1074},
      {-310, 0x12688b70e62b, -1074},
      {-309, 0xb8157268fdaf, -1074},
      {-308, 0x730d67819e8d2, -1074},
      // Values that are very close to rounding the other way.
      // Comment shows difference of significand from the true value.
      {-307, 0x11fa182c40c60d, -1072},  // -.4588
      {-290, 0x18f2b061aea072, -1016},  //  .4854
      {-276, 0x11BA03F5B21000, -969},   //  .4709
      {-259, 0x1899C2F6732210, -913},   //  .4830
      {-252, 0x1D53844EE47DD1, -890},   // -.4743
      {-227, 0x1E5297287C2F45, -807},   // -.4708
      {-198, 0x1322E220A5B17E, -710},   // -.4714
      {-195, 0x12B010D3E1CF56, -700},   //  .4928
      {-192, 0x123FF06EEA847A, -690},   //  .4968
      {-163, 0x1708D0F84D3DE7, -594},   // -.4977
      {-145, 0x13FAAC3E3FA1F3, -534},   // -.4785
      {-111, 0x133D4032C2C7F5, -421},   //  .4774
      {-106, 0x1D5B561574765B, -405},   // -.4869
      {-104, 0x16EF5B40C2FC77, -398},   // -.4741
      {-88, 0x197683DF2F268D, -345},    // -.4738
      {-86, 0x13E497065CD61F, -338},    //  .4736
      {-76, 0x17288E1271F513, -305},    // -.4761
      {-63, 0x1A53FC9631D10D, -262},    //  .4929
      {-30, 0x14484BFEEBC2A0, -152},    //  .4758
      {-21, 0x12E3B40A0E9B4F, -122},    // -.4916
      {-5, 0x14F8B588E368F1, -69},      //  .4829
      {23, 0x152D02C7E14AF6, 24},       // -.5000 (exactly, round-to-even)
      {29, 0x1431E0FAE6D721, 44},       // -.4870
      {34, 0x1ED09BEAD87C03, 60},       // -.4721
      {70, 0x172EBAD6DDC73D, 180},      //  .4733
      {105, 0x1BE7ABD3781ECA, 296},     // -.4850
      {126, 0x17A2ECC414A03F, 366},     // -.4999
      {130, 0x1CDA62055B2D9E, 379},     //  .4855
      {165, 0x115D847AD00087, 496},     // -.4913
      {172, 0x14B378469B6732, 519},     //  .4818
      {187, 0x1262DFEEBBB0F9, 569},     // -.4805
      {210, 0x18557F31326BBB, 645},     // -.4992
      {212, 0x1302CB5E6F642A, 652},     // -.4838
      {215, 0x1290BA9A38C7D1, 662},     // -.4881
      {236, 0x1F736F9B3494E9, 731},     //  .4707
      {244, 0x176EC98994F489, 758},     //  .4924
      {250, 0x1658E3AB795204, 778},     // -.4963
      {252, 0x117571DDF6C814, 785},     //  .4873
      {254, 0x1B4781EAD1989E, 791},     // -.4887
      {260, 0x1A03FDE214CAF1, 811},     //  .4784
      {284, 0x1585041B2C477F, 891},     //  .4798
      {304, 0x1D2A1BE4048F90, 957},     // -.4987
      // Out-of-range values
      // (expected 0.0 below the table; above it, ldexp(1, 2000) is presumably
      // meant to overflow to +infinity -- confirm.)
      {-324, 0x0, 0},
      {-325, 0x0, 0},
      {-326, 0x0, 0},
      {309, 1, 2000},
      {310, 1, 2000},
      {311, 1, 2000},
  };
  for (const TestCase& test_case : kTestCases) {
    // On mismatch, print both doubles in hex-float form (%a).
    EXPECT_EQ(Pow10(test_case.power),
              std::ldexp(test_case.significand, test_case.radix))
        << absl::StrFormat("Failure for Pow10(%d): %a vs %a", test_case.power,
                           Pow10(test_case.power),
                           std::ldexp(test_case.significand, test_case.radix));
  }
}
+template <typename string_type, typename = void> +struct ResizeUninitializedTraits { + using HasMember = std::false_type; + static void Resize(string_type* s, size_t new_size) { s->resize(new_size); } +}; + +// __resize_default_init is provided by libc++ >= 8.0 +template <typename string_type> +struct ResizeUninitializedTraits< + string_type, absl::void_t<decltype(std::declval<string_type&>() + .__resize_default_init(237))> > { + using HasMember = std::true_type; + static void Resize(string_type* s, size_t new_size) { + s->__resize_default_init(new_size); + } +}; + +// Returns true if the std::string implementation supports a resize where +// the new characters added to the std::string are left untouched. +// +// (A better name might be "STLStringSupportsUninitializedResize", alluding to +// the previous function.) +template <typename string_type> +inline constexpr bool STLStringSupportsNontrashingResize(string_type*) { + return ResizeUninitializedTraits<string_type>::HasMember::value; +} + +// Like str->resize(new_size), except any new characters added to "*str" as a +// result of resizing may be left uninitialized, rather than being filled with +// '0' bytes. Typically used when code is then going to overwrite the backing +// store of the std::string with known data. +template <typename string_type, typename = void> +inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { + ResizeUninitializedTraits<string_type>::Resize(s, new_size); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized_test.cc b/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized_test.cc new file mode 100644 index 000000000000..0f8b3c2a95b8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/resize_uninitialized_test.cc @@ -0,0 +1,82 @@ +// Copyright 2017 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/resize_uninitialized.h" + +#include "gtest/gtest.h" + +namespace { + +int resize_call_count = 0; + +// A mock string class whose only purpose is to track how many times its +// resize() method has been called. +struct resizable_string { + size_t size() const { return 0; } + char& operator[](size_t) { + static char c = '\0'; + return c; + } + void resize(size_t) { resize_call_count += 1; } +}; + +int resize_default_init_call_count = 0; + +// A mock string class whose only purpose is to track how many times its +// resize() and __resize_default_init() methods have been called. 
+struct resize_default_init_string { + size_t size() const { return 0; } + char& operator[](size_t) { + static char c = '\0'; + return c; + } + void resize(size_t) { resize_call_count += 1; } + void __resize_default_init(size_t) { resize_default_init_call_count += 1; } +}; + +TEST(ResizeUninit, WithAndWithout) { + resize_call_count = 0; + resize_default_init_call_count = 0; + { + resizable_string rs; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_FALSE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rs, 237); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(resize_default_init_call_count, 0); + } + + resize_call_count = 0; + resize_default_init_call_count = 0; + { + resize_default_init_string rus; + + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_TRUE( + absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitialized(&rus, 237); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 1); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/stl_type_traits.h b/third_party/abseil_cpp/absl/strings/internal/stl_type_traits.h new file mode 100644 index 000000000000..6035ca45cbd2 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/stl_type_traits.h @@ -0,0 +1,248 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// This file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
// The trait is assembled from three families defined below, each covering the
// same list of standard containers:
//   * IsSTLContainer<C>              - C is exactly one of the containers;
//   * IsBaseOfSTLContainer<C>        - C is a base of one of the containers;
//   * IsConvertibleToSTLContainer<C> - C converts to one of the containers.
// Every alias applies absl::decay_t to C before matching.

// True when C is a specialization T<Args...> of the class template T (all
// template parameters of T must be types).
template <typename C, template <typename...> class T>
struct IsSpecializationImpl : std::false_type {};
template <template <typename...> class T, typename... Args>
struct IsSpecializationImpl<T<Args...>, T> : std::true_type {};
template <typename C, template <typename...> class T>
using IsSpecialization = IsSpecializationImpl<absl::decay_t<C>, T>;

// True when C has the shape A<T, N> and is the same type as std::array<T, N>.
template <typename C>
struct IsArrayImpl : std::false_type {};
template <template <typename, size_t> class A, typename T, size_t N>
struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {};
template <typename C>
using IsArray = IsArrayImpl<absl::decay_t<C>>;

// True when C has the shape B<N> and is the same type as std::bitset<N>.
template <typename C>
struct IsBitsetImpl : std::false_type {};
template <template <size_t> class B, size_t N>
struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {};
template <typename C>
using IsBitset = IsBitsetImpl<absl::decay_t<C>>;

// True when C is one of the listed standard containers.
template <typename C>
struct IsSTLContainer
    : absl::disjunction<
          IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>,
          IsSpecialization<C, std::forward_list>,
          IsSpecialization<C, std::list>, IsSpecialization<C, std::map>,
          IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>,
          IsSpecialization<C, std::multiset>,
          IsSpecialization<C, std::unordered_map>,
          IsSpecialization<C, std::unordered_multimap>,
          IsSpecialization<C, std::unordered_set>,
          IsSpecialization<C, std::unordered_multiset>,
          IsSpecialization<C, std::vector>> {};

// True when std::is_base_of<C, T<...>> holds, where T<...> is the container
// template T instantiated with C's own nested typedefs. Falls back to
// false_type when C lacks the typedefs a given specialization asks for.
template <typename C, template <typename...> class T, typename = void>
struct IsBaseOfSpecializationImpl : std::false_type {};
// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE
// on the existence of container dependent types and plug them into the STL
// template.
// T<value_type, allocator_type>
template <typename C, template <typename, typename> class T>
struct IsBaseOfSpecializationImpl<
    C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
    : std::is_base_of<C,
                      T<typename C::value_type, typename C::allocator_type>> {};
// T<key_type, key_compare, allocator_type>
template <typename C, template <typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::key_compare,
                 typename C::allocator_type>>
    : std::is_base_of<C, T<typename C::key_type, typename C::key_compare,
                           typename C::allocator_type>> {};
// T<key_type, mapped_type, key_compare, allocator_type>
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::mapped_type,
                 typename C::key_compare, typename C::allocator_type>>
    : std::is_base_of<C,
                      T<typename C::key_type, typename C::mapped_type,
                        typename C::key_compare, typename C::allocator_type>> {
};
// T<key_type, hasher, key_equal, allocator_type>
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::hasher,
                 typename C::key_equal, typename C::allocator_type>>
    : std::is_base_of<C, T<typename C::key_type, typename C::hasher,
                           typename C::key_equal, typename C::allocator_type>> {
};
// T<key_type, mapped_type, hasher, key_equal, allocator_type>
template <typename C,
          template <typename, typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::mapped_type,
                 typename C::hasher, typename C::key_equal,
                 typename C::allocator_type>>
    : std::is_base_of<C, T<typename C::key_type, typename C::mapped_type,
                           typename C::hasher, typename C::key_equal,
                           typename C::allocator_type>> {};
template <typename C, template <typename...> class T>
using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<absl::decay_t<C>, T>;

// True when C has the shape A<T, N> and is a base of std::array<T, N>.
template <typename C>
struct IsBaseOfArrayImpl : std::false_type {};
template <template <typename, size_t> class A, typename T, size_t N>
struct IsBaseOfArrayImpl<A<T, N>> : std::is_base_of<A<T, N>, std::array<T, N>> {
};
template <typename C>
using IsBaseOfArray = IsBaseOfArrayImpl<absl::decay_t<C>>;

// True when C has the shape B<N> and is a base of std::bitset<N>.
template <typename C>
struct IsBaseOfBitsetImpl : std::false_type {};
template <template <size_t> class B, size_t N>
struct IsBaseOfBitsetImpl<B<N>> : std::is_base_of<B<N>, std::bitset<N>> {};
template <typename C>
using IsBaseOfBitset = IsBaseOfBitsetImpl<absl::decay_t<C>>;

// True when any of the IsBaseOf* checks above holds for a listed container.
template <typename C>
struct IsBaseOfSTLContainer
    : absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>,
                        IsBaseOfSpecialization<C, std::deque>,
                        IsBaseOfSpecialization<C, std::forward_list>,
                        IsBaseOfSpecialization<C, std::list>,
                        IsBaseOfSpecialization<C, std::map>,
                        IsBaseOfSpecialization<C, std::multimap>,
                        IsBaseOfSpecialization<C, std::set>,
                        IsBaseOfSpecialization<C, std::multiset>,
                        IsBaseOfSpecialization<C, std::unordered_map>,
                        IsBaseOfSpecialization<C, std::unordered_multimap>,
                        IsBaseOfSpecialization<C, std::unordered_set>,
                        IsBaseOfSpecialization<C, std::unordered_multiset>,
                        IsBaseOfSpecialization<C, std::vector>> {};

// Same structure as IsBaseOfSpecializationImpl, with std::is_convertible in
// place of std::is_base_of.
template <typename C, template <typename...> class T, typename = void>
struct IsConvertibleToSpecializationImpl : std::false_type {};
// IsConvertibleToSpecializationImpl needs multiple partial specializations to
// SFINAE on the existence of container dependent types and plug them into the
// STL template.
// T<value_type, allocator_type>
template <typename C, template <typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
    C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
    : std::is_convertible<
          C, T<typename C::value_type, typename C::allocator_type>> {};
// T<key_type, key_compare, allocator_type>
template <typename C, template <typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::key_compare,
                 typename C::allocator_type>>
    : std::is_convertible<C, T<typename C::key_type, typename C::key_compare,
                               typename C::allocator_type>> {};
// T<key_type, mapped_type, key_compare, allocator_type>
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::mapped_type,
                 typename C::key_compare, typename C::allocator_type>>
    : std::is_convertible<
          C, T<typename C::key_type, typename C::mapped_type,
               typename C::key_compare, typename C::allocator_type>> {};
// T<key_type, hasher, key_equal, allocator_type>
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::hasher,
                 typename C::key_equal, typename C::allocator_type>>
    : std::is_convertible<
          C, T<typename C::key_type, typename C::hasher, typename C::key_equal,
               typename C::allocator_type>> {};
// T<key_type, mapped_type, hasher, key_equal, allocator_type>
template <typename C,
          template <typename, typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
    C, T,
    absl::void_t<typename C::key_type, typename C::mapped_type,
                 typename C::hasher, typename C::key_equal,
                 typename C::allocator_type>>
    : std::is_convertible<C, T<typename C::key_type, typename C::mapped_type,
                               typename C::hasher, typename C::key_equal,
                               typename C::allocator_type>> {};
template <typename C, template <typename...> class T>
using IsConvertibleToSpecialization =
    IsConvertibleToSpecializationImpl<absl::decay_t<C>, T>;

// True when C has the shape A<T, N> and converts to std::array<T, N>.
template <typename C>
struct IsConvertibleToArrayImpl : std::false_type {};
template <template <typename, size_t> class A, typename T, size_t N>
struct IsConvertibleToArrayImpl<A<T, N>>
    : std::is_convertible<A<T, N>, std::array<T, N>> {};
template <typename C>
using IsConvertibleToArray = IsConvertibleToArrayImpl<absl::decay_t<C>>;

// True when C has the shape B<N> and converts to std::bitset<N>.
template <typename C>
struct IsConvertibleToBitsetImpl : std::false_type {};
template <template <size_t> class B, size_t N>
struct IsConvertibleToBitsetImpl<B<N>>
    : std::is_convertible<B<N>, std::bitset<N>> {};
template <typename C>
using IsConvertibleToBitset = IsConvertibleToBitsetImpl<absl::decay_t<C>>;

// True when any of the IsConvertibleTo* checks above holds for a listed
// container.
template <typename C>
struct IsConvertibleToSTLContainer
    : absl::disjunction<
          IsConvertibleToArray<C>, IsConvertibleToBitset<C>,
          IsConvertibleToSpecialization<C, std::deque>,
          IsConvertibleToSpecialization<C, std::forward_list>,
          IsConvertibleToSpecialization<C, std::list>,
          IsConvertibleToSpecialization<C, std::map>,
          IsConvertibleToSpecialization<C, std::multimap>,
          IsConvertibleToSpecialization<C, std::set>,
          IsConvertibleToSpecialization<C, std::multiset>,
          IsConvertibleToSpecialization<C, std::unordered_map>,
          IsConvertibleToSpecialization<C, std::unordered_multimap>,
          IsConvertibleToSpecialization<C, std::unordered_set>,
          IsConvertibleToSpecialization<C, std::unordered_multiset>,
          IsConvertibleToSpecialization<C, std::vector>> {};

// True when C is NOT itself one of the listed containers, yet is both a base
// of one and convertible to one.
template <typename C>
struct IsStrictlyBaseOfAndConvertibleToSTLContainer
    : absl::conjunction<absl::negation<IsSTLContainer<C>>,
                        IsBaseOfSTLContainer<C>,
                        IsConvertibleToSTLContainer<C>> {};
b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.cc @@ -0,0 +1,488 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +// POSIX spec: +// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html +// +#include "absl/strings/internal/str_format/arg.h" + +#include <cassert> +#include <cerrno> +#include <cstdlib> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/strings/internal/str_format/float_conversion.h" +#include "absl/strings/numbers.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +// Reduce *capacity by s.size(), clipped to a 0 minimum. +void ReducePadding(string_view s, size_t *capacity) { + *capacity = Excess(s.size(), *capacity); +} + +// Reduce *capacity by n, clipped to a 0 minimum. +void ReducePadding(size_t n, size_t *capacity) { + *capacity = Excess(n, *capacity); +} + +template <typename T> +struct MakeUnsigned : std::make_unsigned<T> {}; +template <> +struct MakeUnsigned<absl::int128> { + using type = absl::uint128; +}; +template <> +struct MakeUnsigned<absl::uint128> { + using type = absl::uint128; +}; + +template <typename T> +struct IsSigned : std::is_signed<T> {}; +template <> +struct IsSigned<absl::int128> : std::true_type {}; +template <> +struct IsSigned<absl::uint128> : std::false_type {}; + +// Integral digit printer. +// Call one of the PrintAs* routines after construction once. 
+// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results. +class IntDigits { + public: + // Print the unsigned integer as octal. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsOct(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + do { + *--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7)); + v >>= 3; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the signed or unsigned integer as decimal. + // Supports all integral types. + template <typename T> + void PrintAsDec(T v) { + static_assert(std::is_integral<T>::value, ""); + start_ = storage_; + size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_; + } + + void PrintAsDec(int128 v) { + auto u = static_cast<uint128>(v); + bool add_neg = false; + if (v < 0) { + add_neg = true; + u = uint128{} - u; + } + PrintAsDec(u, add_neg); + } + + void PrintAsDec(uint128 v, bool add_neg = false) { + // This function can be sped up if needed. We can call FastIntToBuffer + // twice, or fix FastIntToBuffer to support uint128. + char *p = storage_ + sizeof(storage_); + do { + p -= 2; + numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + v /= 100; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. + ++p; + } + if (add_neg) { + *--p = '-'; + } + size_ = storage_ + sizeof(storage_) - p; + start_ = p; + } + + // Print the unsigned integer as hex using lowercase. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsHexLower(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + do { + p -= 2; + constexpr const char* table = numbers_internal::kHexTable; + std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2); + if (sizeof(T) == 1) break; + v >>= 8; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. 
+ ++p; + } + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the unsigned integer as hex using uppercase. + // Supports unsigned integral types and uint128. + template <typename T> + void PrintAsHexUpper(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + // kHexTable is only lowercase, so do it manually for uppercase. + do { + *--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15]; + v >>= 4; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // The printed value including the '-' sign if available. + // For inputs of value `0`, this will return "0" + string_view with_neg_and_zero() const { return {start_, size_}; } + + // The printed value not including the '-' sign. + // For inputs of value `0`, this will return "". + string_view without_neg_or_zero() const { + static_assert('-' < '0', "The check below verifies both."); + size_t advance = start_[0] <= '0' ? 1 : 0; + return {start_ + advance, size_ - advance}; + } + + bool is_negative() const { return start_[0] == '-'; } + + private: + const char *start_; + size_t size_; + // Max size: 128 bit value as octal -> 43 digits, plus sign char + char storage_[128 / 3 + 1 + 1]; +}; + +// Note: 'o' conversions do not have a base indicator, it's just that +// the '#' flag is specified to modify the precision for 'o' conversions. +string_view BaseIndicator(const IntDigits &as_digits, + const FormatConversionSpecImpl conv) { + // always show 0x for %p. + bool alt = conv.has_alt_flag() || + conv.conversion_char() == FormatConversionCharInternal::p; + bool hex = (conv.conversion_char() == FormatConversionCharInternal::x || + conv.conversion_char() == FormatConversionCharInternal::X || + conv.conversion_char() == FormatConversionCharInternal::p); + // From the POSIX description of '#' flag: + // "For x or X conversion specifiers, a non-zero result shall have + // 0x (or 0X) prefixed to it." 
+ if (alt && hex && !as_digits.without_neg_or_zero().empty()) { + return conv.conversion_char() == FormatConversionCharInternal::X ? "0X" + : "0x"; + } + return {}; +} + +string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) { + if (conv.conversion_char() == FormatConversionCharInternal::d || + conv.conversion_char() == FormatConversionCharInternal::i) { + if (neg) return "-"; + if (conv.has_show_pos_flag()) return "+"; + if (conv.has_sign_col_flag()) return " "; + } + return {}; +} + +bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + ReducePadding(1, &fill); + if (!conv.has_left_flag()) sink->Append(fill, ' '); + sink->Append(1, v); + if (conv.has_left_flag()) sink->Append(fill, ' '); + return true; +} + +bool ConvertIntImplInnerSlow(const IntDigits &as_digits, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + // Print as a sequence of Substrings: + // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] + size_t fill = 0; + if (conv.width() >= 0) fill = conv.width(); + + string_view formatted = as_digits.without_neg_or_zero(); + ReducePadding(formatted, &fill); + + string_view sign = SignColumn(as_digits.is_negative(), conv); + ReducePadding(sign, &fill); + + string_view base_indicator = BaseIndicator(as_digits, conv); + ReducePadding(base_indicator, &fill); + + int precision = conv.precision(); + bool precision_specified = precision >= 0; + if (!precision_specified) + precision = 1; + + if (conv.has_alt_flag() && + conv.conversion_char() == FormatConversionCharInternal::o) { + // From POSIX description of the '#' (alt) flag: + // "For o conversion, it increases the precision (if necessary) to + // force the first digit of the result to be zero." 
+ if (formatted.empty() || *formatted.begin() != '0') { + int needed = static_cast<int>(formatted.size()) + 1; + precision = std::max(precision, needed); + } + } + + size_t num_zeroes = Excess(formatted.size(), precision); + ReducePadding(num_zeroes, &fill); + + size_t num_left_spaces = !conv.has_left_flag() ? fill : 0; + size_t num_right_spaces = conv.has_left_flag() ? fill : 0; + + // From POSIX description of the '0' (zero) flag: + // "For d, i, o, u, x, and X conversion specifiers, if a precision + // is specified, the '0' flag is ignored." + if (!precision_specified && conv.has_zero_flag()) { + num_zeroes += num_left_spaces; + num_left_spaces = 0; + } + + sink->Append(num_left_spaces, ' '); + sink->Append(sign); + sink->Append(base_indicator); + sink->Append(num_zeroes, '0'); + sink->Append(formatted); + sink->Append(num_right_spaces, ' '); + return true; +} + +template <typename T> +bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + using U = typename MakeUnsigned<T>::type; + IntDigits as_digits; + + // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes + // it to complain about a switch/case type mismatch, even though both are + // FormatConversionChar. Likely this is because at this point + // FormatConversionChar is declared, but not defined. 
+ switch (static_cast<uint8_t>(conv.conversion_char())) { + case static_cast<uint8_t>(FormatConversionCharInternal::c): + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + + case static_cast<uint8_t>(FormatConversionCharInternal::o): + as_digits.PrintAsOct(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::x): + as_digits.PrintAsHexLower(static_cast<U>(v)); + break; + case static_cast<uint8_t>(FormatConversionCharInternal::X): + as_digits.PrintAsHexUpper(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::u): + as_digits.PrintAsDec(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::d): + case static_cast<uint8_t>(FormatConversionCharInternal::i): + as_digits.PrintAsDec(v); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::a): + case static_cast<uint8_t>(FormatConversionCharInternal::e): + case static_cast<uint8_t>(FormatConversionCharInternal::f): + case static_cast<uint8_t>(FormatConversionCharInternal::g): + case static_cast<uint8_t>(FormatConversionCharInternal::A): + case static_cast<uint8_t>(FormatConversionCharInternal::E): + case static_cast<uint8_t>(FormatConversionCharInternal::F): + case static_cast<uint8_t>(FormatConversionCharInternal::G): + return ConvertFloatImpl(static_cast<double>(v), conv, sink); + + default: + ABSL_INTERNAL_ASSUME(false); + } + + if (conv.is_basic()) { + sink->Append(as_digits.with_neg_and_zero()); + return true; + } + return ConvertIntImplInnerSlow(as_digits, conv, sink); +} + +template <typename T> +bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return FormatConversionCharIsFloat(conv.conversion_char()) && + ConvertFloatImpl(v, conv, sink); +} + +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.is_basic()) { + sink->Append(v); + return true; + } + return 
sink->PutPaddedString(v, conv.width(), conv.precision(), + conv.has_left_flag()); +} + +} // namespace + +// ==================== Strings ==================== +StringConvertResult FormatConvertImpl(const std::string &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +StringConvertResult FormatConvertImpl(string_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertStringArg(v, conv, sink)}; +} + +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) + return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; + size_t len; + if (v == nullptr) { + len = 0; + } else if (conv.precision() < 0) { + len = std::strlen(v); + } else { + // If precision is set, we look for the NUL-terminator on the valid range. 
+ len = std::find(v, v + conv.precision(), '\0') - v; + } + return {ConvertStringArg(string_view(v, len), conv, sink)}; +} + +// ==================== Raw pointers ==================== +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { + if (!v.value) { + sink->Append("(nil)"); + return {true}; + } + IntDigits as_digits; + as_digits.PrintAsHexLower(v.value); + return {ConvertIntImplInnerSlow(as_digits, conv, sink)}; +} + +// ==================== Floats ==================== +FloatingConvertResult FormatConvertImpl(float v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(double v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} +FloatingConvertResult FormatConvertImpl(long double v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertFloatArg(v, conv, sink)}; +} + +// ==================== Chars ==================== +IntegralConvertResult FormatConvertImpl(char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(signed char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned char v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +// ==================== Ints ==================== +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, 
sink)}; +} +IntegralConvertResult FormatConvertImpl(int v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(absl::int128 v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} +IntegralConvertResult FormatConvertImpl(absl::uint128 v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} + +ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(); + + + +} // namespace str_format_internal + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/arg.h b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.h new file mode 100644 index 000000000000..7040c866778e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/arg.h @@ -0,0 +1,518 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ + +#include <string.h> +#include <wchar.h> + +#include <cstdio> +#include <iomanip> +#include <limits> +#include <memory> +#include <sstream> +#include <string> +#include <type_traits> + +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/int128.h" +#include "absl/strings/internal/str_format/extension.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class Cord; +class FormatCountCapture; +class FormatSink; + +template <absl::FormatConversionCharSet C> +struct FormatConvertResult; +class FormatConversionSpec; + +namespace str_format_internal { + +template <typename T, typename = void> +struct HasUserDefinedConvert : std::false_type {}; + +template <typename T> +struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>()))>> + : std::true_type {}; + +void AbslFormatConvert(); // Stops the lexical name lookup +template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> decltype(AbslFormatConvert(v, + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>())) { + using FormatConversionSpecT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>; + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fcs = conv.Wrap<FormatConversionSpecT>(); + 
auto fs = sink->Wrap<FormatSinkT>(); + return AbslFormatConvert(v, fcs, &fs); +} + +template <typename T> +class StreamedWrapper; + +// If 'v' can be converted (in the printf sense) according to 'conv', +// then convert it, appending to `sink` and return `true`. +// Otherwise fail and return `false`. + +// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v' +// as an extension mechanism. These FormatConvertImpl functions are the default +// implementations. +// The ADL search is augmented via the 'Sink*' parameter, which also +// serves as a disambiguator to reject possible unintended 'AbslFormatConvert' +// functions in the namespaces associated with 'v'. + +// Raw pointers. +struct VoidPtr { + VoidPtr() = default; + template <typename T, + decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0> + VoidPtr(T* ptr) // NOLINT + : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} + uintptr_t value; +}; + +template <FormatConversionCharSet C> +struct ArgConvertResult { + bool value; +}; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(FormatConvertResult<C>) { + return C; +} + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { + return C; +} + +using StringConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::s>; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +// Strings. 
+StringConvertResult FormatConvertImpl(const std::string& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +/* Cord overload of the string conversion. Appends at most `precision` bytes of `value` (all of it when precision is negative), one chunk at a time, truncating the last chunk if needed. Pads with spaces up to `width` (no padding when width is negative or not larger than the text): before the text by default, after it when the left flag is set. Always returns {true}. */ template <class AbslCord, typename std::enable_if<std::is_same< + AbslCord, absl::Cord>::value>::type* = nullptr> +StringConvertResult FormatConvertImpl(const AbslCord& value, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + bool is_left = conv.has_left_flag(); + size_t space_remaining = 0; + + int width = conv.width(); + if (width >= 0) space_remaining = width; + + size_t to_write = value.size(); + + int precision = conv.precision(); + if (precision >= 0) + to_write = (std::min)(to_write, static_cast<size_t>(precision)); + + space_remaining = Excess(to_write, space_remaining); + + if (space_remaining > 0 && !is_left) sink->Append(space_remaining, ' '); + + for (string_view piece : value.Chunks()) { + if (piece.size() > to_write) { + piece.remove_suffix(piece.size() - to_write); + to_write = 0; + } else { + to_write -= piece.size(); + } + sink->Append(piece); + if (to_write == 0) { + break; + } + } + + if (space_remaining > 0 && is_left) sink->Append(space_remaining, ' '); + return {true}; +} + +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; +using FloatingConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::kFloating>; + +// Floats. 
+FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +FloatingConvertResult FormatConvertImpl(long double v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +// Chars. +IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +// Ints. +IntegralConvertResult FormatConvertImpl(short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(uint128 v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> +IntegralConvertResult FormatConvertImpl(T v, 
FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(static_cast<int>(v), conv, sink); +} + +// We provide this function to help the checker, but it is never defined. +// FormatArgImpl will use the underlying Convert functions instead. +template <typename T> +typename std::enable_if<std::is_enum<T>::value && + !HasUserDefinedConvert<T>::value, + IntegralConvertResult>::type +FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); + +template <typename T> +StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* out) { + std::ostringstream oss; + oss << v.v_; + if (!oss) return {false}; + return str_format_internal::FormatConvertImpl(oss.str(), conv, out); +} + +// Use templates and dependent types to delay evaluation of the function +// until after FormatCountCapture is fully defined. +struct FormatCountCaptureHelper { + template <class T = int> + static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; + + if (conv.conversion_char() != + str_format_internal::FormatConversionCharInternal::n) { + return {false}; + } + *v2.p_ = static_cast<int>(sink->size()); + return {true}; + } +}; + +template <class T = int> +ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); +} + +// Helper friend struct to hide implementation details from the public API of +// FormatArgImpl. +struct FormatArgImplFriend { + template <typename Arg> + static bool ToInt(Arg arg, int* out) { + // A value initialized FormatConversionSpecImpl has a `none` conv, which + // tells the dispatcher to run the `int` conversion. 
+ return arg.dispatcher_(arg.data_, {}, out); + } + + template <typename Arg> + static bool Convert(Arg arg, FormatConversionSpecImpl conv, + FormatSinkImpl* out) { + return arg.dispatcher_(arg.data_, conv, out); + } + + template <typename Arg> + static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) { + return arg.dispatcher_; + } +}; + +template <typename Arg> +constexpr FormatConversionCharSet ArgumentToConv() { + return absl::str_format_internal::ExtractCharSet( + decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), + std::declval<const FormatConversionSpecImpl&>(), + std::declval<FormatSinkImpl*>())){}); +} + +// A type-erased handle to a format argument. +class FormatArgImpl { + private: + enum { kInlinedSpace = 8 }; + + using VoidPtr = str_format_internal::VoidPtr; + + union Data { + const void* ptr; + const volatile void* volatile_ptr; + char buf[kInlinedSpace]; + }; + + using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out); + + template <typename T> + struct store_by_value + : std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) && + (std::is_integral<T>::value || + std::is_floating_point<T>::value || + std::is_pointer<T>::value || + std::is_same<VoidPtr, T>::value)> {}; + + enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue }; + template <typename T> + struct storage_policy + : std::integral_constant<StoragePolicy, + (std::is_volatile<T>::value + ? ByVolatilePointer + : (store_by_value<T>::value ? ByValue + : ByPointer))> { + }; + + // To reduce the number of vtables we will decay values before hand. + // Anything with a user-defined Convert will get its own vtable. + // For everything else: + // - Decay char* and char arrays into `const char*` + // - Decay any other pointer to `const void*` + // - Decay all enums to their underlying type. + // - Decay function pointers to void*. 
+ template <typename T, typename = void> + struct DecayType { + static constexpr bool kHasUserDefined = + str_format_internal::HasUserDefinedConvert<T>::value; + using type = typename std::conditional< + !kHasUserDefined && std::is_convertible<T, const char*>::value, + const char*, + typename std::conditional<!kHasUserDefined && + std::is_convertible<T, VoidPtr>::value, + VoidPtr, const T&>::type>::type; + }; + template <typename T> + struct DecayType<T, + typename std::enable_if< + !str_format_internal::HasUserDefinedConvert<T>::value && + std::is_enum<T>::value>::type> { + using type = typename std::underlying_type<T>::type; + }; + + public: + template <typename T> + explicit FormatArgImpl(const T& value) { + using D = typename DecayType<T>::type; + static_assert( + std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue, + "Decayed types must be stored by value"); + Init(static_cast<D>(value)); + } + + private: + friend struct str_format_internal::FormatArgImplFriend; + template <typename T, StoragePolicy = storage_policy<T>::value> + struct Manager; + + template <typename T> + struct Manager<T, ByPointer> { + static Data SetValue(const T& value) { + Data data; + data.ptr = std::addressof(value); + return data; + } + + static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); } + }; + + template <typename T> + struct Manager<T, ByVolatilePointer> { + static Data SetValue(const T& value) { + Data data; + data.volatile_ptr = &value; + return data; + } + + static const T& Value(Data arg) { + return *static_cast<const T*>(arg.volatile_ptr); + } + }; + + template <typename T> + struct Manager<T, ByValue> { + static Data SetValue(const T& value) { + Data data; + memcpy(data.buf, &value, sizeof(value)); + return data; + } + + static T Value(Data arg) { + T value; + memcpy(&value, arg.buf, sizeof(T)); + return value; + } + }; + + template <typename T> + void Init(const T& value) { + data_ = Manager<T>::SetValue(value); + dispatcher_ = 
&Dispatch<T>; + } + + template <typename T> + static int ToIntVal(const T& val) { + using CommonType = typename std::conditional<std::is_signed<T>::value, + int64_t, uint64_t>::type; + if (static_cast<CommonType>(val) > + static_cast<CommonType>((std::numeric_limits<int>::max)())) { + return (std::numeric_limits<int>::max)(); + } else if (std::is_signed<T>::value && + static_cast<CommonType>(val) < + static_cast<CommonType>((std::numeric_limits<int>::min)())) { + return (std::numeric_limits<int>::min)(); + } + return static_cast<int>(val); + } + + template <typename T> + static bool ToInt(Data arg, int* out, std::true_type /* is_integral */, + std::false_type) { + *out = ToIntVal(Manager<T>::Value(arg)); + return true; + } + + template <typename T> + static bool ToInt(Data arg, int* out, std::false_type, + std::true_type /* is_enum */) { + *out = ToIntVal(static_cast<typename std::underlying_type<T>::type>( + Manager<T>::Value(arg))); + return true; + } + + template <typename T> + static bool ToInt(Data, int*, std::false_type, std::false_type) { + return false; + } + + template <typename T> + static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) { + // A `none` conv indicates that we want the `int` conversion. + if (ABSL_PREDICT_FALSE(spec.conversion_char() == + FormatConversionCharInternal::kNone)) { + return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), + std::is_enum<T>()); + } + if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), + spec.conversion_char()))) { + return false; + } + return str_format_internal::FormatConvertImpl( + Manager<T>::Value(arg), spec, + static_cast<FormatSinkImpl*>(out)) + .value; + } + + Data data_; + Dispatcher dispatcher_; +}; + +#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ + E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ + void*) + +#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) 
\ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \ + __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__) + +ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern); + + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/arg_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/arg_test.cc new file mode 100644 index 
000000000000..1261937c3097 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/arg_test.cc @@ -0,0 +1,130 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/arg.h" + +#include <ostream> +#include <string> +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +class FormatArgImplTest : public ::testing::Test { + public: + enum Color { kRed, kGreen, kBlue }; + + static const char *hi() { return "hi"; } + + struct X {}; + + X x_; +}; + +inline FormatConvertResult<FormatConversionCharSet{}> AbslFormatConvert( + const FormatArgImplTest::X &, const FormatConversionSpec &, FormatSink *) { + return {false}; +} + +TEST_F(FormatArgImplTest, ToInt) { + int out = 0; + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out)); + EXPECT_EQ(1, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out)); + EXPECT_EQ(-1, out); + EXPECT_TRUE( + FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out)); + EXPECT_EQ(64, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<unsigned long long>(123456)), &out)); // NOLINT + EXPECT_EQ(123456, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<unsigned long long>( // NOLINT + std::numeric_limits<int>::max()) + + 1), + &out)); + 
EXPECT_EQ(std::numeric_limits<int>::max(), out); + EXPECT_TRUE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<long long>( // NOLINT + std::numeric_limits<int>::min()) - + 10), + &out)); + EXPECT_EQ(std::numeric_limits<int>::min(), out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out)); + EXPECT_EQ(0, out); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out)); + EXPECT_EQ(1, out); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt( + FormatArgImpl(static_cast<int *>(nullptr)), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(x_), &out)); + EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out)); + EXPECT_EQ(2, out); +} + +extern const char kMyArray[]; + +TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) { + const char* a = ""; + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(""))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC"))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)), + FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray))); +} + +TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { + auto expected = FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<void *>(nullptr))); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest( + FormatArgImpl(static_cast<int *>(nullptr))), + expected); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest( + 
FormatArgImpl(static_cast<volatile int *>(nullptr))), + expected); + + auto p = static_cast<void (*)()>([] {}); + EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)), + expected); +} + +TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { + std::string s; + FormatSinkImpl sink(&s); + FormatConversionSpecImpl conv; + FormatConversionSpecImplFriend::SetConversionChar( + FormatConversionCharInternal::s, &conv); + FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); + FormatConversionSpecImplFriend::SetWidth(-1, &conv); + FormatConversionSpecImplFriend::SetPrecision(-1, &conv); + EXPECT_TRUE( + FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink)); + sink.Flush(); + EXPECT_EQ("ABCDE", s); +} +const char kMyArray[] = "ABCDE"; + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/bind.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.cc new file mode 100644 index 000000000000..4e68b90b5ce8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.cc @@ -0,0 +1,259 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/str_format/bind.h" + +#include <cerrno> +#include <limits> +#include <sstream> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +inline bool BindFromPosition(int position, int* value, + absl::Span<const FormatArgImpl> pack) { + assert(position > 0); + if (static_cast<size_t>(position) > pack.size()) { + return false; + } + // -1 because positions are 1-based + return FormatArgImplFriend::ToInt(pack[position - 1], value); +} + +class ArgContext { + public: + explicit ArgContext(absl::Span<const FormatArgImpl> pack) : pack_(pack) {} + + // Fill 'bound' with the results of applying the context's argument pack + // to the specified 'unbound'. We synthesize a BoundConversion by + // lining up a UnboundConversion with a user argument. We also + // resolve any '*' specifiers for width and precision, so after + // this call, 'bound' has all the information it needs to be formatted. + // Returns false on failure. + bool Bind(const UnboundConversion* unbound, BoundConversion* bound); + + private: + absl::Span<const FormatArgImpl> pack_; +}; + +inline bool ArgContext::Bind(const UnboundConversion* unbound, + BoundConversion* bound) { + const FormatArgImpl* arg = nullptr; + int arg_position = unbound->arg_position; + if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; + arg = &pack_[arg_position - 1]; // 1-based + + if (!unbound->flags.basic) { + int width = unbound->width.value(); + bool force_left = false; + if (unbound->width.is_from_arg()) { + if (!BindFromPosition(unbound->width.get_from_arg(), &width, pack_)) + return false; + if (width < 0) { + // "A negative field width is taken as a '-' flag followed by a + // positive field width." + force_left = true; + // Make sure we don't overflow the width when negating it. 
+ width = -std::max(width, -std::numeric_limits<int>::max()); + } + } + + int precision = unbound->precision.value(); + if (unbound->precision.is_from_arg()) { + if (!BindFromPosition(unbound->precision.get_from_arg(), &precision, + pack_)) + return false; + } + + FormatConversionSpecImplFriend::SetWidth(width, bound); + FormatConversionSpecImplFriend::SetPrecision(precision, bound); + + if (force_left) { + Flags flags = unbound->flags; + flags.left = true; + FormatConversionSpecImplFriend::SetFlags(flags, bound); + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + } + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + FormatConversionSpecImplFriend::SetWidth(-1, bound); + FormatConversionSpecImplFriend::SetPrecision(-1, bound); + } + FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound); + bound->set_arg(arg); + return true; +} + +template <typename Converter> +class ConverterConsumer { + public: + ConverterConsumer(Converter converter, absl::Span<const FormatArgImpl> pack) + : converter_(converter), arg_context_(pack) {} + + bool Append(string_view s) { + converter_.Append(s); + return true; + } + bool ConvertOne(const UnboundConversion& conv, string_view conv_string) { + BoundConversion bound; + if (!arg_context_.Bind(&conv, &bound)) return false; + return converter_.ConvertOne(bound, conv_string); + } + + private: + Converter converter_; + ArgContext arg_context_; +}; + +template <typename Converter> +bool ConvertAll(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args, Converter converter) { + if (format.has_parsed_conversion()) { + return format.parsed_conversion()->ProcessFormat( + ConverterConsumer<Converter>(converter, args)); + } else { + return ParseFormatString(format.str(), + ConverterConsumer<Converter>(converter, args)); + } +} + +class DefaultConverter { + public: + explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void 
Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_); + } + + private: + FormatSinkImpl* sink_; +}; + +class SummarizingConverter { + public: + explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {} + + void Append(string_view s) const { sink_->Append(s); } + + bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const { + UntypedFormatSpecImpl spec("%d"); + + std::ostringstream ss; + ss << "{" << Streamable(spec, {*bound.arg()}) << ":" + << FormatConversionSpecImplFriend::FlagsToString(bound); + if (bound.width() >= 0) ss << bound.width(); + if (bound.precision() >= 0) ss << "." << bound.precision(); + ss << bound.conversion_char() << "}"; + Append(ss.str()); + return true; + } + + private: + FormatSinkImpl* sink_; +}; + +} // namespace + +bool BindWithPack(const UnboundConversion* props, + absl::Span<const FormatArgImpl> pack, + BoundConversion* bound) { + return ArgContext(pack).Bind(props, bound); +} + +std::string Summarize(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + typedef SummarizingConverter Converter; + std::string out; + { + // inner block to destroy sink before returning out. It ensures a last + // flush. 
+ FormatSinkImpl sink(&out); + if (!ConvertAll(format, args, Converter(&sink))) { + return ""; + } + } + return out; +} + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + FormatSinkImpl sink(raw_sink); + using Converter = DefaultConverter; + return ConvertAll(format, args, Converter(&sink)); +} + +std::ostream& Streamable::Print(std::ostream& os) const { + if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit); + return os; +} + +std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + size_t orig = out->size(); + if (ABSL_PREDICT_FALSE(!FormatUntyped(out, format, args))) { + out->erase(orig); + } + return *out; +} + +std::string FormatPack(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + std::string out; + if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) { + out.clear(); + } + return out; +} + +int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + FILERawSink sink(output); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + if (sink.error()) { + errno = sink.error(); + return -1; + } + if (sink.count() > static_cast<size_t>(std::numeric_limits<int>::max())) { + errno = EFBIG; + return -1; + } + return static_cast<int>(sink.count()); +} + +int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + BufferRawSink sink(output, size ? 
size - 1 : 0); + if (!FormatUntyped(&sink, format, args)) { + errno = EINVAL; + return -1; + } + size_t total = sink.total_written(); + if (size) output[std::min(total, size - 1)] = 0; + return static_cast<int>(total); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/bind.h b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.h new file mode 100644 index 000000000000..267cc0ef6928 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/bind.h @@ -0,0 +1,217 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ + +#include <array> +#include <cstdio> +#include <sstream> +#include <string> + +#include "absl/base/port.h" +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/parser.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class UntypedFormatSpec; + +namespace str_format_internal { + +class BoundConversion : public FormatConversionSpecImpl { + public: + const FormatArgImpl* arg() const { return arg_; } + void set_arg(const FormatArgImpl* a) { arg_ = a; } + + private: + const FormatArgImpl* arg_; +}; + +// This is the type-erased class that the implementation uses. 
+class UntypedFormatSpecImpl { + public: + UntypedFormatSpecImpl() = delete; + + explicit UntypedFormatSpecImpl(string_view s) + : data_(s.data()), size_(s.size()) {} + explicit UntypedFormatSpecImpl( + const str_format_internal::ParsedFormatBase* pc) + : data_(pc), size_(~size_t{}) {} + + bool has_parsed_conversion() const { return size_ == ~size_t{}; } + + string_view str() const { + assert(!has_parsed_conversion()); + return string_view(static_cast<const char*>(data_), size_); + } + const str_format_internal::ParsedFormatBase* parsed_conversion() const { + assert(has_parsed_conversion()); + return static_cast<const str_format_internal::ParsedFormatBase*>(data_); + } + + template <typename T> + static const UntypedFormatSpecImpl& Extract(const T& s) { + return s.spec_; + } + + private: + const void* data_; + size_t size_; +}; + +template <typename T, FormatConversionCharSet...> +struct MakeDependent { + using type = T; +}; + +// Implicitly convertible from `const char*`, `string_view`, and the +// `ExtendedParsedFormat` type. This abstraction allows all format functions to +// operate on any of them without providing too many overloads. +template <FormatConversionCharSet... Args> +class FormatSpecTemplate + : public MakeDependent<UntypedFormatSpec, Args...>::type { + using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; + + public: +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + // Honeypot overload for when the string is not constexpr. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + FormatSpecTemplate(...) // NOLINT + __attribute__((unavailable("Format string is not constexpr."))); + + // Honeypot overload for when the format is constexpr and invalid. + // We use the 'unavailable' attribute to give a better compiler error than + // just 'method is deleted'. + // To avoid checking the format twice, we just check that the format is + // constexpr. 
If it is valid, then the overload below will kick in. + // We add the template here to make this overload have lower priority. + template <typename = void> + FormatSpecTemplate(const char* s) // NOLINT + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"), + unavailable( + "Format specified does not match the arguments passed."))); + + template <typename T = void> + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(str_format_internal::EnsureConstexpr(s), + "constexpr trap"))) { + static_assert(sizeof(T*) == 0, + "Format specified does not match the arguments passed."); + } + + // Good format overload. + FormatSpecTemplate(const char* s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) + : Base(s) {} + + FormatSpecTemplate(string_view s) // NOLINT + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) + : Base(s) {} + +#else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + FormatSpecTemplate(const char* s) : Base(s) {} // NOLINT + FormatSpecTemplate(string_view s) : Base(s) {} // NOLINT + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + + template < + FormatConversionCharSet... 
C, + typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type, + typename = typename std::enable_if<AllOf(Contains(Args, + C)...)>::type> + FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT + : Base(&pc) {} +}; + +class Streamable { + public: + Streamable(const UntypedFormatSpecImpl& format, + absl::Span<const FormatArgImpl> args) + : format_(format) { + if (args.size() <= ABSL_ARRAYSIZE(few_args_)) { + for (size_t i = 0; i < args.size(); ++i) { + few_args_[i] = args[i]; + } + args_ = absl::MakeSpan(few_args_, args.size()); + } else { + many_args_.assign(args.begin(), args.end()); + args_ = many_args_; + } + } + + std::ostream& Print(std::ostream& os) const; + + friend std::ostream& operator<<(std::ostream& os, const Streamable& l) { + return l.Print(os); + } + + private: + const UntypedFormatSpecImpl& format_; + absl::Span<const FormatArgImpl> args_; + // if args_.size() is 4 or less: + FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0), + FormatArgImpl(0), FormatArgImpl(0)}; + // if args_.size() is more than 4: + std::vector<FormatArgImpl> many_args_; +}; + +// for testing +std::string Summarize(UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); +bool BindWithPack(const UnboundConversion* props, + absl::Span<const FormatArgImpl> pack, BoundConversion* bound); + +bool FormatUntyped(FormatRawSinkImpl raw_sink, + UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +std::string FormatPack(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +int FprintF(std::FILE* output, UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); +int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); + +// Returned by Streamed(v). 
Converts via '%s' to the std::string created +// by std::ostream << v. +template <typename T> +class StreamedWrapper { + public: + explicit StreamedWrapper(const T& v) : v_(v) { } + + private: + template <typename S> + friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl( + const StreamedWrapper<S>& v, FormatConversionSpecImpl conv, + FormatSinkImpl* out); + const T& v_; +}; + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/bind_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/bind_test.cc new file mode 100644 index 000000000000..1eef9c4326e2 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/bind_test.cc @@ -0,0 +1,157 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/str_format/bind.h" + +#include <string.h> +#include <limits> + +#include "gtest/gtest.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +class FormatBindTest : public ::testing::Test { + public: + bool Extract(const char *s, UnboundConversion *props, int *next) const { + return ConsumeUnboundConversion(s, s + strlen(s), props, next) == + s + strlen(s); + } +}; + +TEST_F(FormatBindTest, BindSingle) { + struct Expectation { + int line; + const char *fmt; + int ok_phases; + const FormatArgImpl *arg; + int width; + int precision; + int next_arg; + }; + const int no = -1; + const int ia[] = { 10, 20, 30, 40}; + const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]), + FormatArgImpl(ia[2]), FormatArgImpl(ia[3])}; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" + const Expectation kExpect[] = { + {__LINE__, "d", 2, &args[0], no, no, 2}, + {__LINE__, "4d", 2, &args[0], 4, no, 2}, + {__LINE__, ".5d", 2, &args[0], no, 5, 2}, + {__LINE__, "4.5d", 2, &args[0], 4, 5, 2}, + {__LINE__, "*d", 2, &args[1], 10, no, 3}, + {__LINE__, ".*d", 2, &args[1], no, 10, 3}, + {__LINE__, "*.*d", 2, &args[2], 10, 20, 4}, + {__LINE__, "1$d", 2, &args[0], no, no, 0}, + {__LINE__, "2$d", 2, &args[1], no, no, 0}, + {__LINE__, "3$d", 2, &args[2], no, no, 0}, + {__LINE__, "4$d", 2, &args[3], no, no, 0}, + {__LINE__, "2$*1$d", 2, &args[1], 10, no, 0}, + {__LINE__, "2$*2$d", 2, &args[1], 20, no, 0}, + {__LINE__, "2$*3$d", 2, &args[1], 30, no, 0}, + {__LINE__, "2$.*1$d", 2, &args[1], no, 10, 0}, + {__LINE__, "2$.*2$d", 2, &args[1], no, 20, 0}, + {__LINE__, "2$.*3$d", 2, &args[1], no, 30, 0}, + {__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0}, + {__LINE__, "2$*2$.*2$d", 2, &args[1], 20, 20, 0}, + {__LINE__, "2$*1$.*3$d", 2, &args[1], 10, 30, 0}, + {__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0}, + {__LINE__, "1$*d", 0}, // indexed, then positional + {__LINE__, 
"*2$d", 0}, // positional, then indexed + {__LINE__, "6$d", 1}, // arg position out of bounds + {__LINE__, "1$6$d", 0}, // width position incorrectly specified + {__LINE__, "1$.6$d", 0}, // precision position incorrectly specified + {__LINE__, "1$*6$d", 1}, // width position out of bounds + {__LINE__, "1$.*6$d", 1}, // precision position out of bounds + }; +#pragma GCC diagnostic pop + for (const Expectation &e : kExpect) { + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UnboundConversion props; + BoundConversion bound; + int ok_phases = 0; + int next = 0; + if (Extract(e.fmt, &props, &next)) { + ++ok_phases; + if (BindWithPack(&props, args, &bound)) { + ++ok_phases; + } + } + EXPECT_EQ(e.ok_phases, ok_phases); + if (e.ok_phases < 2) continue; + if (e.arg != nullptr) { + EXPECT_EQ(e.arg, bound.arg()); + } + EXPECT_EQ(e.width, bound.width()); + EXPECT_EQ(e.precision, bound.precision()); + } +} + +TEST_F(FormatBindTest, WidthUnderflowRegression) { + UnboundConversion props; + BoundConversion bound; + int next = 0; + const int args_i[] = {std::numeric_limits<int>::min(), 17}; + const FormatArgImpl args[] = {FormatArgImpl(args_i[0]), + FormatArgImpl(args_i[1])}; + ASSERT_TRUE(Extract("*d", &props, &next)); + ASSERT_TRUE(BindWithPack(&props, args, &bound)); + + EXPECT_EQ(bound.width(), std::numeric_limits<int>::max()); + EXPECT_EQ(bound.arg(), args + 1); +} + +TEST_F(FormatBindTest, FormatPack) { + struct Expectation { + int line; + const char *fmt; + const char *summary; + }; + const int ia[] = { 10, 20, 30, 40, -10 }; + const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]), + FormatArgImpl(ia[2]), FormatArgImpl(ia[3]), + FormatArgImpl(ia[4])}; + const Expectation kExpect[] = { + {__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"}, + {__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"}, + {__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"}, + {__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"}, + {__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"}, + {__LINE__, "a%.*fb", 
"a{20:.10f}b"}, + {__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"}, + {__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"}, + {__LINE__, "a%04ldb", "a{10:04d}b"}, + {__LINE__, "a%-#04lldb", "a{10:-#04d}b"}, + {__LINE__, "a%1$*5$db", "a{10:-10d}b"}, + {__LINE__, "a%1$.*5$db", "a{10:d}b"}, + }; + for (const Expectation &e : kExpect) { + absl::string_view fmt = e.fmt; + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UntypedFormatSpecImpl format(fmt); + EXPECT_EQ(e.summary, + str_format_internal::Summarize(format, absl::MakeSpan(args))) + << "line:" << e.line; + } +} + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/checker.h b/third_party/abseil_cpp/absl/strings/internal/str_format/checker.h new file mode 100644 index 000000000000..2a2601eccfd8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/checker.h @@ -0,0 +1,333 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ + +#include "absl/base/attributes.h" +#include "absl/strings/internal/str_format/arg.h" +#include "absl/strings/internal/str_format/extension.h" + +// Compile time check support for entry points. 
+ +#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1 +#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +constexpr bool AllOf() { return true; } + +template <typename... T> +constexpr bool AllOf(bool b, T... t) { + return b && AllOf(t...); +} + +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +constexpr bool ContainsChar(const char* chars, char c) { + return *chars == c || (*chars && ContainsChar(chars + 1, c)); +} + +// A constexpr compatible list of Convs. +struct ConvList { + const FormatConversionCharSet* array; + int count; + + // We do the bound check here to avoid having to do it on the callers. + // Returning an empty FormatConversionCharSet has the same effect as + // short circuiting because it will never match any conversion. + constexpr FormatConversionCharSet operator[](int i) const { + return i < count ? array[i] : FormatConversionCharSet{}; + } + + constexpr ConvList without_front() const { + return count != 0 ? ConvList{array + 1, count - 1} : *this; + } +}; + +template <size_t count> +struct ConvListT { + // Make sure the array has size > 0. + FormatConversionCharSet list[count ? count : 1]; +}; + +constexpr char GetChar(string_view str, size_t index) { + return index < str.size() ? str[index] : char{}; +} + +constexpr string_view ConsumeFront(string_view str, size_t len = 1) { + return len <= str.size() ? string_view(str.data() + len, str.size() - len) + : string_view(); +} + +constexpr string_view ConsumeAnyOf(string_view format, const char* chars) { + return ContainsChar(chars, GetChar(format, 0)) + ? ConsumeAnyOf(ConsumeFront(format), chars) + : format; +} + +constexpr bool IsDigit(char c) { return c >= '0' && c <= '9'; } + +// Helper class for the ParseDigits function. 
+// It encapsulates the two return values we need there. +struct Integer { + string_view format; + int value; + + // If the next character is a '$', consume it. + // Otherwise, make `this` an invalid positional argument. + constexpr Integer ConsumePositionalDollar() const { + return GetChar(format, 0) == '$' ? Integer{ConsumeFront(format), value} + : Integer{format, 0}; + } +}; + +constexpr Integer ParseDigits(string_view format, int value = 0) { + return IsDigit(GetChar(format, 0)) + ? ParseDigits(ConsumeFront(format), + 10 * value + GetChar(format, 0) - '0') + : Integer{format, value}; +} + +// Parse digits for a positional argument. +// The parsing also consumes the '$'. +constexpr Integer ParsePositional(string_view format) { + return ParseDigits(format).ConsumePositionalDollar(); +} + +// Parses a single conversion specifier. +// See ConvParser::Run() for post conditions. +class ConvParser { + constexpr ConvParser SetFormat(string_view format) const { + return ConvParser(format, args_, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetArgs(ConvList args) const { + return ConvParser(format_, args, error_, arg_position_, is_positional_); + } + + constexpr ConvParser SetError(bool error) const { + return ConvParser(format_, args_, error_ || error, arg_position_, + is_positional_); + } + + constexpr ConvParser SetArgPosition(int arg_position) const { + return ConvParser(format_, args_, error_, arg_position, is_positional_); + } + + // Consumes the next arg and verifies that it matches `conv`. + // `error_` is set if there is no next arg or if it doesn't match `conv`. + constexpr ConvParser ConsumeNextArg(char conv) const { + return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv)); + } + + // Verify that positional argument `i.value` matches `conv`. + // `error_` is set if `i.value` is not a valid argument or if it doesn't + // match. 
+ constexpr ConvParser VerifyPositional(Integer i, char conv) const { + return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv)); + } + + // Parse the position of the arg and store it in `arg_position_`. + constexpr ConvParser ParseArgPosition(Integer arg) const { + return SetFormat(arg.format).SetArgPosition(arg.value); + } + + // Consume the flags. + constexpr ConvParser ParseFlags() const { + return SetFormat(ConsumeAnyOf(format_, "-+ #0")); + } + + // Consume the width. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParseWidth() const { + return IsDigit(GetChar(format_, 0)) + ? SetFormat(ParseDigits(format_).format) + : GetChar(format_, 0) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_)), '*') + : SetFormat(ConsumeFront(format_)) + .ConsumeNextArg('*') + : *this; + } + + // Consume the precision. + // If it is '*', we verify that it matches `args_`. `error_` is set if it + // doesn't match. + constexpr ConvParser ParsePrecision() const { + return GetChar(format_, 0) != '.' + ? *this + : GetChar(format_, 1) == '*' + ? is_positional_ + ? VerifyPositional( + ParsePositional(ConsumeFront(format_, 2)), '*') + : SetFormat(ConsumeFront(format_, 2)) + .ConsumeNextArg('*') + : SetFormat(ParseDigits(ConsumeFront(format_)).format); + } + + // Consume the length characters. + constexpr ConvParser ParseLength() const { + return SetFormat(ConsumeAnyOf(format_, "lLhjztq")); + } + + // Consume the conversion character and verify that it matches `args_`. + // `error_` is set if it doesn't match. + constexpr ConvParser ParseConversion() const { + return is_positional_ + ? 
VerifyPositional({ConsumeFront(format_), arg_position_}, + GetChar(format_, 0)) + : ConsumeNextArg(GetChar(format_, 0)) + .SetFormat(ConsumeFront(format_)); + } + + constexpr ConvParser(string_view format, ConvList args, bool error, + int arg_position, bool is_positional) + : format_(format), + args_(args), + error_(error), + arg_position_(arg_position), + is_positional_(is_positional) {} + + public: + constexpr ConvParser(string_view format, ConvList args, bool is_positional) + : format_(format), + args_(args), + error_(false), + arg_position_(0), + is_positional_(is_positional) {} + + // Consume the whole conversion specifier. + // `format()` will be set to the character after the conversion character. + // `error()` will be set if any of the arguments do not match. + constexpr ConvParser Run() const { + return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this) + .ParseFlags() + .ParseWidth() + .ParsePrecision() + .ParseLength() + .ParseConversion(); + } + + constexpr string_view format() const { return format_; } + constexpr ConvList args() const { return args_; } + constexpr bool error() const { return error_; } + constexpr bool is_positional() const { return is_positional_; } + + private: + string_view format_; + // Current list of arguments. If we are not in positional mode we will consume + // from the front. + ConvList args_; + bool error_; + // Holds the argument position of the conversion character, if we are in + // positional mode. Otherwise, it is unspecified. + int arg_position_; + // Whether we are in positional mode. + // It changes the behavior of '*' and where to find the converted argument. + bool is_positional_; +}; + +// Parses a whole format expression. +// See FormatParser::Run(). +class FormatParser { + static constexpr bool FoundPercent(string_view format) { + return format.empty() || + (GetChar(format, 0) == '%' && GetChar(format, 1) != '%'); + } + + // We use an inner function to increase the recursion limit. 
+ // The inner function consumes up to `limit` characters on every run. + // This increases the limit from 512 to ~512*limit. + static constexpr string_view ConsumeNonPercentInner(string_view format, + int limit = 20) { + return FoundPercent(format) || !limit + ? format + : ConsumeNonPercentInner( + ConsumeFront(format, GetChar(format, 0) == '%' && + GetChar(format, 1) == '%' + ? 2 + : 1), + limit - 1); + } + + // Consume characters until the next conversion spec %. + // It skips %%. + static constexpr string_view ConsumeNonPercent(string_view format) { + return FoundPercent(format) + ? format + : ConsumeNonPercent(ConsumeNonPercentInner(format)); + } + + static constexpr bool IsPositional(string_view format) { + return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format)) + : GetChar(format, 0) == '$'; + } + + constexpr bool RunImpl(bool is_positional) const { + // In non-positional mode we require all arguments to be consumed. + // In positional mode just reaching the end of the format without errors is + // enough. + return (format_.empty() && (is_positional || args_.count == 0)) || + (!format_.empty() && + ValidateArg( + ConvParser(ConsumeFront(format_), args_, is_positional).Run())); + } + + constexpr bool ValidateArg(ConvParser conv) const { + return !conv.error() && FormatParser(conv.format(), conv.args()) + .RunImpl(conv.is_positional()); + } + + public: + constexpr FormatParser(string_view format, ConvList args) + : format_(ConsumeNonPercent(format)), args_(args) {} + + // Runs the parser for `format` and `args`. + // It verifies that the format is valid and that all conversion specifiers + // match the arguments passed. + // In non-positional mode it also verifies that all arguments are consumed. + constexpr bool Run() const { + return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_))); + } + + private: + string_view format_; + // Current list of arguments.
+ // If we are not in positional mode we will consume from the front and will + // have to be empty in the end. + ConvList args_; +}; + +template <FormatConversionCharSet... C> +constexpr bool ValidFormatImpl(string_view format) { + return FormatParser(format, + {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) + .Run(); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/checker_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/checker_test.cc new file mode 100644 index 000000000000..7c70f47d682a --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/checker_test.cc @@ -0,0 +1,170 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +std::string ConvToString(FormatConversionCharSet conv) { + std::string out; +#define CONV_SET_CASE(c) \ + if (Contains(conv, FormatConversionCharSetInternal::c)) { \ + out += #c; \ + } + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) +#undef CONV_SET_CASE + if (Contains(conv, FormatConversionCharSetInternal::kStar)) { + out += "*"; + } + return out; +} + +TEST(StrFormatChecker, ArgumentToConv) { + FormatConversionCharSet conv = ArgumentToConv<std::string>(); + EXPECT_EQ(ConvToString(conv), "s"); + + conv = ArgumentToConv<const char*>(); + EXPECT_EQ(ConvToString(conv), "sp"); + + conv = ArgumentToConv<double>(); + EXPECT_EQ(ConvToString(conv), "fFeEgGaA"); + + conv = ArgumentToConv<int>(); + EXPECT_EQ(ConvToString(conv), "cdiouxXfFeEgGaA*"); + + conv = ArgumentToConv<std::string*>(); + EXPECT_EQ(ConvToString(conv), "p"); +} + +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +struct Case { + bool result; + const char* format; +}; + +template <typename... Args> +constexpr Case ValidFormat(const char* format) { + return {ValidFormatImpl<ArgumentToConv<Args>()...>(format), format}; +} + +TEST(StrFormatChecker, ValidFormat) { + // We want to make sure these expressions are constexpr and they have the + // expected value. + // If they are not constexpr the attribute will just ignore them and not give + // a compile time error. 
+ enum e {}; + enum class e2 {}; + constexpr Case trues[] = { + ValidFormat<>("abc"), // + + ValidFormat<e>("%d"), // + ValidFormat<e2>("%d"), // + ValidFormat<int>("%% %d"), // + ValidFormat<int>("%ld"), // + ValidFormat<int>("%lld"), // + ValidFormat<std::string>("%s"), // + ValidFormat<std::string>("%10s"), // + ValidFormat<int>("%.10x"), // + ValidFormat<int, int>("%*.3x"), // + ValidFormat<int>("%1.d"), // + ValidFormat<int>("%.d"), // + ValidFormat<int, double>("%d %g"), // + ValidFormat<int, std::string>("%*s"), // + ValidFormat<int, double>("%.*f"), // + ValidFormat<void (*)(), volatile int*>("%p %p"), // + ValidFormat<string_view, const char*, double, void*>( + "string_view=%s const char*=%s double=%f void*=%p)"), + + ValidFormat<int>("%% %1$d"), // + ValidFormat<int>("%1$ld"), // + ValidFormat<int>("%1$lld"), // + ValidFormat<std::string>("%1$s"), // + ValidFormat<std::string>("%1$10s"), // + ValidFormat<int>("%1$.10x"), // + ValidFormat<int>("%1$*1$.*1$d"), // + ValidFormat<int, int>("%1$*2$.3x"), // + ValidFormat<int>("%1$1.d"), // + ValidFormat<int>("%1$.d"), // + ValidFormat<double, int>("%2$d %1$g"), // + ValidFormat<int, std::string>("%2$*1$s"), // + ValidFormat<int, double>("%2$.*1$f"), // + ValidFormat<void*, string_view, const char*, double>( + "string_view=%2$s const char*=%3$s double=%4$f void*=%1$p " + "repeat=%3$s)")}; + + for (Case c : trues) { + EXPECT_TRUE(c.result) << c.format; + } + + constexpr Case falses[] = { + ValidFormat<int>(""), // + + ValidFormat<e>("%s"), // + ValidFormat<e2>("%s"), // + ValidFormat<>("%s"), // + ValidFormat<>("%r"), // + ValidFormat<int>("%s"), // + ValidFormat<int>("%.1.d"), // + ValidFormat<int>("%*1d"), // + ValidFormat<int>("%1-d"), // + ValidFormat<std::string, int>("%*s"), // + ValidFormat<int>("%*d"), // + ValidFormat<std::string>("%p"), // + ValidFormat<int (*)(int)>("%d"), // + + ValidFormat<>("%3$d"), // + ValidFormat<>("%1$r"), // + ValidFormat<int>("%1$s"), // + ValidFormat<int>("%1$.1.d"), // + 
ValidFormat<int>("%1$*2$1d"), // + ValidFormat<int>("%1$1-d"), // + ValidFormat<std::string, int>("%2$*1$s"), // + ValidFormat<std::string>("%1$p"), + + ValidFormat<int, int>("%d %2$d"), // + }; + + for (Case c : falses) { + EXPECT_FALSE(c.result) << c.format; + } +} + +TEST(StrFormatChecker, LongFormat) { +#define CHARS_X_40 "1234567890123456789012345678901234567890" +#define CHARS_X_400 \ + CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 \ + CHARS_X_40 CHARS_X_40 CHARS_X_40 +#define CHARS_X_4000 \ + CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 \ + CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 + constexpr char long_format[] = + CHARS_X_4000 "%d" CHARS_X_4000 "%s" CHARS_X_4000; + constexpr bool is_valid = ValidFormat<int, std::string>(long_format).result; + EXPECT_TRUE(is_valid); +} + +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/convert_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/convert_test.cc new file mode 100644 index 000000000000..375db0a0592c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/convert_test.cc @@ -0,0 +1,1242 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> + +#include <cctype> +#include <cmath> +#include <limits> +#include <string> +#include <thread> // NOLINT + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/str_format/bind.h" +#include "absl/strings/match.h" +#include "absl/types/optional.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { +namespace { + +struct NativePrintfTraits { + bool hex_float_has_glibc_rounding; + bool hex_float_prefers_denormal_repr; + bool hex_float_uses_minimal_precision_when_not_specified; + bool hex_float_optimizes_leading_digit_bit_count; +}; + +template <typename T, size_t N> +size_t ArraySize(T (&)[N]) { + return N; +} + +std::string LengthModFor(float) { return ""; } +std::string LengthModFor(double) { return ""; } +std::string LengthModFor(long double) { return "L"; } +std::string LengthModFor(char) { return "hh"; } +std::string LengthModFor(signed char) { return "hh"; } +std::string LengthModFor(unsigned char) { return "hh"; } +std::string LengthModFor(short) { return "h"; } // NOLINT +std::string LengthModFor(unsigned short) { return "h"; } // NOLINT +std::string LengthModFor(int) { return ""; } +std::string LengthModFor(unsigned) { return ""; } +std::string LengthModFor(long) { return "l"; } // NOLINT +std::string LengthModFor(unsigned long) { return "l"; } // NOLINT +std::string LengthModFor(long long) { return "ll"; } // NOLINT +std::string LengthModFor(unsigned long long) { return "ll"; } // NOLINT + +std::string EscCharImpl(int v) { + if (std::isprint(static_cast<unsigned char>(v))) { + return std::string(1, static_cast<char>(v)); + } + char buf[64]; + int n = snprintf(buf, sizeof(buf), "\\%#.2x", + static_cast<unsigned>(v & 0xff)); + assert(n > 0 && n < sizeof(buf)); + return std::string(buf, n); +} + +std::string Esc(char v) { return EscCharImpl(v); } +std::string Esc(signed char v) { return 
EscCharImpl(v); } +std::string Esc(unsigned char v) { return EscCharImpl(v); } + +template <typename T> +std::string Esc(const T &v) { + std::ostringstream oss; + oss << v; + return oss.str(); +} + +void StrAppendV(std::string *dst, const char *format, va_list ap) { + // First try with a small fixed size buffer + static const int kSpaceLength = 1024; + char space[kSpaceLength]; + + // It's possible for methods that use a va_list to invalidate + // the data in it upon use. The fix is to make a copy + // of the structure before using it and use that copy instead. + va_list backup_ap; + va_copy(backup_ap, ap); + int result = vsnprintf(space, kSpaceLength, format, backup_ap); + va_end(backup_ap); + if (result < kSpaceLength) { + if (result >= 0) { + // Normal case -- everything fit. + dst->append(space, result); + return; + } + if (result < 0) { + // Just an error. + return; + } + } + + // Increase the buffer size to the size requested by vsnprintf, + // plus one for the closing \0. + int length = result + 1; + char *buf = new char[length]; + + // Restore the va_list before we use it again + va_copy(backup_ap, ap); + result = vsnprintf(buf, length, format, backup_ap); + va_end(backup_ap); + + if (result >= 0 && result < length) { + // It fit + dst->append(buf, result); + } + delete[] buf; +} + +void StrAppend(std::string *out, const char *format, ...) { + va_list ap; + va_start(ap, format); + StrAppendV(out, format, ap); + va_end(ap); +} + +std::string StrPrint(const char *format, ...) { + va_list ap; + va_start(ap, format); + std::string result; + StrAppendV(&result, format, ap); + va_end(ap); + return result; +} + +NativePrintfTraits VerifyNativeImplementationImpl() { + NativePrintfTraits result; + + // >>> hex_float_has_glibc_rounding. To have glibc's rounding behavior we need + // to meet three requirements: + // + // - The threshold for rounding up is 8 (e.g. MSVC uses 9). + // - If the digits lower than the 8 are non-zero then we round up.
+ // - If the digits lower than the 8 are all zero then we round toward even. + // + // The numbers below represent all the cases covering {below,at,above} the + // threshold (8) with both {zero,non-zero} lower bits and both {even,odd} + // preceding digits. + const double d0079 = 65657.0; // 0x1.0079p+16 + const double d0179 = 65913.0; // 0x1.0179p+16 + const double d0080 = 65664.0; // 0x1.0080p+16 + const double d0180 = 65920.0; // 0x1.0180p+16 + const double d0081 = 65665.0; // 0x1.0081p+16 + const double d0181 = 65921.0; // 0x1.0181p+16 + result.hex_float_has_glibc_rounding = + StartsWith(StrPrint("%.2a", d0079), "0x1.00") && + StartsWith(StrPrint("%.2a", d0179), "0x1.01") && + StartsWith(StrPrint("%.2a", d0080), "0x1.00") && + StartsWith(StrPrint("%.2a", d0180), "0x1.02") && + StartsWith(StrPrint("%.2a", d0081), "0x1.01") && + StartsWith(StrPrint("%.2a", d0181), "0x1.02"); + + // >>> hex_float_prefers_denormal_repr. Formatting `denormal` on glibc yields + // "0x0.0000000000001p-1022", whereas on std libs that don't use denormal + // representation it would either be 0x1p-1074 or 0x1.0000000000000-1074. + const double denormal = std::numeric_limits<double>::denorm_min(); + result.hex_float_prefers_denormal_repr = + StartsWith(StrPrint("%a", denormal), "0x0.0000000000001"); + + // >>> hex_float_uses_minimal_precision_when_not_specified. Some (non-glibc) + // libs will format the following as "0x1.0079000000000p+16". + result.hex_float_uses_minimal_precision_when_not_specified = + (StrPrint("%a", d0079) == "0x1.0079p+16"); + + // >>> hex_float_optimizes_leading_digit_bit_count. The number 1.5, when + // formatted by glibc should yield "0x1.8p+0" for `double` and "0xcp-3" for + // `long double`, i.e., number of bits in the leading digit is adapted to the + // number of bits in the mantissa. 
+ const double d_15 = 1.5; + const long double ld_15 = 1.5; + result.hex_float_optimizes_leading_digit_bit_count = + StartsWith(StrPrint("%a", d_15), "0x1.8") && + StartsWith(StrPrint("%La", ld_15), "0xc"); + + return result; +} + +const NativePrintfTraits &VerifyNativeImplementation() { + static NativePrintfTraits native_traits = VerifyNativeImplementationImpl(); + return native_traits; +} + +class FormatConvertTest : public ::testing::Test { }; + +template <typename T> +void TestStringConvert(const T& str) { + const FormatArgImpl args[] = {FormatArgImpl(str)}; + struct Expectation { + const char *out; + const char *fmt; + }; + const Expectation kExpect[] = { + {"hello", "%1$s" }, + {"", "%1$.s" }, + {"", "%1$.0s" }, + {"h", "%1$.1s" }, + {"he", "%1$.2s" }, + {"hello", "%1$.10s" }, + {" hello", "%1$6s" }, + {" he", "%1$5.2s" }, + {"he ", "%1$-5.2s" }, + {"hello ", "%1$-6.10s" }, + }; + for (const Expectation &e : kExpect) { + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, BasicString) { + TestStringConvert("hello"); // As char array. + TestStringConvert(static_cast<const char*>("hello")); + TestStringConvert(std::string("hello")); + TestStringConvert(string_view("hello")); +} + +TEST_F(FormatConvertTest, NullString) { + const char* p = nullptr; + UntypedFormatSpecImpl format("%s"); + EXPECT_EQ("", FormatPack(format, {FormatArgImpl(p)})); +} + +TEST_F(FormatConvertTest, StringPrecision) { + // We cap at the precision. + char c = 'a'; + const char* p = &c; + UntypedFormatSpecImpl format("%.1s"); + EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)})); + + // We cap at the NUL-terminator. + p = "ABC"; + UntypedFormatSpecImpl format2("%.10s"); + EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)})); +} + +// Pointer formatting is implementation defined. This checks that the argument +// can be matched to `ptr`. 
+MATCHER_P(MatchesPointerString, ptr, "") { + if (ptr == nullptr && arg == "(nil)") { + return true; + } + void* parsed = nullptr; + if (sscanf(arg.c_str(), "%p", &parsed) != 1) { + ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str()); + } + return ptr == parsed; +} + +TEST_F(FormatConvertTest, Pointer) { + static int x = 0; + const int *xp = &x; + char c = 'h'; + char *mcp = &c; + const char *cp = "hi"; + const char *cnil = nullptr; + const int *inil = nullptr; + using VoidF = void (*)(); + VoidF fp = [] {}, fnil = nullptr; + volatile char vc; + volatile char *vcp = &vc; + volatile char *vcnil = nullptr; + const FormatArgImpl args_array[] = { + FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(inil), + FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp), + FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil), + }; + auto args = absl::MakeConstSpan(args_array); + + // Width/precision variants without flags; each is expected to print xp (&x). + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.1p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%30.20p"), args), + MatchesPointerString(&x)); + + // The same five variants again, now with the '-' (left-justify) flag. + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.1p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args), + MatchesPointerString(&x)); + + // const char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args), + MatchesPointerString(cp)); + // null const int* +
EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args), + MatchesPointerString(nullptr)); + // null const char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args), + MatchesPointerString(nullptr)); + // nonconst char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args), + MatchesPointerString(mcp)); + + // function pointers + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args), + MatchesPointerString(reinterpret_cast<const void*>(fp))); + EXPECT_THAT( + FormatPack(UntypedFormatSpecImpl("%8$p"), args), + MatchesPointerString(reinterpret_cast<volatile const void *>(vcp))); + + // null function pointers + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args), + MatchesPointerString(nullptr)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args), + MatchesPointerString(nullptr)); +} + +struct Cardinal { + enum Pos { k1 = 1, k2 = 2, k3 = 3 }; + enum Neg { kM1 = -1, kM2 = -2, kM3 = -3 }; +}; + +TEST_F(FormatConvertTest, Enum) { + const Cardinal::Pos k3 = Cardinal::k3; + const Cardinal::Neg km3 = Cardinal::kM3; + const FormatArgImpl args[] = {FormatArgImpl(k3), FormatArgImpl(km3)}; + UntypedFormatSpecImpl format("%1$d"); + UntypedFormatSpecImpl format2("%2$d"); + EXPECT_EQ("3", FormatPack(format, absl::MakeSpan(args))); + EXPECT_EQ("-3", FormatPack(format2, absl::MakeSpan(args))); +} + +template <typename T> +class TypedFormatConvertTest : public FormatConvertTest { }; + +TYPED_TEST_SUITE_P(TypedFormatConvertTest); + +std::vector<std::string> AllFlagCombinations() { + const char kFlags[] = {'-', '#', '0', '+', ' '}; + std::vector<std::string> result; + for (size_t fsi = 0; fsi < (1ull << ArraySize(kFlags)); ++fsi) { + std::string flag_set; + for (size_t fi = 0; fi < ArraySize(kFlags); ++fi) + if (fsi & (1ull << fi)) + flag_set += kFlags[fi]; + result.push_back(flag_set); + } + return result; +} + +TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) { + typedef TypeParam T; + typedef typename std::make_unsigned<T>::type 
UnsignedT; + using remove_volatile_t = typename std::remove_volatile<T>::type; + const T kMin = std::numeric_limits<remove_volatile_t>::min(); + const T kMax = std::numeric_limits<remove_volatile_t>::max(); + const T kVals[] = { + remove_volatile_t(1), + remove_volatile_t(2), + remove_volatile_t(3), + remove_volatile_t(123), + remove_volatile_t(-1), + remove_volatile_t(-2), + remove_volatile_t(-3), + remove_volatile_t(-123), + remove_volatile_t(0), + kMax - remove_volatile_t(1), + kMax, + kMin + remove_volatile_t(1), + kMin, + }; + const char kConvChars[] = {'d', 'i', 'u', 'o', 'x', 'X'}; + const std::string kWid[] = {"", "4", "10"}; + const std::string kPrec[] = {"", ".", ".0", ".4", ".10"}; + + const std::vector<std::string> flag_sets = AllFlagCombinations(); + + for (size_t vi = 0; vi < ArraySize(kVals); ++vi) { + const T val = kVals[vi]; + SCOPED_TRACE(Esc(val)); + const FormatArgImpl args[] = {FormatArgImpl(val)}; + for (size_t ci = 0; ci < ArraySize(kConvChars); ++ci) { + const char conv_char = kConvChars[ci]; + for (size_t fsi = 0; fsi < flag_sets.size(); ++fsi) { + const std::string &flag_set = flag_sets[fsi]; + for (size_t wi = 0; wi < ArraySize(kWid); ++wi) { + const std::string &wid = kWid[wi]; + for (size_t pi = 0; pi < ArraySize(kPrec); ++pi) { + const std::string &prec = kPrec[pi]; + + const bool is_signed_conv = (conv_char == 'd' || conv_char == 'i'); + const bool is_unsigned_to_signed = + !std::is_signed<T>::value && is_signed_conv; + // Don't consider sign-related flags '+' and ' ' when doing + // unsigned to signed conversions. + if (is_unsigned_to_signed && + flag_set.find_first_of("+ ") != std::string::npos) { + continue; + } + + std::string new_fmt("%"); + new_fmt += flag_set; + new_fmt += wid; + new_fmt += prec; + // old and new always agree up to here. 
+ std::string old_fmt = new_fmt; + new_fmt += conv_char; + std::string old_result; + if (is_unsigned_to_signed) { + // don't expect agreement on unsigned formatted as signed, + // as printf can't do that conversion properly. For those + // cases, we do expect agreement with printf with a "%u" + // and the unsigned equivalent of 'val'. + UnsignedT uval = val; + old_fmt += LengthModFor(uval); + old_fmt += "u"; + old_result = StrPrint(old_fmt.c_str(), uval); + } else { + old_fmt += LengthModFor(val); + old_fmt += conv_char; + old_result = StrPrint(old_fmt.c_str(), val); + } + + SCOPED_TRACE(std::string() + " old_fmt: \"" + old_fmt + + "\"'" + " new_fmt: \"" + + new_fmt + "\""); + UntypedFormatSpecImpl format(new_fmt); + EXPECT_EQ(old_result, FormatPack(format, absl::MakeSpan(args))); + } + } + } + } + } +} + +TYPED_TEST_P(TypedFormatConvertTest, Char) { + typedef TypeParam T; + using remove_volatile_t = typename std::remove_volatile<T>::type; + static const T kMin = std::numeric_limits<remove_volatile_t>::min(); + static const T kMax = std::numeric_limits<remove_volatile_t>::max(); + T kVals[] = { + remove_volatile_t(1), remove_volatile_t(2), remove_volatile_t(10), + remove_volatile_t(-1), remove_volatile_t(-2), remove_volatile_t(-10), + remove_volatile_t(0), + kMin + remove_volatile_t(1), kMin, + kMax - remove_volatile_t(1), kMax + }; + for (const T &c : kVals) { + const FormatArgImpl args[] = {FormatArgImpl(c)}; + UntypedFormatSpecImpl format("%c"); + EXPECT_EQ(StrPrint("%c", c), FormatPack(format, absl::MakeSpan(args))); + } +} + +REGISTER_TYPED_TEST_CASE_P(TypedFormatConvertTest, AllIntsWithFlags, Char); + +typedef ::testing::Types< + int, unsigned, volatile int, + short, unsigned short, + long, unsigned long, + long long, unsigned long long, + signed char, unsigned char, char> + AllIntTypes; +INSTANTIATE_TYPED_TEST_CASE_P(TypedFormatConvertTestWithAllIntTypes, + TypedFormatConvertTest, AllIntTypes); +TEST_F(FormatConvertTest, VectorBool) { + // Make sure 
vector<bool>'s values behave as bools. + std::vector<bool> v = {true, false}; + const std::vector<bool> cv = {true, false}; + EXPECT_EQ("1,0,1,0", + FormatPack(UntypedFormatSpecImpl("%d,%d,%d,%d"), + absl::Span<const FormatArgImpl>( + {FormatArgImpl(v[0]), FormatArgImpl(v[1]), + FormatArgImpl(cv[0]), FormatArgImpl(cv[1])}))); +} + + +TEST_F(FormatConvertTest, Int128) { + absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979; + absl::int128 negative = -positive; + absl::int128 max = absl::Int128Max(), min = absl::Int128Min(); + const FormatArgImpl args[] = {FormatArgImpl(positive), + FormatArgImpl(negative), FormatArgImpl(max), + FormatArgImpl(min)}; + + struct Case { + const char* format; + const char* expected; + } cases[] = { + {"%1$d", "2595989796776606496405"}, + {"%1$30d", " 2595989796776606496405"}, + {"%1$-30d", "2595989796776606496405 "}, + {"%1$u", "2595989796776606496405"}, + {"%1$x", "8cba9876066020f695"}, + {"%2$d", "-2595989796776606496405"}, + {"%2$30d", " -2595989796776606496405"}, + {"%2$-30d", "-2595989796776606496405 "}, + {"%2$u", "340282366920938460867384810655161715051"}, + {"%2$x", "ffffffffffffff73456789f99fdf096b"}, + {"%3$d", "170141183460469231731687303715884105727"}, + {"%3$u", "170141183460469231731687303715884105727"}, + {"%3$x", "7fffffffffffffffffffffffffffffff"}, + {"%4$d", "-170141183460469231731687303715884105728"}, + {"%4$x", "80000000000000000000000000000000"}, + }; + + for (auto c : cases) { + UntypedFormatSpecImpl format(c.format); + EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args))); + } +} + +TEST_F(FormatConvertTest, Uint128) { + absl::uint128 v = static_cast<absl::uint128>(0x1234567890abcdef) * 1979; + absl::uint128 max = absl::Uint128Max(); + const FormatArgImpl args[] = {FormatArgImpl(v), FormatArgImpl(max)}; + + struct Case { + const char* format; + const char* expected; + } cases[] = { + {"%1$d", "2595989796776606496405"}, + {"%1$30d", " 2595989796776606496405"}, + {"%1$-30d", 
"2595989796776606496405 "}, + {"%1$u", "2595989796776606496405"}, + {"%1$x", "8cba9876066020f695"}, + {"%2$d", "340282366920938463463374607431768211455"}, + {"%2$u", "340282366920938463463374607431768211455"}, + {"%2$x", "ffffffffffffffffffffffffffffffff"}, + }; + + for (auto c : cases) { + UntypedFormatSpecImpl format(c.format); + EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args))); + } +} + +template <typename Floating> +void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // Reserve the space to ensure we don't allocate memory in the output itself. + std::string str_format_result; + str_format_result.reserve(1 << 20); + std::string string_printf_result; + string_printf_result.reserve(1 << 20); + + const char *const kFormats[] = { + "%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03", + "%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. 
+ continue; + } + + if ((f == 'a' || f == 'A') && + !native_traits.hex_float_has_glibc_rounding) { + continue; + } + + for (Floating d : floats) { + if (!native_traits.hex_float_prefers_denormal_repr && + (f == 'a' || f == 'A') && std::fpclassify(d) == FP_SUBNORMAL) { + continue; + } + int i = -10; + FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + UntypedFormatSpecImpl format(fmt_str); + + string_printf_result.clear(); + StrAppend(&string_printf_result, fmt_str.c_str(), d, i); + str_format_result.clear(); + + { + AppendPack(&str_format_result, format, absl::MakeSpan(args)); + } + + if (string_printf_result != str_format_result) { + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test + // to time out. + ASSERT_EQ(string_printf_result, str_format_result) + << fmt_str << " " << StrPrint("%.18g", d) << " " + << StrPrint("%a", d) << " " << StrPrint("%.50f", d); + } + } + } + } +} + +TEST_F(FormatConvertTest, Float) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + + std::vector<float> floats = {0.0f, + -0.0f, + .9999999f, + 9999999.f, + std::numeric_limits<float>::max(), + -std::numeric_limits<float>::max(), + std::numeric_limits<float>::min(), + -std::numeric_limits<float>::min(), + std::numeric_limits<float>::lowest(), + -std::numeric_limits<float>::lowest(), + std::numeric_limits<float>::epsilon(), + std::numeric_limits<float>::epsilon() + 1.0f, + std::numeric_limits<float>::infinity(), + -std::numeric_limits<float>::infinity()}; + + // Some regression tests. 
+ floats.push_back(0.999999989f); + + if (std::numeric_limits<float>::has_denorm != std::denorm_absent) { + floats.push_back(std::numeric_limits<float>::denorm_min()); + floats.push_back(-std::numeric_limits<float>::denorm_min()); + } + + for (float base : + {1.f, 12.f, 123.f, 1234.f, 12345.f, 123456.f, 1234567.f, 12345678.f, + 123456789.f, 1234567890.f, 12345678901.f, 12345678.f, 12345678.f}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + floats.push_back(sign * std::ldexp(base, exp)); + } + } + } + + for (int exp = -300; exp <= 300; ++exp) { + const float all_ones_mantissa = 0xffffff; + floats.push_back(std::ldexp(all_ones_mantissa, exp)); + } + + // Remove duplicates to speed up the logic below. + std::sort(floats.begin(), floats.end()); + floats.erase(std::unique(floats.begin(), floats.end()), floats.end()); + +#ifndef __APPLE__ + // Apple formats NaN differently (+nan) vs. (nan) + floats.push_back(std::nan("")); +#endif + + TestWithMultipleFormatsHelper(floats); +} + +TEST_F(FormatConvertTest, Double) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + + std::vector<double> doubles = {0.0, + -0.0, + .99999999999999, + 99999999999999., + std::numeric_limits<double>::max(), + -std::numeric_limits<double>::max(), + std::numeric_limits<double>::min(), + -std::numeric_limits<double>::min(), + std::numeric_limits<double>::lowest(), + -std::numeric_limits<double>::lowest(), + std::numeric_limits<double>::epsilon(), + std::numeric_limits<double>::epsilon() + 1, + std::numeric_limits<double>::infinity(), + -std::numeric_limits<double>::infinity()}; + + // Some regression tests. 
+ doubles.push_back(0.99999999999999989); + + if (std::numeric_limits<double>::has_denorm != std::denorm_absent) { + doubles.push_back(std::numeric_limits<double>::denorm_min()); + doubles.push_back(-std::numeric_limits<double>::denorm_min()); + } + + for (double base : + {1., 12., 123., 1234., 12345., 123456., 1234567., 12345678., 123456789., + 1234567890., 12345678901., 123456789012., 1234567890123.}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + } + } + } + + // Workaround libc bug. + // https://sourceware.org/bugzilla/show_bug.cgi?id=22142 + const bool gcc_bug_22142 = + StrPrint("%f", std::numeric_limits<double>::max()) != + "1797693134862315708145274237317043567980705675258449965989174768031" + "5726078002853876058955863276687817154045895351438246423432132688946" + "4182768467546703537516986049910576551282076245490090389328944075868" + "5084551339423045832369032229481658085593321233482747978262041447231" + "68738177180919299881250404026184124858368.000000"; + + if (!gcc_bug_22142) { + for (int exp = -300; exp <= 300; ++exp) { + const double all_ones_mantissa = 0x1fffffffffffff; + doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + } + } + + if (gcc_bug_22142) { + for (auto &d : doubles) { + using L = std::numeric_limits<double>; + double d2 = std::abs(d); + if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) { + d = 0; + } + } + } + + // Remove duplicates to speed up the logic below. + std::sort(doubles.begin(), doubles.end()); + doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); + +#ifndef __APPLE__ + // Apple formats NaN differently (+nan) vs. 
(nan) + doubles.push_back(std::nan("")); +#endif + + TestWithMultipleFormatsHelper(doubles); +} + +TEST_F(FormatConvertTest, DoubleRound) { + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +#if !defined(_MSC_VER) + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + EXPECT_EQ(StrPrint(fmt, d), s); +#endif // _MSC_VER + + return s; + }; + // All of these values have to be exactly represented. + // Otherwise we might not be testing what we think we are testing. + + // These values can fit in a 64bit "fast" representation. + const double exact_value = 0.00000000000005684341886080801486968994140625; + assert(exact_value == std::pow(2, -44)); + // Round up at a 5xx. + EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001"); + // Round up at a >5 + EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006"); + // Round down at a <5 + EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568"); + // Nine handling + EXPECT_EQ(format("%.35f", exact_value), + "0.00000000000005684341886080801486969"); + EXPECT_EQ(format("%.36f", exact_value), + "0.000000000000056843418860808014869690"); + // Round down the last nine. 
+ EXPECT_EQ(format("%.37f", exact_value), + "0.0000000000000568434188608080148696899"); + EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147"); + // Round up the last nine + EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470"); + EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697"); + + // Round to even (down) + EXPECT_EQ(format("%.43f", exact_value), + "0.0000000000000568434188608080148696899414062"); + // Exact + EXPECT_EQ(format("%.44f", exact_value), + "0.00000000000005684341886080801486968994140625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)), + "0.0000000000001705302565824240446090698242188"); + // Exact, just to check. + EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)), + "0.00000000000017053025658242404460906982421875"); + + // This value has to be small enough that it won't fit in the uint128 + // representation for printing. + const double small_exact_value = + 0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT + assert(small_exact_value == std::pow(2, -120)); + // Round up at a 5xx. 
+ EXPECT_EQ(format("%.37f", small_exact_value), + "0.0000000000000000000000000000000000008"); + // Round down at a <5 + EXPECT_EQ(format("%.38f", small_exact_value), + "0.00000000000000000000000000000000000075"); + // Round up at a >5 + EXPECT_EQ(format("%.41f", small_exact_value), + "0.00000000000000000000000000000000000075232"); + // Nine handling + EXPECT_EQ(format("%.55f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051"); + EXPECT_EQ(format("%.56f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400510"); + EXPECT_EQ(format("%.57f", small_exact_value), + "0.000000000000000000000000000000000000752316384526264005100"); + EXPECT_EQ(format("%.58f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051000"); + // Round down the last nine + EXPECT_EQ(format("%.59f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400509999"); + // Round up the last nine + EXPECT_EQ(format("%.79f", small_exact_value), + "0.000000000000000000000000000000000000" + "7523163845262640050999913838222372338039460"); + + // Round to even (down) + EXPECT_EQ(format("%.119f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "394595633413601376560109201818704605102539062"); + // Exact + EXPECT_EQ(format("%.120f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "3945956334136013765601092018187046051025390625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "183786900240804129680327605456113815307617188"); + // Exact, just to check. 
+ EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "1837869002408041296803276054561138153076171875"); +} + +TEST_F(FormatConvertTest, DoubleRoundA) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x1.00018000p+100 + const double on_boundary_odd = 1267679614447900152596896153600.0; + EXPECT_EQ(format("%.0a", on_boundary_odd), "0x1p+100"); + EXPECT_EQ(format("%.1a", on_boundary_odd), "0x1.0p+100"); + EXPECT_EQ(format("%.2a", on_boundary_odd), "0x1.00p+100"); + EXPECT_EQ(format("%.3a", on_boundary_odd), "0x1.000p+100"); + EXPECT_EQ(format("%.4a", on_boundary_odd), "0x1.0002p+100"); // round + EXPECT_EQ(format("%.5a", on_boundary_odd), "0x1.00018p+100"); + EXPECT_EQ(format("%.6a", on_boundary_odd), "0x1.000180p+100"); + + // 0x1.00028000p-2 + const double on_boundary_even = 0.250009536743164062500; + EXPECT_EQ(format("%.0a", on_boundary_even), "0x1p-2"); + EXPECT_EQ(format("%.1a", on_boundary_even), "0x1.0p-2"); + EXPECT_EQ(format("%.2a", on_boundary_even), "0x1.00p-2"); + EXPECT_EQ(format("%.3a", on_boundary_even), "0x1.000p-2"); + EXPECT_EQ(format("%.4a", on_boundary_even), "0x1.0002p-2"); // no round + EXPECT_EQ(format("%.5a", on_boundary_even), "0x1.00028p-2"); + EXPECT_EQ(format("%.6a", on_boundary_even), "0x1.000280p-2"); + + // 0x1.00018001p+1 + const double slightly_over = 2.00004577683284878730773925781250; + EXPECT_EQ(format("%.0a", slightly_over), "0x1p+1"); + EXPECT_EQ(format("%.1a", slightly_over), "0x1.0p+1"); + EXPECT_EQ(format("%.2a", slightly_over), "0x1.00p+1"); + EXPECT_EQ(format("%.3a", 
slightly_over), "0x1.000p+1"); + EXPECT_EQ(format("%.4a", slightly_over), "0x1.0002p+1"); + EXPECT_EQ(format("%.5a", slightly_over), "0x1.00018p+1"); + EXPECT_EQ(format("%.6a", slightly_over), "0x1.000180p+1"); + + // 0x1.00017fffp+0 + const double slightly_under = 1.000022887950763106346130371093750; + EXPECT_EQ(format("%.0a", slightly_under), "0x1p+0"); + EXPECT_EQ(format("%.1a", slightly_under), "0x1.0p+0"); + EXPECT_EQ(format("%.2a", slightly_under), "0x1.00p+0"); + EXPECT_EQ(format("%.3a", slightly_under), "0x1.000p+0"); + EXPECT_EQ(format("%.4a", slightly_under), "0x1.0001p+0"); + EXPECT_EQ(format("%.5a", slightly_under), "0x1.00018p+0"); + EXPECT_EQ(format("%.6a", slightly_under), "0x1.000180p+0"); + EXPECT_EQ(format("%.7a", slightly_under), "0x1.0001800p+0"); + + // 0x1.1b3829ac28058p+3 + const double hex_value = 8.85060580848964661981881363317370414733886718750; + EXPECT_EQ(format("%.0a", hex_value), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value), "0x1.2p+3"); + EXPECT_EQ(format("%.2a", hex_value), "0x1.1bp+3"); + EXPECT_EQ(format("%.3a", hex_value), "0x1.1b4p+3"); + EXPECT_EQ(format("%.4a", hex_value), "0x1.1b38p+3"); + EXPECT_EQ(format("%.5a", hex_value), "0x1.1b383p+3"); + EXPECT_EQ(format("%.6a", hex_value), "0x1.1b382ap+3"); + EXPECT_EQ(format("%.7a", hex_value), "0x1.1b3829bp+3"); + EXPECT_EQ(format("%.8a", hex_value), "0x1.1b3829acp+3"); + EXPECT_EQ(format("%.9a", hex_value), "0x1.1b3829ac3p+3"); + EXPECT_EQ(format("%.10a", hex_value), "0x1.1b3829ac28p+3"); + EXPECT_EQ(format("%.11a", hex_value), "0x1.1b3829ac280p+3"); + EXPECT_EQ(format("%.12a", hex_value), "0x1.1b3829ac2806p+3"); + EXPECT_EQ(format("%.13a", hex_value), "0x1.1b3829ac28058p+3"); + EXPECT_EQ(format("%.14a", hex_value), "0x1.1b3829ac280580p+3"); + EXPECT_EQ(format("%.15a", hex_value), "0x1.1b3829ac2805800p+3"); + EXPECT_EQ(format("%.16a", hex_value), "0x1.1b3829ac28058000p+3"); + EXPECT_EQ(format("%.17a", hex_value), "0x1.1b3829ac280580000p+3"); + EXPECT_EQ(format("%.18a", 
hex_value), "0x1.1b3829ac2805800000p+3"); + EXPECT_EQ(format("%.19a", hex_value), "0x1.1b3829ac28058000000p+3"); + EXPECT_EQ(format("%.20a", hex_value), "0x1.1b3829ac280580000000p+3"); + EXPECT_EQ(format("%.21a", hex_value), "0x1.1b3829ac2805800000000p+3"); + + // 0x1.0818283848586p+3 + const double hex_value2 = 8.2529488658208371987257123691961169242858886718750; + EXPECT_EQ(format("%.0a", hex_value2), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value2), "0x1.1p+3"); + EXPECT_EQ(format("%.2a", hex_value2), "0x1.08p+3"); + EXPECT_EQ(format("%.3a", hex_value2), "0x1.082p+3"); + EXPECT_EQ(format("%.4a", hex_value2), "0x1.0818p+3"); + EXPECT_EQ(format("%.5a", hex_value2), "0x1.08183p+3"); + EXPECT_EQ(format("%.6a", hex_value2), "0x1.081828p+3"); + EXPECT_EQ(format("%.7a", hex_value2), "0x1.0818284p+3"); + EXPECT_EQ(format("%.8a", hex_value2), "0x1.08182838p+3"); + EXPECT_EQ(format("%.9a", hex_value2), "0x1.081828385p+3"); + EXPECT_EQ(format("%.10a", hex_value2), "0x1.0818283848p+3"); + EXPECT_EQ(format("%.11a", hex_value2), "0x1.08182838486p+3"); + EXPECT_EQ(format("%.12a", hex_value2), "0x1.081828384858p+3"); + EXPECT_EQ(format("%.13a", hex_value2), "0x1.0818283848586p+3"); + EXPECT_EQ(format("%.14a", hex_value2), "0x1.08182838485860p+3"); + EXPECT_EQ(format("%.15a", hex_value2), "0x1.081828384858600p+3"); + EXPECT_EQ(format("%.16a", hex_value2), "0x1.0818283848586000p+3"); + EXPECT_EQ(format("%.17a", hex_value2), "0x1.08182838485860000p+3"); + EXPECT_EQ(format("%.18a", hex_value2), "0x1.081828384858600000p+3"); + EXPECT_EQ(format("%.19a", hex_value2), "0x1.0818283848586000000p+3"); + EXPECT_EQ(format("%.20a", hex_value2), "0x1.08182838485860000000p+3"); + EXPECT_EQ(format("%.21a", hex_value2), "0x1.081828384858600000000p+3"); +} + +TEST_F(FormatConvertTest, LongDoubleRoundA) { + if (std::numeric_limits<long double>::digits % 4 != 0) { + // This test doesn't really make sense to run on platforms where a long + // double has a different mantissa size (mod 4) than Prod, 
since then the + // leading digit will be formatted differently. + return; + } + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, long double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding && + native_traits.hex_float_optimizes_leading_digit_bit_count) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x8.8p+4 + const long double on_boundary_even = 136.0; + EXPECT_EQ(format("%.0La", on_boundary_even), "0x8p+4"); + EXPECT_EQ(format("%.1La", on_boundary_even), "0x8.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_even), "0x8.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_even), "0x8.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_even), "0x8.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_even), "0x8.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_even), "0x8.800000p+4"); + + // 0x9.8p+4 + const long double on_boundary_odd = 152.0; + EXPECT_EQ(format("%.0La", on_boundary_odd), "0xap+4"); + EXPECT_EQ(format("%.1La", on_boundary_odd), "0x9.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_odd), "0x9.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_odd), "0x9.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_odd), "0x9.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_odd), "0x9.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_odd), "0x9.800000p+4"); + + // 0x8.80001p+24 + const long double slightly_over = 142606352.0; + EXPECT_EQ(format("%.0La", slightly_over), "0x9p+24"); + EXPECT_EQ(format("%.1La", slightly_over), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_over), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_over), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_over), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_over), "0x8.80001p+24"); + EXPECT_EQ(format("%.6La", slightly_over), 
"0x8.800010p+24"); + + // 0x8.7ffffp+24 + const long double slightly_under = 142606320.0; + EXPECT_EQ(format("%.0La", slightly_under), "0x8p+24"); + EXPECT_EQ(format("%.1La", slightly_under), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_under), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_under), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_under), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_under), "0x8.7ffffp+24"); + EXPECT_EQ(format("%.6La", slightly_under), "0x8.7ffff0p+24"); + EXPECT_EQ(format("%.7La", slightly_under), "0x8.7ffff00p+24"); + + // 0xc.0828384858688000p+128 + const long double eights = 4094231060438608800781871108094404067328.0; + EXPECT_EQ(format("%.0La", eights), "0xcp+128"); + EXPECT_EQ(format("%.1La", eights), "0xc.1p+128"); + EXPECT_EQ(format("%.2La", eights), "0xc.08p+128"); + EXPECT_EQ(format("%.3La", eights), "0xc.083p+128"); + EXPECT_EQ(format("%.4La", eights), "0xc.0828p+128"); + EXPECT_EQ(format("%.5La", eights), "0xc.08284p+128"); + EXPECT_EQ(format("%.6La", eights), "0xc.082838p+128"); + EXPECT_EQ(format("%.7La", eights), "0xc.0828385p+128"); + EXPECT_EQ(format("%.8La", eights), "0xc.08283848p+128"); + EXPECT_EQ(format("%.9La", eights), "0xc.082838486p+128"); + EXPECT_EQ(format("%.10La", eights), "0xc.0828384858p+128"); + EXPECT_EQ(format("%.11La", eights), "0xc.08283848587p+128"); + EXPECT_EQ(format("%.12La", eights), "0xc.082838485868p+128"); + EXPECT_EQ(format("%.13La", eights), "0xc.0828384858688p+128"); + EXPECT_EQ(format("%.14La", eights), "0xc.08283848586880p+128"); + EXPECT_EQ(format("%.15La", eights), "0xc.082838485868800p+128"); + EXPECT_EQ(format("%.16La", eights), "0xc.0828384858688000p+128"); +} + +// We don't actually store the results. This is just to exercise the rest of the +// machinery. +struct NullSink { + friend void AbslFormatFlush(NullSink *sink, string_view str) {} +}; + +template <typename... T> +bool FormatWithNullSink(absl::string_view fmt, const T &... 
a) { + NullSink sink; + FormatArgImpl args[] = {FormatArgImpl(a)...}; + return FormatUntyped(&sink, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +} + +TEST_F(FormatConvertTest, ExtremeWidthPrecision) { + for (const char *fmt : {"f"}) { + for (double d : {1e-100, 1.0, 1e100}) { + constexpr int max = std::numeric_limits<int>::max(); + EXPECT_TRUE(FormatWithNullSink(std::string("%.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%1.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*.*") + fmt, max, max, d)); + } + } +} + +TEST_F(FormatConvertTest, LongDouble) { +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + return; +#endif // _MSC_VER + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000", + "%.60", "%+", "% ", "%-10"}; + + std::vector<long double> doubles = { + 0.0, + -0.0, + std::numeric_limits<long double>::max(), + -std::numeric_limits<long double>::max(), + std::numeric_limits<long double>::min(), + -std::numeric_limits<long double>::min(), + std::numeric_limits<long double>::infinity(), + -std::numeric_limits<long double>::infinity()}; + + for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L, + 1234567.L, 12345678.L, 123456789.L, 1234567890.L, + 12345678901.L, 123456789012.L, 1234567890123.L, + // This value is not representable in double, but it + // is in long double that uses the extended format. + // This is to verify that we are not truncating the + // value mistakenly through a double. 
+ 10000000000000000.25L}) { + for (int exp : {-1000, -500, 0, 500, 1000}) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + doubles.push_back(sign / std::ldexp(base, exp)); + } + } + } + + // Regression tests + // + // Using a string literal because not all platforms support hex literals or it + // might be out of range. + doubles.push_back(std::strtold("-0xf.ffffffb5feafffbp-16324L", nullptr)); + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + 'L' + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if (f == 'a' || f == 'A') { + if (!native_traits.hex_float_has_glibc_rounding || + !native_traits.hex_float_optimizes_leading_digit_bit_count) { + continue; + } + } + + for (auto d : doubles) { + FormatArgImpl arg(d); + UntypedFormatSpecImpl format(fmt_str); + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test to + // time out. 
+ ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1})) + << fmt_str << " " << StrPrint("%.18Lg", d) << " " + << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d); + } + } + } +} + +TEST_F(FormatConvertTest, IntAsDouble) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + const int kMin = std::numeric_limits<int>::min(); + const int kMax = std::numeric_limits<int>::max(); + const int ia[] = { + 1, 2, 3, 123, + -1, -2, -3, -123, + 0, kMax - 1, kMax, kMin + 1, kMin }; + for (const int fx : ia) { + SCOPED_TRACE(fx); + const FormatArgImpl args[] = {FormatArgImpl(fx)}; + struct Expectation { + int line; + std::string out; + const char *fmt; + }; + const double dx = static_cast<double>(fx); + std::vector<Expectation> expect = { + {__LINE__, StrPrint("%f", dx), "%f"}, + {__LINE__, StrPrint("%12f", dx), "%12f"}, + {__LINE__, StrPrint("%.12f", dx), "%.12f"}, + {__LINE__, StrPrint("%.12a", dx), "%.12a"}, + }; + if (native_traits.hex_float_uses_minimal_precision_when_not_specified) { + Expectation ex = {__LINE__, StrPrint("%12a", dx), "%12a"}; + expect.push_back(ex); + } + for (const Expectation &e : expect) { + SCOPED_TRACE(e.line); + SCOPED_TRACE(e.fmt); + UntypedFormatSpecImpl format(e.fmt); + EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))); + } + } +} + +template <typename T> +bool FormatFails(const char* test_format, T value) { + std::string format_string = std::string("<<") + test_format + ">>"; + UntypedFormatSpecImpl format(format_string); + + int one = 1; + const FormatArgImpl args[] = {FormatArgImpl(value), FormatArgImpl(one)}; + EXPECT_EQ(FormatPack(format, absl::MakeSpan(args)), "") + << "format=" << test_format << " value=" << value; + return FormatPack(format, absl::MakeSpan(args)).empty(); +} + +TEST_F(FormatConvertTest, ExpectedFailures) { + // Int input + EXPECT_TRUE(FormatFails("%p", 1)); + EXPECT_TRUE(FormatFails("%s", 1)); + EXPECT_TRUE(FormatFails("%n", 1)); + + // Double input + 
EXPECT_TRUE(FormatFails("%p", 1.)); + EXPECT_TRUE(FormatFails("%s", 1.)); + EXPECT_TRUE(FormatFails("%n", 1.)); + EXPECT_TRUE(FormatFails("%c", 1.)); + EXPECT_TRUE(FormatFails("%d", 1.)); + EXPECT_TRUE(FormatFails("%x", 1.)); + EXPECT_TRUE(FormatFails("%*d", 1.)); + + // String input + EXPECT_TRUE(FormatFails("%n", "")); + EXPECT_TRUE(FormatFails("%c", "")); + EXPECT_TRUE(FormatFails("%d", "")); + EXPECT_TRUE(FormatFails("%x", "")); + EXPECT_TRUE(FormatFails("%f", "")); + EXPECT_TRUE(FormatFails("%*d", "")); +} + +// Sanity check to make sure that we are testing what we think we're testing on +// e.g. the x86_64+glibc platform. +TEST_F(FormatConvertTest, GlibcHasCorrectTraits) { +#if !defined(__GLIBC__) || !defined(__x86_64__) + return; +#endif + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // If one of the following tests break then it is either because the above PP + // macro guards failed to exclude a new platform (likely) or because something + // has changed in the implementation of glibc sprintf float formatting behavior. + // If the latter, then the code that computes these flags needs to be + // revisited and/or possibly the StrFormat implementation. + EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding); + EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr); + EXPECT_TRUE( + native_traits.hex_float_uses_minimal_precision_when_not_specified); + EXPECT_TRUE(native_traits.hex_float_optimizes_leading_digit_bit_count); +} + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/extension.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.cc new file mode 100644 index 000000000000..bb0d96cf3216 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.cc @@ -0,0 +1,75 @@ +// +// Copyright 2017 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/extension.h" + +#include <errno.h> +#include <algorithm> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +std::string Flags::ToString() const { + std::string s; + s.append(left ? "-" : ""); + s.append(show_pos ? "+" : ""); + s.append(sign_col ? " " : ""); + s.append(alt ? "#" : ""); + s.append(zero ? "0" : ""); + return s; +} + +#define ABSL_INTERNAL_X_VAL(id) \ + constexpr absl::FormatConversionChar FormatConversionCharInternal::id; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr absl::FormatConversionChar FormatConversionCharInternal::kNone; + +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + constexpr FormatConversionCharSet FormatConversionCharSetInternal::c; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating; +// NOLINTNEXTLINE(readability-redundant-declaration) 
+constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer; + +bool FormatSinkImpl::PutPaddedString(string_view value, int width, + int precision, bool left) { + size_t space_remaining = 0; + if (width >= 0) space_remaining = width; + size_t n = value.size(); + if (precision >= 0) n = std::min(n, static_cast<size_t>(precision)); + string_view shown(value.data(), n); + space_remaining = Excess(shown.size(), space_remaining); + if (!left) Append(space_remaining, ' '); + Append(shown); + if (left) Append(space_remaining, ' '); + return true; +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/extension.h b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.h new file mode 100644 index 000000000000..a9b9e137deb2 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/extension.h @@ -0,0 +1,427 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ + +#include <limits.h> + +#include <cstddef> +#include <cstring> +#include <ostream> + +#include "absl/base/config.h" +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/internal/str_format/output.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +enum class FormatConversionChar : uint8_t; +enum class FormatConversionCharSet : uint64_t; + +namespace str_format_internal { + +class FormatRawSinkImpl { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, decltype(str_format_internal::InvokeFlush( + std::declval<T*>(), string_view()))* = nullptr> + FormatRawSinkImpl(T* raw) // NOLINT + : sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {} + + void Write(string_view s) { write_(sink_, s); } + + template <typename T> + static FormatRawSinkImpl Extract(T s) { + return s.sink_; + } + + private: + template <typename T> + static void Flush(void* r, string_view s) { + str_format_internal::InvokeFlush(static_cast<T*>(r), s); + } + + void* sink_; + void (*write_)(void*, string_view); +}; + +// An abstraction to which conversions write their string data. 
+class FormatSinkImpl { + public: + explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {} + + ~FormatSinkImpl() { Flush(); } + + void Flush() { + raw_.Write(string_view(buf_, pos_ - buf_)); + pos_ = buf_; + } + + void Append(size_t n, char c) { + if (n == 0) return; + size_ += n; + auto raw_append = [&](size_t count) { + memset(pos_, c, count); + pos_ += count; + }; + while (n > Avail()) { + n -= Avail(); + if (Avail() > 0) { + raw_append(Avail()); + } + Flush(); + } + raw_append(n); + } + + void Append(string_view v) { + size_t n = v.size(); + if (n == 0) return; + size_ += n; + if (n >= Avail()) { + Flush(); + raw_.Write(v); + return; + } + memcpy(pos_, v.data(), n); + pos_ += n; + } + + size_t size() const { return size_; } + + // Put 'v' to 'sink' with specified width, precision, and left flag. + bool PutPaddedString(string_view v, int width, int precision, bool left); + + template <typename T> + T Wrap() { + return T(this); + } + + template <typename T> + static FormatSinkImpl* Extract(T* s) { + return s->sink_; + } + + private: + size_t Avail() const { return buf_ + sizeof(buf_) - pos_; } + + FormatRawSinkImpl raw_; + size_t size_ = 0; + char* pos_ = buf_; + char buf_[1024]; +}; + +struct Flags { + bool basic : 1; // fastest conversion: no flags, width, or precision + bool left : 1; // "-" + bool show_pos : 1; // "+" + bool sign_col : 1; // " " + bool alt : 1; // "#" + bool zero : 1; // "0" + std::string ToString() const; + friend std::ostream& operator<<(std::ostream& os, const Flags& v) { + return os << v.ToString(); + } +}; + +// clang-format off +#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ + /* text */ \ + X_VAL(c) X_SEP X_VAL(s) X_SEP \ + /* ints */ \ + X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ + X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ + /* floats */ \ + X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \ + X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ + /* misc */ \ + X_VAL(n) X_SEP 
X_VAL(p) +// clang-format on + +// This type should not be referenced, it exists only to provide labels +// internally that match the values declared in FormatConversionChar in +// str_format.h. This is meant to allow internal libraries to use the same +// declared interface type as the public interface +// (absl::StrFormatConversionChar) while keeping the definition in a public +// header. +// Internal libraries should use the form +// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for +// comparisons. Use in switch statements is not recommended due to a bug in how +// gcc 4.9 -Wswitch handles declared but undefined enums. +struct FormatConversionCharInternal { + FormatConversionCharInternal() = delete; + + private: + // clang-format off + enum class Enum : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p, // misc + kNone + }; + // clang-format on + public: +#define ABSL_INTERNAL_X_VAL(id) \ + static constexpr FormatConversionChar id = \ + static_cast<FormatConversionChar>(Enum::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + static constexpr FormatConversionChar kNone = + static_cast<FormatConversionChar>(Enum::kNone); +}; +// clang-format on + +inline FormatConversionChar FormatConversionCharFromChar(char c) { + switch (c) { +#define ABSL_INTERNAL_X_VAL(id) \ + case #id[0]: \ + return FormatConversionCharInternal::id; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + } + return FormatConversionCharInternal::kNone; +} + +inline bool FormatConversionCharIsUpper(FormatConversionChar c) { + if (c == FormatConversionCharInternal::X || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::G || + c == FormatConversionCharInternal::A) { + return true; + } else { + return false; + } +} + +inline bool 
FormatConversionCharIsFloat(FormatConversionChar c) { + if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::A || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::G) { + return true; + } else { + return false; + } +} + +inline char FormatConversionCharToChar(FormatConversionChar c) { + if (c == FormatConversionCharInternal::kNone) { + return '\0'; + +#define ABSL_INTERNAL_X_VAL(e) \ + } else if (c == FormatConversionCharInternal::e) { \ + return #e[0]; +#define ABSL_INTERNAL_X_SEP + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + } else { + return '\0'; + } + +#undef ABSL_INTERNAL_X_VAL +#undef ABSL_INTERNAL_X_SEP +} + +// The associated char. +inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { + char c = FormatConversionCharToChar(v); + if (!c) c = '?'; + return os << c; +} + +struct FormatConversionSpecImplFriend; + +class FormatConversionSpecImpl { + public: + // Width and precison are not specified, no flags are set. + bool is_basic() const { return flags_.basic; } + bool has_left_flag() const { return flags_.left; } + bool has_show_pos_flag() const { return flags_.show_pos; } + bool has_sign_col_flag() const { return flags_.sign_col; } + bool has_alt_flag() const { return flags_.alt; } + bool has_zero_flag() const { return flags_.zero; } + + FormatConversionChar conversion_char() const { + // Keep this field first in the struct . It generates better code when + // accessing it when ConversionSpec is passed by value in registers. + static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, ""); + return conv_; + } + + // Returns the specified width. If width is unspecfied, it returns a negative + // value. 
+ int width() const { return width_; } + // Returns the specified precision. If precision is unspecfied, it returns a + // negative value. + int precision() const { return precision_; } + + template <typename T> + T Wrap() { + return T(*this); + } + + private: + friend struct str_format_internal::FormatConversionSpecImplFriend; + FormatConversionChar conv_ = FormatConversionCharInternal::kNone; + Flags flags_; + int width_; + int precision_; +}; + +struct FormatConversionSpecImplFriend final { + static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { + conv->flags_ = f; + } + static void SetConversionChar(FormatConversionChar c, + FormatConversionSpecImpl* conv) { + conv->conv_ = c; + } + static void SetWidth(int w, FormatConversionSpecImpl* conv) { + conv->width_ = w; + } + static void SetPrecision(int p, FormatConversionSpecImpl* conv) { + conv->precision_ = p; + } + static std::string FlagsToString(const FormatConversionSpecImpl& spec) { + return spec.flags_.ToString(); + } +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a) { + return a; +} + +template <typename... CharSet> +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a, CharSet... rest) { + return static_cast<FormatConversionCharSet>( + static_cast<uint64_t>(a) | + static_cast<uint64_t>(FormatConversionCharSetUnion(rest...))); +} + +constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) { + return uint64_t{1} << (1 + static_cast<uint8_t>(c)); +} + +constexpr uint64_t FormatConversionCharToConvInt(char conv) { + return +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? 
FormatConversionCharToConvInt(FormatConversionCharInternal::c) \ + : + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + conv == '*' + ? 1 + : 0; +} + +constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvInt(conv)); +} + +struct FormatConversionCharSetInternal { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + static constexpr FormatConversionCharSet c = \ + FormatConversionCharToConvValue(#c[0]); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + + // Used for width/precision '*' specification. + static constexpr FormatConversionCharSet kStar = + FormatConversionCharToConvValue('*'); + + static constexpr FormatConversionCharSet kIntegral = + FormatConversionCharSetUnion(d, i, u, o, x, X); + static constexpr FormatConversionCharSet kFloating = + FormatConversionCharSetUnion(a, e, f, g, A, E, F, G); + static constexpr FormatConversionCharSet kNumeric = + FormatConversionCharSetUnion(kIntegral, kFloating); + static constexpr FormatConversionCharSet kPointer = p; +}; + +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return FormatConversionCharSetUnion(a, b); +} + +// Overloaded conversion functions to support absl::ParsedFormat. +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvValue(c)); +} + +// Get a conversion with a single character in it. 
constexpr FormatConversionCharSet ToFormatConversionCharSet(
    FormatConversionCharSet c) {
  return c;
}

// Every other argument type is rejected at compile time.
template <typename T>
void ToFormatConversionCharSet(T) = delete;

// Checks whether `c` exists in `set`.
constexpr bool Contains(FormatConversionCharSet set, char c) {
  return (static_cast<uint64_t>(set) &
          static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0;
}

// Checks whether all the characters in `c` are contained in `set`
constexpr bool Contains(FormatConversionCharSet set,
                        FormatConversionCharSet c) {
  return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) ==
         static_cast<uint64_t>(c);
}

// Checks whether the single conversion character `c` is a member of `set`.
constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) {
  return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0;
}

// Return capacity - used, clipped to a minimum of 0.
inline size_t Excess(size_t used, size_t capacity) {
  return used < capacity ? capacity - used : 0;
}

}  // namespace str_format_internal

ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/extension_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/extension_test.cc
new file mode 100644
index 000000000000..1c93fdb1c75b
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/internal/str_format/extension_test.cc
@@ -0,0 +1,98 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "absl/strings/internal/str_format/extension.h" + +#include <random> +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" + +namespace my_namespace { +class UserDefinedType { + public: + UserDefinedType() = default; + + void Append(absl::string_view str) { value_.append(str.data(), str.size()); } + const std::string& Value() const { return value_; } + + friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) { + x->Append(str); + } + + private: + std::string value_; +}; +} // namespace my_namespace + +namespace { + +std::string MakeRandomString(size_t len) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis('a', 'z'); + std::string s(len, '0'); + for (char& c : s) { + c = dis(gen); + } + return s; +} + +TEST(FormatExtensionTest, SinkAppendSubstring) { + for (size_t chunk_size : {1, 10, 100, 1000, 10000}) { + std::string expected, actual; + absl::str_format_internal::FormatSinkImpl sink(&actual); + for (size_t chunks = 0; chunks < 10; ++chunks) { + std::string rand = MakeRandomString(chunk_size); + expected += rand; + sink.Append(rand); + } + sink.Flush(); + EXPECT_EQ(actual, expected); + } +} + +TEST(FormatExtensionTest, SinkAppendChars) { + for (size_t chunk_size : {1, 10, 100, 1000, 10000}) { + std::string expected, actual; + absl::str_format_internal::FormatSinkImpl sink(&actual); + for (size_t chunks = 0; chunks < 10; ++chunks) { + std::string rand = MakeRandomString(1); + expected.append(chunk_size, 
rand[0]); + sink.Append(chunk_size, rand[0]); + } + sink.Flush(); + EXPECT_EQ(actual, expected); + } +} + +TEST(FormatExtensionTest, VerifyEnumEquality) { +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionChar::id, \ + absl::str_format_internal::FormatConversionCharInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL + +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionCharSet::id, \ + absl::str_format_internal::FormatConversionCharSetInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.cc new file mode 100644 index 000000000000..0ded0a66afa9 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.cc @@ -0,0 +1,1419 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#include "absl/strings/internal/str_format/float_conversion.h"

#include <string.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <string>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/bits.h"
#include "absl/base/optimization.h"
#include "absl/functional/function_ref.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/numbers.h"
#include "absl/types/optional.h"
#include "absl/types/span.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {

namespace {

// The code below wants to avoid heap allocations.
// To do so it needs to allocate memory on the stack.
// `StackArray` will allocate memory on the stack in the form of a uint32_t
// array and call the provided callback with said memory.
// It will allocate memory in increments of 512 bytes. We could allocate the
// largest needed unconditionally, but that is more than we need in most of
// cases. This way we use less stack in the common cases.
class StackArray {
  using Func = absl::FunctionRef<void(absl::Span<uint32_t>)>;
  // Number of uint32_t elements in one 512-byte step.
  static constexpr size_t kStep = 512 / sizeof(uint32_t);
  // 5 steps is 2560 bytes, which is enough to hold a long double with the
  // largest/smallest exponents.
  // The operations below will static_assert their particular maximum.
  static constexpr size_t kNumSteps = 5;

  // We do not want this function to be inlined.
  // Otherwise the caller will allocate the stack space unnecessarily for all
  // the variants even though it only calls one.
  template <size_t steps>
  ABSL_ATTRIBUTE_NOINLINE static void RunWithCapacityImpl(Func f) {
    // Zero-initialized scratch array handed to the callback as a span.
    uint32_t values[steps * kStep]{};
    f(absl::MakeSpan(values));
  }

 public:
  // Largest capacity, in uint32_t elements, that RunWithCapacity supports.
  static constexpr size_t kMaxCapacity = kStep * kNumSteps;

  // Calls `f` with a zeroed stack array of at least `capacity` elements,
  // rounded up to a whole number of steps.
  // Requires `0 < capacity <= kMaxCapacity`. When asserts are compiled out and
  // that does not hold, no switch case matches and `f` is never called.
  static void RunWithCapacity(size_t capacity, Func f) {
    assert(capacity <= kMaxCapacity);
    // Number of kStep-sized steps needed, rounded up.
    const size_t step = (capacity + kStep - 1) / kStep;
    assert(step <= kNumSteps);
    switch (step) {
      case 1:
        return RunWithCapacityImpl<1>(f);
      case 2:
        return RunWithCapacityImpl<2>(f);
      case 3:
        return RunWithCapacityImpl<3>(f);
      case 4:
        return RunWithCapacityImpl<4>(f);
      case 5:
        return RunWithCapacityImpl<5>(f);
    }

    assert(false && "Invalid capacity");
  }
};

// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns
// the carry.
template <typename Int>
inline Int MultiplyBy10WithCarry(Int *v, Int carry) {
  // Do the arithmetic in a wider type (uint64_t for 4-byte `Int`, uint128
  // otherwise) so the high part of the product is not lost.
  using BiggerInt = absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>;
  BiggerInt tmp = 10 * static_cast<BiggerInt>(*v) + carry;
  *v = static_cast<Int>(tmp);
  // The bits above the width of `Int` are the carry out.
  return static_cast<Int>(tmp >> (sizeof(Int) * 8));
}

// Calculates `(2^64 * carry + *v) / 10`.
// Stores the quotient in `*v` and returns the remainder.
// Requires: `0 <= carry <= 9`
inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) {
  constexpr uint64_t divisor = 10;
  // 2^64 / divisor = chunk_quotient + chunk_remainder / divisor
  // 2^64 itself does not fit in a uint64_t, so the quotient is computed as
  // 2^63 / 5, and the remainder via unsigned wrap-around: 0 - q * 10 (mod 2^64)
  // equals 2^64 - q * 10, which is 6.
  constexpr uint64_t chunk_quotient = (uint64_t{1} << 63) / (divisor / 2);
  constexpr uint64_t chunk_remainder = uint64_t{} - chunk_quotient * divisor;

  const uint64_t mod = *v % divisor;
  // Combined remainder of both terms; at most 6 * 9 + 9, so it may still
  // contain whole multiples of 10 that belong in the quotient.
  const uint64_t next_carry = chunk_remainder * carry + mod;
  *v = *v / divisor + carry * chunk_quotient + next_carry / divisor;
  return next_carry % divisor;
}

// Generates the decimal representation for an integer of the form `v * 2^exp`,
// where `v` and `exp` are both positive integers.
// It generates the digits from the left (ie the most significant digit first)
// to allow for direct printing into the sink.
//
// Requires `0 <= exp` and `exp <= numeric_limits<long double>::max_exponent`.
class BinaryToDecimal {
  static constexpr int ChunksNeeded(int exp) {
    // We will left shift a uint128 by `exp` bits, so we need `128+exp` total
    // bits. Round up to 32.
    // See constructor for details about adding `10%` to the value.
    return (128 + exp + 31) / 32 * 11 / 10;
  }

 public:
  // Run the conversion for `v * 2^exp` and call `f(binary_to_decimal)`.
  // This function will allocate enough stack space to perform the conversion.
  static void RunConversion(uint128 v, int exp,
                            absl::FunctionRef<void(BinaryToDecimal)> f) {
    assert(exp > 0);
    assert(exp <= std::numeric_limits<long double>::max_exponent);
    // The largest supported exponent must fit in the largest stack array.
    static_assert(
        static_cast<int>(StackArray::kMaxCapacity) >=
            ChunksNeeded(std::numeric_limits<long double>::max_exponent),
        "");

    StackArray::RunWithCapacity(
        ChunksNeeded(exp),
        [=](absl::Span<uint32_t> input) { f(BinaryToDecimal(input, v, exp)); });
  }

  // Number of decimal digits not yet consumed: nine per remaining decimal
  // chunk plus the digits in the current view.
  int TotalDigits() const {
    return static_cast<int>((decimal_end_ - decimal_start_) * kDigitsPerChunk +
                            CurrentDigits().size());
  }

  // See the current block of digits.
  absl::string_view CurrentDigits() const {
    // Digits are stored right-aligned in `digits_`.
    return absl::string_view(digits_ + kDigitsPerChunk - size_, size_);
  }

  // Advance the current view of digits.
  // Returns `false` when no more digits are available.
  bool AdvanceDigits() {
    if (decimal_start_ >= decimal_end_) return false;

    uint32_t w = data_[decimal_start_++];
    // Unlike the first chunk, every later chunk emits all nine digits,
    // including leading zeros.
    for (size_ = 0; size_ < kDigitsPerChunk; w /= 10) {
      digits_[kDigitsPerChunk - ++size_] = w % 10 + '0';
    }
    return true;
  }

 private:
  BinaryToDecimal(absl::Span<uint32_t> data, uint128 v, int exp) : data_(data) {
    // We need to print the digits directly into the sink object without
    // buffering them all first. To do this we need two things:
    // - to know the total number of digits to do padding when necessary
    // - to generate the decimal digits from the left.
    //
    // In order to do this, we do a two pass conversion.
    // On the first pass we convert the binary representation of the value into
    // a decimal representation in which each uint32_t chunk holds up to 9
    // decimal digits.  In the second pass we take each decimal-holding-uint32_t
    // value and generate the ascii decimal digits into `digits_`.
    //
    // The binary and decimal representations actually share the same memory
    // region. As we go converting the chunks from binary to decimal we free
    // them up and reuse them for the decimal representation. One caveat is that
    // the decimal representation is around 7% less efficient in space than the
    // binary one. We allocate an extra 10% memory to account for this. See
    // ChunksNeeded for this calculation.
    int chunk_index = exp / 32;
    // The decimal chunks grow downwards from the end of the array.
    decimal_start_ = decimal_end_ = ChunksNeeded(exp);
    const int offset = exp % 32;
    // Left shift v by exp bits.
    data_[chunk_index] = static_cast<uint32_t>(v << offset);
    for (v >>= (32 - offset); v; v >>= 32)
      data_[++chunk_index] = static_cast<uint32_t>(v);

    while (chunk_index >= 0) {
      // While we have more than one chunk available, go in steps of 1e9.
      // `data_[chunk_index]` holds the highest non-zero binary chunk, so keep
      // the variable updated.
      // Long division of the binary number by 1e9, from the most significant
      // chunk down; the final `carry` is the remainder.
      uint32_t carry = 0;
      for (int i = chunk_index; i >= 0; --i) {
        uint64_t tmp = uint64_t{data_[i]} + (uint64_t{carry} << 32);
        data_[i] = static_cast<uint32_t>(tmp / uint64_t{1000000000});
        carry = static_cast<uint32_t>(tmp % uint64_t{1000000000});
      }

      // If the highest chunk is now empty, remove it from view.
      if (data_[chunk_index] == 0) --chunk_index;

      // Store the remainder as the next (more significant) decimal chunk. The
      // assert checks that it does not overwrite live binary data.
      --decimal_start_;
      assert(decimal_start_ != chunk_index);
      data_[decimal_start_] = carry;
    }

    // Fill the first set of digits. The first chunk might not be complete, so
    // handle differently.
    for (uint32_t first = data_[decimal_start_++]; first != 0; first /= 10) {
      digits_[kDigitsPerChunk - ++size_] = first % 10 + '0';
    }
  }

 private:
  static constexpr int kDigitsPerChunk = 9;

  // Index range [decimal_start_, decimal_end_) of decimal chunks in `data_`
  // that have not been turned into characters yet.
  int decimal_start_;
  int decimal_end_;

  // Current view of digits, right-aligned; `size_` of them are valid.
  char digits_[kDigitsPerChunk];
  int size_ = 0;

  absl::Span<uint32_t> data_;
};

// Converts a value of the form `x * 2^-exp` into a sequence of decimal digits.
// Requires `-exp < 0` and
// `-exp >= limits<long double>::min_exponent - limits<long double>::digits`.
class FractionalDigitGenerator {
 public:
  // Run the conversion for `v * 2^-exp` and call `f(generator)`.
  // This function will allocate enough stack space to perform the conversion.
  static void RunConversion(
      uint128 v, int exp, absl::FunctionRef<void(FractionalDigitGenerator)> f) {
    using Limits = std::numeric_limits<long double>;
    assert(-exp < 0);
    assert(-exp >= Limits::min_exponent - 128);
    static_assert(StackArray::kMaxCapacity >=
                      (Limits::digits + 128 - Limits::min_exponent + 31) / 32,
                  "");
    StackArray::RunWithCapacity((Limits::digits + exp + 31) / 32,
                                [=](absl::Span<uint32_t> input) {
                                  f(FractionalDigitGenerator(input, v, exp));
                                });
  }

  // Returns true if there are any more non-zero digits left.
  bool HasMoreDigits() const { return next_digit_ != 0 || chunk_index_ >= 0; }

  // Returns true if the remainder digits are greater than 5000...
  bool IsGreaterThanHalf() const {
    return next_digit_ > 5 || (next_digit_ == 5 && chunk_index_ >= 0);
  }
  // Returns true if the remainder digits are exactly 5000...
  bool IsExactlyHalf() const { return next_digit_ == 5 && chunk_index_ < 0; }

  struct Digits {
    int digit_before_nine;
    int num_nines;
  };

  // Get the next set of digits.
  // They are composed by a non-9 digit followed by a runs of zero or more 9s.
  Digits GetDigits() {
    Digits digits{next_digit_, 0};

    next_digit_ = GetOneDigit();
    while (next_digit_ == 9) {
      ++digits.num_nines;
      next_digit_ = GetOneDigit();
    }

    return digits;
  }

 private:
  // Return the next digit.
  int GetOneDigit() {
    // No non-zero chunks left: every remaining digit is zero.
    if (chunk_index_ < 0) return 0;

    // Multiply the whole fraction by 10, from the least significant chunk
    // (highest index) up; the carry out of chunk 0 is the next digit.
    uint32_t carry = 0;
    for (int i = chunk_index_; i >= 0; --i) {
      carry = MultiplyBy10WithCarry(&data_[i], carry);
    }
    // If the lowest chunk is now empty, remove it from view.
    if (data_[chunk_index_] == 0) --chunk_index_;
    return carry;
  }

  FractionalDigitGenerator(absl::Span<uint32_t> data, uint128 v, int exp)
      : chunk_index_(exp / 32), data_(data) {
    const int offset = exp % 32;
    // Right shift `v` by `exp` bits.
    data_[chunk_index_] = static_cast<uint32_t>(v << (32 - offset));
    v >>= offset;
    // Make sure we don't overflow the data. We already calculated that
    // non-zero bits fit, so we might not have space for leading zero bits.
    for (int pos = chunk_index_; v; v >>= 32)
      data_[--pos] = static_cast<uint32_t>(v);

    // Fill next_digit_, as GetDigits expects it to be populated always.
    next_digit_ = GetOneDigit();
  }

  // Digit already extracted but not yet returned by GetDigits.
  int next_digit_;
  // Index of the least significant chunk still in view; negative once the
  // fraction has been fully consumed.
  int chunk_index_;
  absl::Span<uint32_t> data_;
};

// Count the number of leading zero bits.
int LeadingZeros(uint64_t v) { return base_internal::CountLeadingZeros64(v); }
int LeadingZeros(uint128 v) {
  auto high = static_cast<uint64_t>(v >> 64);
  auto low = static_cast<uint64_t>(v);
  return high != 0 ? base_internal::CountLeadingZeros64(high)
                   : 64 + base_internal::CountLeadingZeros64(low);
}

// Round up the text digits starting at `p`.
// The buffer must have an extra digit that is known to not need rounding.
// This is done below by having an extra '0' digit on the left.
void RoundUp(char *p) {
  // Walk left over nines (turning them into zeros) and over the decimal point,
  // then increment the first character that is neither.
  while (*p == '9' || *p == '.') {
    if (*p == '9') *p = '0';
    --p;
  }
  ++*p;
}

// Check the previous digit and round up or down to follow the round-to-even
// policy.
+void RoundToEven(char *p) { + if (*p == '.') --p; + if (*p % 2 == 1) RoundUp(p); +} + +// Simple integral decimal digit printing for values that fit in 64-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint64_t v, char *p) { + do { + *--p = DivideBy10WithCarry(&v, 0) + '0'; + } while (v != 0); + return p; +} + +// Simple integral decimal digit printing for values that fit in 128-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint128 v, char *p) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + while (high != 0) { + uint64_t carry = DivideBy10WithCarry(&high, 0); + carry = DivideBy10WithCarry(&low, carry); + *--p = carry + '0'; + } + return PrintIntegralDigitsFromRightFast(low, p); +} + +// Simple fractional decimal digit printing for values that fir in 64-bits after +// shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint64_t v, char *start, int exp, + int precision) { + char *p = start; + v <<= (64 - exp); + while (precision > 0) { + if (!v) return p; + *p++ = MultiplyBy10WithCarry(&v, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (v < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (v > 0x8000000000000000) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +// Simple fractional decimal digit printing for values that fir in 128-bits +// after shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. 
+char *PrintFractionalDigitsFast(uint128 v, char *start, int exp, + int precision) { + char *p = start; + v <<= (128 - exp); + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + // While we have digits to print and `low` is not empty, do the long + // multiplication. + while (precision > 0 && low != 0) { + uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{0}); + carry = MultiplyBy10WithCarry(&high, carry); + + *p++ = carry + '0'; + --precision; + } + + // Now `low` is empty, so use a faster approach for the rest of the digits. + // This block is pretty much the same as the main loop for the 64-bit case + // above. + while (precision > 0) { + if (!high) return p; + *p++ = MultiplyBy10WithCarry(&high, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (high < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (high > 0x8000000000000000 || low != 0) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +struct FormatState { + char sign_char; + int precision; + const FormatConversionSpecImpl &conv; + FormatSinkImpl *sink; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. 
+ bool ShouldPrintDot() const { return precision != 0 || conv.has_alt_flag(); } +}; + +struct Padding { + int left_spaces; + int zeros; + int right_spaces; +}; + +Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) { + if (state.conv.width() < 0 || + static_cast<size_t>(state.conv.width()) <= total_size) { + return {0, 0, 0}; + } + int missing_chars = state.conv.width() - total_size; + if (state.conv.has_left_flag()) { + return {0, 0, missing_chars}; + } else if (state.conv.has_zero_flag()) { + return {0, missing_chars, 0}; + } else { + return {missing_chars, 0, 0}; + } +} + +void FinalPrint(const FormatState &state, absl::string_view data, + int padding_offset, int trailing_zeros, + absl::string_view data_postfix) { + if (state.conv.width() < 0) { + // No width specified. Fast-path. + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + return; + } + + auto padding = ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) + + data.size() + data_postfix.size() + + static_cast<size_t>(trailing_zeros), + state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + // Padding in general needs to be inserted somewhere in the middle of `data`. + state.sink->Append(data.substr(0, padding_offset)); + state.sink->Append(padding.zeros, '0'); + state.sink->Append(data.substr(padding_offset)); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + state.sink->Append(padding.right_spaces, ' '); +} + +// Fastpath %f formatter for when the shifted value fits in a simple integral +// type. +// Prints `v*2^exp` with the options from `state`. 
+template <typename Int> +void FormatFFast(Int v, int exp, const FormatState &state) { + constexpr int input_bits = sizeof(Int) * 8; + + static constexpr size_t integral_size = + /* in case we need to round up an extra digit */ 1 + + /* decimal digits for uint128 */ 40 + 1; + char buffer[integral_size + /* . */ 1 + /* max digits uint128 */ 128]; + buffer[integral_size] = '.'; + char *const integral_digits_end = buffer + integral_size; + char *integral_digits_start; + char *const fractional_digits_start = buffer + integral_size + 1; + char *fractional_digits_end = fractional_digits_start; + + if (exp >= 0) { + const int total_bits = input_bits - LeadingZeros(v) + exp; + integral_digits_start = + total_bits <= 64 + ? PrintIntegralDigitsFromRightFast(static_cast<uint64_t>(v) << exp, + integral_digits_end) + : PrintIntegralDigitsFromRightFast(static_cast<uint128>(v) << exp, + integral_digits_end); + } else { + exp = -exp; + + integral_digits_start = PrintIntegralDigitsFromRightFast( + exp < input_bits ? v >> exp : 0, integral_digits_end); + // PrintFractionalDigits may pull a carried 1 all the way up through the + // integral portion. + integral_digits_start[-1] = '0'; + + fractional_digits_end = + exp <= 64 ? PrintFractionalDigitsFast(v, fractional_digits_start, exp, + state.precision) + : PrintFractionalDigitsFast(static_cast<uint128>(v), + fractional_digits_start, exp, + state.precision); + // There was a carry, so include the first digit too. + if (integral_digits_start[-1] != '0') --integral_digits_start; + } + + size_t size = fractional_digits_end - integral_digits_start; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. 
+ if (!state.ShouldPrintDot()) --size; + FinalPrint(state, absl::string_view(integral_digits_start, size), + /*padding_offset=*/0, + static_cast<int>(state.precision - (fractional_digits_end - + fractional_digits_start)), + /*data_postfix=*/""); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp > 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to not have fractional digits, so we don't have to +// worry about anything after the `.`. +void FormatFPositiveExpSlow(uint128 v, int exp, const FormatState &state) { + BinaryToDecimal::RunConversion(v, exp, [&](BinaryToDecimal btd) { + const size_t total_digits = + btd.TotalDigits() + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + + const auto padding = ExtraWidthToPadding( + total_digits + (state.sign_char != '\0' ? 1 : 0), state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + do { + state.sink->Append(btd.CurrentDigits()); + } while (btd.AdvanceDigits()); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + state.sink->Append(state.precision, '0'); + state.sink->Append(padding.right_spaces, ' '); + }); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp < 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to be < 1.0, so we don't have to worry about integral +// digits. +void FormatFNegativeExpSlow(uint128 v, int exp, const FormatState &state) { + const size_t total_digits = + /* 0 */ 1 + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + auto padding = + ExtraWidthToPadding(total_digits + (state.sign_char ? 
1 : 0), state); + padding.zeros += 1; + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + + // Print digits + int digits_to_go = state.precision; + + FractionalDigitGenerator::RunConversion( + v, exp, [&](FractionalDigitGenerator digit_gen) { + // There are no digits to print here. + if (state.precision == 0) return; + + // We go one digit at a time, while keeping track of runs of nines. + // The runs of nines are used to perform rounding when necessary. + + while (digits_to_go > 0 && digit_gen.HasMoreDigits()) { + auto digits = digit_gen.GetDigits(); + + // Now we have a digit and a run of nines. + // See if we can print them all. + if (digits.num_nines + 1 < digits_to_go) { + // We don't have to round yet, so print them. + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits.num_nines, '9'); + digits_to_go -= digits.num_nines + 1; + + } else { + // We can't print all the nines, see where we have to truncate. + + bool round_up = false; + if (digits.num_nines + 1 > digits_to_go) { + // We round up at a nine. No need to print them. + round_up = true; + } else { + // We can fit all the nines, but truncate just after it. + if (digit_gen.IsGreaterThanHalf()) { + round_up = true; + } else if (digit_gen.IsExactlyHalf()) { + // Round to even + round_up = + digits.num_nines != 0 || digits.digit_before_nine % 2 == 1; + } + } + + if (round_up) { + state.sink->Append(1, digits.digit_before_nine + '1'); + --digits_to_go; + // The rest will be zeros. 
+ } else { + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits_to_go - 1, '9'); + digits_to_go = 0; + } + return; + } + } + }); + + state.sink->Append(digits_to_go, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +template <typename Int> +void FormatF(Int mantissa, int exp, const FormatState &state) { + if (exp >= 0) { + const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp; + + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(total_bits > 128)) { + return FormatFPositiveExpSlow(mantissa, exp, state); + } + } else { + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(exp < -128)) { + return FormatFNegativeExpSlow(mantissa, -exp, state); + } + } + return FormatFFast(mantissa, exp, state); +} + +// Grab the group of four bits (nibble) from `n`. E.g., nibble 1 corresponds to +// bits 4-7. +template <typename Int> +uint8_t GetNibble(Int n, int nibble_index) { + constexpr Int mask_low_nibble = Int{0xf}; + int shift = nibble_index * 4; + n &= mask_low_nibble << shift; + return static_cast<uint8_t>((n >> shift) & 0xf); +} + +// Add one to the given nibble, applying carry to higher nibbles. Returns true +// if overflow, false otherwise. +template <typename Int> +bool IncrementNibble(int nibble_index, Int *n) { + constexpr int kShift = sizeof(Int) * 8 - 1; + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + Int before = *n >> kShift; + // Here we essentially want to take the number 1 and move it into the requested + // nibble, then add it to *n to effectively increment the nibble. However, + // ASan will complain if we try to shift the 1 beyond the limits of the Int, + // i.e., if the nibble_index is out of range. So therefore we check for this + // and if we are out of range we just add 0 which leaves *n unchanged, which + // seems like the reasonable thing to do in that case. 
+ *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << (nibble_index * 4))); + Int after = *n >> kShift; + return (before && !after) || (nibble_index >= kNumNibbles); +} + +// Return a mask with 1's in the given nibble and all lower nibbles. +template <typename Int> +Int MaskUpToNibbleInclusive(int nibble_index) { + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + static const Int ones = ~Int{0}; + return ones >> std::max(0, 4 * (kNumNibbles - nibble_index - 1)); +} + +// Return a mask with 1's below the given nibble. +template <typename Int> +Int MaskUpToNibbleExclusive(int nibble_index) { + return nibble_index <= 0 ? 0 : MaskUpToNibbleInclusive<Int>(nibble_index - 1); +} + +template <typename Int> +Int MoveToNibble(uint8_t nibble, int nibble_index) { + return Int{nibble} << (4 * nibble_index); +} + +// Given mantissa size, find optimal # of mantissa bits to put in initial digit. +// +// In the hex representation we keep a single hex digit to the left of the dot. +// However, the question as to how many bits of the mantissa should be put into +// that hex digit in theory is arbitrary, but in practice it is optimal to +// choose based on the size of the mantissa. E.g., for a `double`, there are 53 +// mantissa bits, so that means that we should put 1 bit to the left of the dot, +// thereby leaving 52 bits to the right, which is evenly divisible by four and +// thus all fractional digits represent actual precision. For a `long double`, +// on the other hand, there are 64 bits of mantissa, thus we can use all four +// bits for the initial hex digit and still have a number left over (60) that is +// a multiple of four. Once again, the goal is to have all fractional digits +// represent real precision. +template <typename Float> +constexpr int HexFloatLeadingDigitSizeInBits() { + return std::numeric_limits<Float>::digits % 4 > 0 + ? 
std::numeric_limits<Float>::digits % 4 + : 4; +} + +// This function captures the rounding behavior of glibc for hex float +// representations. E.g. when rounding 0x1.ab800000 to a precision of .2 +// ("%.2a") glibc will round up because it rounds toward the even number (since +// 0xb is an odd number, it will round up to 0xc). However, when rounding at a +// point that is not followed by 800000..., it disregards the parity and rounds +// up if > 8 and rounds down if < 8. +template <typename Int> +bool HexFloatNeedsRoundUp(Int mantissa, int final_nibble_displayed, + uint8_t leading) { + // If the last nibble (hex digit) to be displayed is the lowest one in the + // mantissa then that means that we don't have any further nibbles to inform + // rounding, so don't round. + if (final_nibble_displayed <= 0) { + return false; + } + int rounding_nibble_idx = final_nibble_displayed - 1; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + assert(final_nibble_displayed <= kTotalNibbles); + Int mantissa_up_to_rounding_nibble_inclusive = + mantissa & MaskUpToNibbleInclusive<Int>(rounding_nibble_idx); + Int eight = MoveToNibble<Int>(8, rounding_nibble_idx); + if (mantissa_up_to_rounding_nibble_inclusive != eight) { + return mantissa_up_to_rounding_nibble_inclusive > eight; + } + // Nibble in question == 8. + uint8_t round_if_odd = (final_nibble_displayed == kTotalNibbles) + ? leading + : GetNibble(mantissa, final_nibble_displayed); + return round_if_odd % 2 == 1; +} + +// Stores values associated with a Float type needed by the FormatA +// implementation in order to avoid templatizing that function by the Float +// type. 
+struct HexFloatTypeParams { + template <typename Float> + explicit HexFloatTypeParams(Float) + : min_exponent(std::numeric_limits<Float>::min_exponent - 1), + leading_digit_size_bits(HexFloatLeadingDigitSizeInBits<Float>()) { + assert(leading_digit_size_bits >= 1 && leading_digit_size_bits <= 4); + } + + int min_exponent; + int leading_digit_size_bits; +}; + +// Hex Float Rounding. First check if we need to round; if so, then we do that +// by manipulating (incrementing) the mantissa, that way we can later print the +// mantissa digits by iterating through them in the same way regardless of +// whether a rounding happened. +template <typename Int> +void FormatARound(bool precision_specified, const FormatState &state, + uint8_t *leading, Int *mantissa, int *exp) { + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Index of the last nibble that we could display given precision. + int final_nibble_displayed = + precision_specified ? std::max(0, (kTotalNibbles - state.precision)) : 0; + if (HexFloatNeedsRoundUp(*mantissa, final_nibble_displayed, *leading)) { + // Need to round up. + bool overflow = IncrementNibble(final_nibble_displayed, mantissa); + *leading += (overflow ? 1 : 0); + if (ABSL_PREDICT_FALSE(*leading > 15)) { + // We have overflowed the leading digit. This would mean that we would + // need two hex digits to the left of the dot, which is not allowed. So + // adjust the mantissa and exponent so that the result is always 1.0eXXX. + *leading = 1; + *mantissa = 0; + *exp += 4; + } + } + // Now that we have handled a possible round-up we can go ahead and zero out + // all the nibbles of the mantissa that we won't need. 
+ if (precision_specified) { + *mantissa &= ~MaskUpToNibbleExclusive<Int>(final_nibble_displayed); + } +} + +template <typename Int> +void FormatANormalize(const HexFloatTypeParams float_traits, uint8_t *leading, + Int *mantissa, int *exp) { + constexpr int kIntBits = sizeof(Int) * 8; + static const Int kHighIntBit = Int{1} << (kIntBits - 1); + const int kLeadDigitBitsCount = float_traits.leading_digit_size_bits; + // Normalize mantissa so that highest bit set is in MSB position, unless we + // get interrupted by the exponent threshold. + while (*mantissa && !(*mantissa & kHighIntBit)) { + if (ABSL_PREDICT_FALSE(*exp - 1 < float_traits.min_exponent)) { + *mantissa >>= (float_traits.min_exponent - *exp); + *exp = float_traits.min_exponent; + return; + } + *mantissa <<= 1; + --*exp; + } + // Extract bits for leading digit then shift them away leaving the + // fractional part. + *leading = + static_cast<uint8_t>(*mantissa >> (kIntBits - kLeadDigitBitsCount)); + *exp -= (*mantissa != 0) ? kLeadDigitBitsCount : *exp; + *mantissa <<= kLeadDigitBitsCount; +} + +template <typename Int> +void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, + bool uppercase, const FormatState &state) { + // Int properties. + constexpr int kIntBits = sizeof(Int) * 8; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Did the user specify a precision explicitly? + const bool precision_specified = state.conv.precision() >= 0; + + // ========== Normalize/Denormalize ========== + exp += kIntBits; // make all digits fractional digits. + // This holds the (up to four) bits of leading digit, i.e., the '1' in the + // number 0x1.e6fp+2. It's always > 0 unless number is zero or denormal. + uint8_t leading = 0; + FormatANormalize(float_traits, &leading, &mantissa, &exp); + + // =============== Rounding ================== + // Check if we need to round; if so, then we do that by manipulating + // (incrementing) the mantissa before beginning to print characters. 
+ FormatARound(precision_specified, state, &leading, &mantissa, &exp); + + // ============= Format Result =============== + // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3p+2". Compute the + // size with long double which is the largest of the floats. + constexpr size_t kBufSizeForHexFloatRepr = + 2 // 0x + + std::numeric_limits<long double>::digits / 4 // number of hex digits + + 1 // round up + + 1; // "." (dot) + char digits_buffer[kBufSizeForHexFloatRepr]; + char *digits_iter = digits_buffer; + const char *const digits = + static_cast<const char *>("0123456789ABCDEF0123456789abcdef") + + (uppercase ? 0 : 16); + + // =============== Hex Prefix ================ + *digits_iter++ = '0'; + *digits_iter++ = uppercase ? 'X' : 'x'; + + // ========== Non-Fractional Digit =========== + *digits_iter++ = digits[leading]; + + // ================== Dot ==================== + // There are three reasons we might need a dot. Keep in mind that, at this + // point, the mantissa holds only the fractional part. + if ((precision_specified && state.precision > 0) || + (!precision_specified && mantissa > 0) || state.conv.has_alt_flag()) { + *digits_iter++ = '.'; + } + + // ============ Fractional Digits ============ + int digits_emitted = 0; + while (mantissa > 0) { + *digits_iter++ = digits[GetNibble(mantissa, kTotalNibbles - 1)]; + mantissa <<= 4; + ++digits_emitted; + } + int trailing_zeros = + precision_specified ? state.precision - digits_emitted : 0; + assert(trailing_zeros >= 0); + auto digits_result = string_view(digits_buffer, digits_iter - digits_buffer); + + // =============== Exponent ================== + constexpr size_t kBufSizeForExpDecRepr = + numbers_internal::kFastToBufferSize // required for FastIntToBuffer + + 1 // 'p' or 'P' + + 1; // '+' or '-' + char exp_buffer[kBufSizeForExpDecRepr]; + exp_buffer[0] = uppercase ? 'P' : 'p'; + exp_buffer[1] = exp >= 0 ? '+' : '-'; + numbers_internal::FastIntToBuffer(exp < 0 ? 
-exp : exp, exp_buffer + 2); + + // ============ Assemble Result ============== + FinalPrint(state, // + digits_result, // 0xN.NNN... + 2, // offset in `data` to start padding if needed. + trailing_zeros, // num remaining mantissa padding zeros + exp_buffer); // exponent +} + +char *CopyStringTo(absl::string_view v, char *out) { + std::memcpy(out, v.data(), v.size()); + return out + v.size(); +} + +template <typename Float> +bool FallbackToSnprintf(const Float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + int w = conv.width() >= 0 ? conv.width() : 0; + int p = conv.precision() >= 0 ? conv.precision() : -1; + char fmt[32]; + { + char *fp = fmt; + *fp++ = '%'; + fp = CopyStringTo(FormatConversionSpecImplFriend::FlagsToString(conv), fp); + fp = CopyStringTo("*.*", fp); + if (std::is_same<long double, Float>()) { + *fp++ = 'L'; + } + *fp++ = FormatConversionCharToChar(conv.conversion_char()); + *fp = 0; + assert(fp < fmt + sizeof(fmt)); + } + std::string space(512, '\0'); + absl::string_view result; + while (true) { + int n = snprintf(&space[0], space.size(), fmt, w, p, v); + if (n < 0) return false; + if (static_cast<size_t>(n) < space.size()) { + result = absl::string_view(space.data(), n); + break; + } + space.resize(n + 1); + } + sink->Append(result); + return true; +} + +// 128-bits in decimal: ceil(128*log(2)/log(10)) +// or std::numeric_limits<__uint128_t>::digits10 +constexpr int kMaxFixedPrecision = 39; + +constexpr int kBufferLength = /*sign*/ 1 + + /*integer*/ kMaxFixedPrecision + + /*point*/ 1 + + /*fraction*/ kMaxFixedPrecision + + /*exponent e+123*/ 5; + +struct Buffer { + void push_front(char c) { + assert(begin > data); + *--begin = c; + } + void push_back(char c) { + assert(end < data + sizeof(data)); + *end++ = c; + } + void pop_back() { + assert(begin < end); + --end; + } + + char &back() { + assert(begin < end); + return end[-1]; + } + + char last_digit() const { return end[-1] == '.' ? 
end[-2] : end[-1]; } + + int size() const { return static_cast<int>(end - begin); } + + char data[kBufferLength]; + char *begin; + char *end; +}; + +enum class FormatStyle { Fixed, Precision }; + +// If the value is Inf or Nan, print it and return true. +// Otherwise, return false. +template <typename Float> +bool ConvertNonNumericFloats(char sign_char, Float v, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + char text[4], *ptr = text; + if (sign_char != '\0') *ptr++ = sign_char; + if (std::isnan(v)) { + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "NAN" : "nan", 3, + ptr); + } else if (std::isinf(v)) { + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "INF" : "inf", 3, + ptr); + } else { + return false; + } + + return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, + conv.has_left_flag()); +} + +// Round up the last digit of the value. +// It will carry over and potentially overflow. 'exp' will be adjusted in that +// case. +template <FormatStyle mode> +void RoundUp(Buffer *buffer, int *exp) { + char *p = &buffer->back(); + while (p >= buffer->begin && (*p == '9' || *p == '.')) { + if (*p == '9') *p = '0'; + --p; + } + + if (p < buffer->begin) { + *p = '1'; + buffer->begin = p; + if (mode == FormatStyle::Precision) { + std::swap(p[1], p[2]); // move the . + ++*exp; + buffer->pop_back(); + } + } else { + ++*p; + } +} + +void PrintExponent(int exp, char e, Buffer *out) { + out->push_back(e); + if (exp < 0) { + out->push_back('-'); + exp = -exp; + } else { + out->push_back('+'); + } + // Exponent digits. 
+ if (exp > 99) { + out->push_back(exp / 100 + '0'); + out->push_back(exp / 10 % 10 + '0'); + out->push_back(exp % 10 + '0'); + } else { + out->push_back(exp / 10 + '0'); + out->push_back(exp % 10 + '0'); + } +} + +template <typename Float, typename Int> +constexpr bool CanFitMantissa() { + return +#if defined(__clang__) && !defined(__SSE3__) + // Workaround for clang bug: https://bugs.llvm.org/show_bug.cgi?id=38289 + // Casting from long double to uint64_t is miscompiled and drops bits. + (!std::is_same<Float, long double>::value || + !std::is_same<Int, uint64_t>::value) && +#endif + std::numeric_limits<Float>::digits <= std::numeric_limits<Int>::digits; +} + +template <typename Float> +struct Decomposed { + using MantissaType = + absl::conditional_t<std::is_same<long double, Float>::value, uint128, + uint64_t>; + static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8, + ""); + MantissaType mantissa; + int exponent; +}; + +// Decompose the double into an integer mantissa and an exponent. +template <typename Float> +Decomposed<Float> Decompose(Float v) { + int exp; + Float m = std::frexp(v, &exp); + m = std::ldexp(m, std::numeric_limits<Float>::digits); + exp -= std::numeric_limits<Float>::digits; + + return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp}; +} + +// Print 'digits' as decimal. +// In Fixed mode, we add a '.' at the end. +// In Precision mode, we add a '.' after the first digit. 
+template <FormatStyle mode, typename Int> +int PrintIntegralDigits(Int digits, Buffer *out) { + int printed = 0; + if (digits) { + for (; digits; digits /= 10) out->push_front(digits % 10 + '0'); + printed = out->size(); + if (mode == FormatStyle::Precision) { + out->push_front(*out->begin); + out->begin[1] = '.'; + } else { + out->push_back('.'); + } + } else if (mode == FormatStyle::Fixed) { + out->push_front('0'); + out->push_back('.'); + printed = 1; + } + return printed; +} + +// Back out 'extra_digits' digits and round up if necessary. +bool RemoveExtraPrecision(int extra_digits, bool has_leftover_value, + Buffer *out, int *exp_out) { + if (extra_digits <= 0) return false; + + // Back out the extra digits + out->end -= extra_digits; + + bool needs_to_round_up = [&] { + // We look at the digit just past the end. + // There must be 'extra_digits' extra valid digits after end. + if (*out->end > '5') return true; + if (*out->end < '5') return false; + if (has_leftover_value || std::any_of(out->end + 1, out->end + extra_digits, + [](char c) { return c != '0'; })) + return true; + + // Ends in ...50*, round to even. + return out->last_digit() % 2 == 1; + }(); + + if (needs_to_round_up) { + RoundUp<FormatStyle::Precision>(out, exp_out); + } + return true; +} + +// Print the value into the buffer. +// This will not include the exponent, which will be returned in 'exp_out' for +// Precision mode. +template <typename Int, typename Float, FormatStyle mode> +bool FloatToBufferImpl(Int int_mantissa, int exp, int precision, Buffer *out, + int *exp_out) { + assert((CanFitMantissa<Float, Int>())); + + const int int_bits = std::numeric_limits<Int>::digits; + + // In precision mode, we start printing one char to the right because it will + // also include the '.' + // In fixed mode we put the dot afterwards on the right. 
+ out->begin = out->end = + out->data + 1 + kMaxFixedPrecision + (mode == FormatStyle::Precision); + + if (exp >= 0) { + if (std::numeric_limits<Float>::digits + exp > int_bits) { + // The value will overflow the Int + return false; + } + int digits_printed = PrintIntegralDigits<mode>(int_mantissa << exp, out); + int digits_to_zero_pad = precision; + if (mode == FormatStyle::Precision) { + *exp_out = digits_printed - 1; + digits_to_zero_pad -= digits_printed - 1; + if (RemoveExtraPrecision(-digits_to_zero_pad, false, out, exp_out)) { + return true; + } + } + for (; digits_to_zero_pad-- > 0;) out->push_back('0'); + return true; + } + + exp = -exp; + // We need at least 4 empty bits for the next decimal digit. + // We will multiply by 10. + if (exp > int_bits - 4) return false; + + const Int mask = (Int{1} << exp) - 1; + + // Print the integral part first. + int digits_printed = PrintIntegralDigits<mode>(int_mantissa >> exp, out); + int_mantissa &= mask; + + int fractional_count = precision; + if (mode == FormatStyle::Precision) { + if (digits_printed == 0) { + // Find the first non-zero digit, when in Precision mode. + *exp_out = 0; + if (int_mantissa) { + while (int_mantissa <= mask) { + int_mantissa *= 10; + --*exp_out; + } + } + out->push_front(static_cast<char>(int_mantissa >> exp) + '0'); + out->push_back('.'); + int_mantissa &= mask; + } else { + // We already have a digit, and a '.' + *exp_out = digits_printed - 1; + fractional_count -= *exp_out; + if (RemoveExtraPrecision(-fractional_count, int_mantissa != 0, out, + exp_out)) { + // If we had enough digits, return right away. + // The code below will try to round again otherwise. + return true; + } + } + } + + auto get_next_digit = [&] { + int_mantissa *= 10; + int digit = static_cast<int>(int_mantissa >> exp); + int_mantissa &= mask; + return digit; + }; + + // Print fractional_count more digits, if available. 
+ for (; fractional_count > 0; --fractional_count) { + out->push_back(get_next_digit() + '0'); + } + + int next_digit = get_next_digit(); + if (next_digit > 5 || + (next_digit == 5 && (int_mantissa || out->last_digit() % 2 == 1))) { + RoundUp<mode>(out, exp_out); + } + + return true; +} + +template <FormatStyle mode, typename Float> +bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, + int *exp) { + if (precision > kMaxFixedPrecision) return false; + + // Try with uint64_t. + if (CanFitMantissa<Float, std::uint64_t>() && + FloatToBufferImpl<std::uint64_t, Float, mode>( + static_cast<std::uint64_t>(decomposed.mantissa), + static_cast<std::uint64_t>(decomposed.exponent), precision, out, exp)) + return true; + +#if defined(ABSL_HAVE_INTRINSIC_INT128) + // If that is not enough, try with __uint128_t. + return CanFitMantissa<Float, __uint128_t>() && + FloatToBufferImpl<__uint128_t, Float, mode>( + static_cast<__uint128_t>(decomposed.mantissa), + static_cast<__uint128_t>(decomposed.exponent), precision, out, + exp); +#endif + return false; +} + +void WriteBufferToSink(char sign_char, absl::string_view str, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + int left_spaces = 0, zeros = 0, right_spaces = 0; + int missing_chars = + conv.width() >= 0 ? std::max(conv.width() - static_cast<int>(str.size()) - + static_cast<int>(sign_char != 0), + 0) + : 0; + if (conv.has_left_flag()) { + right_spaces = missing_chars; + } else if (conv.has_zero_flag()) { + zeros = missing_chars; + } else { + left_spaces = missing_chars; + } + + sink->Append(left_spaces, ' '); + if (sign_char != '\0') sink->Append(1, sign_char); + sink->Append(zeros, '0'); + sink->Append(str); + sink->Append(right_spaces, ' '); +} + +template <typename Float> +bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + // Print the sign or the sign column. 
+ Float abs_v = v; + char sign_char = 0; + if (std::signbit(abs_v)) { + sign_char = '-'; + abs_v = -abs_v; + } else if (conv.has_show_pos_flag()) { + sign_char = '+'; + } else if (conv.has_sign_col_flag()) { + sign_char = ' '; + } + + // Print nan/inf. + if (ConvertNonNumericFloats(sign_char, abs_v, conv, sink)) { + return true; + } + + int precision = conv.precision() < 0 ? 6 : conv.precision(); + + int exp = 0; + + auto decomposed = Decompose(abs_v); + + Buffer buffer; + + FormatConversionChar c = conv.conversion_char(); + + if (c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::F) { + FormatF(decomposed.mantissa, decomposed.exponent, + {sign_char, precision, conv, sink}); + return true; + } else if (c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::E) { + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (!conv.has_alt_flag() && buffer.back() == '.') buffer.pop_back(); + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } else if (c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::G) { + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. 
+ std::rotate(buffer.begin + 1, buffer.begin + 2, buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.has_alt_flag()) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) { + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } + } else if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::A) { + bool uppercase = (c == FormatConversionCharInternal::A); + FormatA(HexFloatTypeParams(Float{}), decomposed.mantissa, + decomposed.exponent, uppercase, {sign_char, precision, conv, sink}); + return true; + } else { + return false; + } + + WriteBufferToSink(sign_char, + absl::string_view(buffer.begin, buffer.end - buffer.begin), + conv, sink); + + return true; +} + +} // namespace + +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + if (std::numeric_limits<long double>::digits == + 2 * std::numeric_limits<double>::digits) { + // This is the `double-double` representation of `long double`. + // We do not handle it natively. Fallback to snprintf. + return FallbackToSnprintf(v, conv, sink); + } + + return FloatToSink(v, conv, sink); +} + +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + return FloatToSink(static_cast<double>(v), conv, sink); +} + +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { + return FloatToSink(v, conv, sink); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.h b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.h new file mode 100644 index 000000000000..71100e714257 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/float_conversion.h @@ -0,0 +1,37 @@ +// Copyright 2020 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ + +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink); + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/output.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/output.cc new file mode 100644 index 000000000000..c4b24706132c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/output.cc @@ -0,0 +1,72 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/output.h" + +#include <errno.h> +#include <cstring> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { +struct ClearErrnoGuard { + ClearErrnoGuard() : old_value(errno) { errno = 0; } + ~ClearErrnoGuard() { + if (!errno) errno = old_value; + } + int old_value; +}; +} // namespace + +void BufferRawSink::Write(string_view v) { + size_t to_write = std::min(v.size(), size_); + std::memcpy(buffer_, v.data(), to_write); + buffer_ += to_write; + size_ -= to_write; + total_written_ += v.size(); +} + +void FILERawSink::Write(string_view v) { + while (!v.empty() && !error_) { + // Reset errno to zero in case the libc implementation doesn't set errno + // when a failure occurs. + ClearErrnoGuard guard; + + if (size_t result = std::fwrite(v.data(), 1, v.size(), output_)) { + // Some progress was made. + count_ += result; + v.remove_prefix(result); + } else { + if (errno == EINTR) { + continue; + } else if (errno) { + error_ = errno; + } else if (std::ferror(output_)) { + // Non-POSIX compliant libc implementations may not set errno, so we + // have check the streams error indicator. + error_ = EBADF; + } else { + // We're likely on a non-POSIX system that encountered EINTR but had no + // way of reporting it. 
+ continue; + } + } + } +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/output.h b/third_party/abseil_cpp/absl/strings/internal/str_format/output.h new file mode 100644 index 000000000000..8030dae00f4f --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/output.h @@ -0,0 +1,96 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Output extension hooks for the Format library. +// `internal::InvokeFlush` calls the appropriate flush function for the +// specified output argument. +// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF. +// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ + +#include <cstdio> +#include <ostream> +#include <string> + +#include "absl/base/port.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +// RawSink implementation that writes into a char* buffer. +// It will not overflow the buffer, but will keep the total count of chars +// that would have been written. 
+class BufferRawSink { + public: + // Writes into `buffer`; at most `size` chars in total are ever copied there. + BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {} + + // Sum of the sizes of everything passed to Write(), including chars that did + // not fit in the buffer. + size_t total_written() const { return total_written_; } + // Copies as much of `v` as still fits; the remainder is dropped. + void Write(string_view v); + + private: + // Next position to write to; advanced by each Write(). + char* buffer_; + // Remaining capacity; reduced by each Write(). + size_t size_; + size_t total_written_ = 0; +}; + +// RawSink implementation that writes into a FILE*. +// It keeps track of the total number of bytes written and any error encountered +// during the writes. +class FILERawSink { + public: + explicit FILERawSink(std::FILE* output) : output_(output) {} + + // Writes `v` to the FILE*. Does nothing once an error has been recorded. + void Write(string_view v); + + // Number of bytes std::fwrite has reported as written so far. + size_t count() const { return count_; } + // errno-style code of the failure that stopped writing, or 0 if there was + // none. + int error() const { return error_; } + + private: + std::FILE* output_; + int error_ = 0; + size_t count_ = 0; +}; + +// Provide RawSink integration with common types from the STL. +inline void AbslFormatFlush(std::string* out, string_view s) { + out->append(s.data(), s.size()); +} +inline void AbslFormatFlush(std::ostream* out, string_view s) { + out->write(s.data(), s.size()); +} + +// The sink types defined above are flushed through their own Write(). +inline void AbslFormatFlush(FILERawSink* sink, string_view v) { + sink->Write(v); +} + +inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { + sink->Write(v); +} + +// This is a SFINAE to get a better compiler error message when the type +// is not supported: the trailing return type only compiles when an +// `AbslFormatFlush(out, s)` overload exists for `T*`. +template <typename T> +auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) { + AbslFormatFlush(out, s); +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/output_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/output_test.cc new file mode 100644 index 000000000000..ce2e91a0bbe8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/output_test.cc @@ -0,0 +1,79 @@ +// Copyright 2017 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/str_format/output.h" + +#include <sstream> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/cord.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +TEST(InvokeFlush, String) { + std::string str = "ABC"; + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + +TEST(InvokeFlush, Stream) { + std::stringstream str; + str << "ABC"; + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str.str(), "ABCDEF"); +} + +TEST(InvokeFlush, Cord) { + absl::Cord str("ABC"); + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + +TEST(BufferRawSink, Limits) { + char buf[16]; + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World237237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World"); + 
str_format_internal::InvokeFlush(&bufsink, "237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx"); + } + { + std::fill(std::begin(buf), std::end(buf), 'x'); + str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1); + str_format_internal::InvokeFlush(&bufsink, "Hello World"); + str_format_internal::InvokeFlush(&bufsink, "237237"); + EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x"); + } +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/parser.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.cc new file mode 100644 index 000000000000..f308d0235120 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.cc @@ -0,0 +1,350 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/str_format/parser.h" + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <cctype> +#include <cstdint> + +#include <algorithm> +#include <initializer_list> +#include <limits> +#include <ostream> +#include <string> +#include <unordered_set> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +using CC = FormatConversionCharInternal; +using LM = LengthMod; + +ABSL_CONST_INIT const ConvTag kTags[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 + {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f + {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 + {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! 
+ {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff +}; + +namespace { + +bool CheckFastPathSetting(const UnboundConversion& conv) { + bool should_be_basic = !conv.flags.left && // + !conv.flags.show_pos && // + !conv.flags.sign_col && // + !conv.flags.alt && // + !conv.flags.zero && // + (conv.width.value() == -1) && + (conv.precision.value() == -1); + if (should_be_basic != conv.flags.basic) { + fprintf(stderr, + "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " + "width=%d precision=%d\n", + conv.flags.basic, conv.flags.left, conv.flags.show_pos, + conv.flags.sign_col, conv.flags.alt, conv.flags.zero, + conv.width.value(), conv.precision.value()); + } + return should_be_basic == conv.flags.basic; +} + +template <bool is_positional> +const char *ConsumeConversion(const char *pos, const char *const end, + UnboundConversion *conv, int *next_arg) { + const char* const original_pos = pos; + char c; + // Read the next char into `c` and update `pos`. Returns false if there are + // no more chars to read. +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ + } while (0) + + const auto parse_digits = [&] { + int digits = c - '0'; + // We do not want to overflow `digits` so we consume at most digits10 + // digits. 
If there are more digits the parsing will fail later on when the + // digit doesn't match the expected characters. + int num_digits = std::numeric_limits<int>::digits10; + for (;;) { + if (ABSL_PREDICT_FALSE(pos == end)) break; + c = *pos++; + if (!std::isdigit(c)) break; + --num_digits; + if (ABSL_PREDICT_FALSE(!num_digits)) break; + digits = 10 * digits + c - '0'; + } + return digits; + }; + + if (is_positional) { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->arg_position = parse_digits(); + assert(conv->arg_position > 0); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + } + + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + + // We should start with the basic flag on. + assert(conv->flags.basic); + + // Any non alpha character makes this conversion not basic. + // This includes flags (-+ #0), width (1-9, *) or precision (.). + // All conversion characters and length modifiers are alpha characters. + if (c < 'A') { + conv->flags.basic = false; + + for (; c <= '0';) { + // FIXME: We might be able to speed this up reusing the lookup table from + // above. It might require changing Flags to be a plain integer where we + // can |= a value. + switch (c) { + case '-': + conv->flags.left = true; + break; + case '+': + conv->flags.show_pos = true; + break; + case ' ': + conv->flags.sign_col = true; + break; + case '#': + conv->flags.alt = true; + break; + case '0': + conv->flags.zero = true; + break; + default: + goto flags_done; + } + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } +flags_done: + + if (c <= '9') { + if (c >= '0') { + int maybe_width = parse_digits(); + if (!is_positional && c == '$') { + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; + // Positional conversion. 
+ *next_arg = -1; + conv->flags = Flags(); + conv->flags.basic = true; + return ConsumeConversion<true>(original_pos, end, conv, next_arg); + } + conv->width.set_value(maybe_width); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->width.set_from_arg(parse_digits()); + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->width.set_from_arg(++*next_arg); + } + } + } + + if (c == '.') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (std::isdigit(c)) { + conv->precision.set_value(parse_digits()); + } else if (c == '*') { + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (is_positional) { + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; + conv->precision.set_from_arg(parse_digits()); + if (c != '$') return nullptr; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->precision.set_from_arg(++*next_arg); + } + } else { + conv->precision.set_value(0); + } + } + } + + auto tag = GetTagForChar(c); + + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; + + // It is a length modifier. 
+ using str_format_internal::LengthMod; + LengthMod length_mod = tag.as_length(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + conv->length_mod = length_mod; + } + tag = GetTagForChar(c); + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; + } + + assert(CheckFastPathSetting(*conv)); + (void)(&CheckFastPathSetting); + + conv->conv = tag.as_conv(); + if (!is_positional) conv->arg_position = ++*next_arg; + return pos; +} + +} // namespace + +std::string LengthModToString(LengthMod v) { + switch (v) { + case LengthMod::h: + return "h"; + case LengthMod::hh: + return "hh"; + case LengthMod::l: + return "l"; + case LengthMod::ll: + return "ll"; + case LengthMod::L: + return "L"; + case LengthMod::j: + return "j"; + case LengthMod::z: + return "z"; + case LengthMod::t: + return "t"; + case LengthMod::q: + return "q"; + case LengthMod::none: + return ""; + } + return ""; +} + +const char *ConsumeUnboundConversion(const char *p, const char *end, + UnboundConversion *conv, int *next_arg) { + if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); + return ConsumeConversion<false>(p, end, conv, next_arg); +} + +struct ParsedFormatBase::ParsedFormatConsumer { + explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat) + : parsed(parsedformat), data_pos(parsedformat->data_.get()) {} + + bool Append(string_view s) { + if (s.empty()) return true; + + size_t text_end = AppendText(s); + + if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) { + // Let's extend the existing text run. + parsed->items_.back().text_end = text_end; + } else { + // Let's make a new text run. 
+ parsed->items_.push_back({false, text_end, {}}); + } + return true; + } + + bool ConvertOne(const UnboundConversion &conv, string_view s) { + size_t text_end = AppendText(s); + parsed->items_.push_back({true, text_end, conv}); + return true; + } + + size_t AppendText(string_view s) { + memcpy(data_pos, s.data(), s.size()); + data_pos += s.size(); + return static_cast<size_t>(data_pos - parsed->data_.get()); + } + + ParsedFormatBase *parsed; + char* data_pos; +}; + +ParsedFormatBase::ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) + : data_(format.empty() ? nullptr : new char[format.size()]) { + has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || + !MatchesConversions(allow_ignored, convs); +} + +bool ParsedFormatBase::MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const { + std::unordered_set<int> used; + auto add_if_valid_conv = [&](int pos, char c) { + if (static_cast<size_t>(pos) > convs.size() || + !Contains(convs.begin()[pos - 1], c)) + return false; + used.insert(pos); + return true; + }; + for (const ConversionItem &item : items_) { + if (!item.is_conversion) continue; + auto &conv = item.conv; + if (conv.precision.is_from_arg() && + !add_if_valid_conv(conv.precision.get_from_arg(), '*')) + return false; + if (conv.width.is_from_arg() && + !add_if_valid_conv(conv.width.get_from_arg(), '*')) + return false; + if (!add_if_valid_conv(conv.arg_position, + FormatConversionCharToChar(conv.conv))) + return false; + } + return used.size() == convs.size() || allow_ignored; +} + +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/parser.h b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.h new file mode 100644 index 000000000000..6504dd3ddc20 --- /dev/null +++ 
b/third_party/abseil_cpp/absl/strings/internal/str_format/parser.h @@ -0,0 +1,349 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ +#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ + +#include <limits.h> +#include <stddef.h> +#include <stdlib.h> + +#include <cassert> +#include <cstdint> +#include <initializer_list> +#include <iosfwd> +#include <iterator> +#include <memory> +#include <string> +#include <vector> + +#include "absl/strings/internal/str_format/checker.h" +#include "absl/strings/internal/str_format/extension.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +std::string LengthModToString(LengthMod v); + +// The analyzed properties of a single specified conversion. +struct UnboundConversion { + UnboundConversion() + : flags() /* This is required to zero all the fields of flags. */ { + flags.basic = true; + } + + class InputValue { + public: + void set_value(int value) { + assert(value >= 0); + value_ = value; + } + int value() const { return value_; } + + // Marks the value as "from arg". aka the '*' format. + // Requires `value >= 1`. + // When set, is_from_arg() returns true and get_from_arg() returns the + // original value. + // `value()`'s return value is unspecified in this state. 
+ void set_from_arg(int value) { + assert(value > 0); + value_ = -value - 1; + } + bool is_from_arg() const { return value_ < -1; } + int get_from_arg() const { + assert(is_from_arg()); + return -value_ - 1; + } + + private: + int value_ = -1; + }; + + // No need to initialize. It will always be set in the parser. + int arg_position; + + InputValue width; + InputValue precision; + + Flags flags; + LengthMod length_mod = LengthMod::none; + FormatConversionChar conv = FormatConversionCharInternal::kNone; +}; + +// Consume conversion spec prefix (not including '%') of [p, end) if valid. +// Examples of valid specs would be e.g.: "s", "d", "-12.6f". +// If valid, it returns the first character following the conversion spec, +// and the spec part is broken down and returned in 'conv'. +// If invalid, returns nullptr. +const char* ConsumeUnboundConversion(const char* p, const char* end, + UnboundConversion* conv, int* next_arg); + +// Helper tag class for the table below. +// It allows fast `char -> ConversionChar/LengthMod` checking and +// conversions. +class ConvTag { + public: + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<std::int8_t>(conversion_char)) {} + // We invert the length modifiers to make them negative so that we can easily + // test for them. + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(~static_cast<std::int8_t>(length_mod)) {} + // Everything else is -128, which is negative to make is_conv() simpler. + constexpr ConvTag() : tag_(-128) {} + + bool is_conv() const { return tag_ >= 0; } + bool is_length() const { return tag_ < 0 && tag_ != -128; } + FormatConversionChar as_conv() const { + assert(is_conv()); + return static_cast<FormatConversionChar>(tag_); + } + LengthMod as_length() const { + assert(is_length()); + return static_cast<LengthMod>(~tag_); + } + + private: + std::int8_t tag_; +}; + +extern const ConvTag kTags[256]; +// Keep a single table for all the conversion chars and length modifiers. 
+inline ConvTag GetTagForChar(char c) { + return kTags[static_cast<unsigned char>(c)]; +} + +// Parse the format string provided in 'src' and pass the identified items into +// 'consumer'. +// Text runs will be passed by calling +// Consumer::Append(string_view); +// ConversionItems will be passed by calling +// Consumer::ConvertOne(UnboundConversion, string_view); +// In the case of ConvertOne, the string_view that is passed is the +// portion of the format string corresponding to the conversion, not including +// the leading %. On success, it returns true. On failure, it stops and returns +// false. +template <typename Consumer> +bool ParseFormatString(string_view src, Consumer consumer) { + int next_arg = 0; + const char* p = src.data(); + const char* const end = p + src.size(); + while (p != end) { + const char* percent = static_cast<const char*>(memchr(p, '%', end - p)); + if (!percent) { + // We found the last substring. + return consumer.Append(string_view(p, end - p)); + } + // We found a percent, so push the text run then process the percent. + if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) { + return false; + } + if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false; + + auto tag = GetTagForChar(percent[1]); + if (tag.is_conv()) { + if (ABSL_PREDICT_FALSE(next_arg < 0)) { + // This indicates an error in the format string. + // The only way to get `next_arg < 0` here is to have a positional + // argument first which sets next_arg to -1 and then a non-positional + // argument. + return false; + } + p = percent + 2; + + // Keep this case separate from the one below. + // ConvertOne is more efficient when the compiler can see that the `basic` + // flag is set. 
+ UnboundConversion conv; + conv.conv = tag.as_conv(); + conv.arg_position = ++next_arg; + if (ABSL_PREDICT_FALSE( + !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) { + return false; + } + } else if (percent[1] != '%') { + UnboundConversion conv; + p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg); + if (ABSL_PREDICT_FALSE(p == nullptr)) return false; + if (ABSL_PREDICT_FALSE(!consumer.ConvertOne( + conv, string_view(percent + 1, p - (percent + 1))))) { + return false; + } + } else { + if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false; + p = percent + 2; + continue; + } + } + return true; +} + +// Always returns true, or fails to compile in a constexpr context if s does not +// point to a constexpr char array. +constexpr bool EnsureConstexpr(string_view s) { + return s.empty() || s[0] == s[0]; +} + +class ParsedFormatBase { + public: + explicit ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs); + + ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } + + ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); } + + ParsedFormatBase& operator=(const ParsedFormatBase& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + items_ = other.items_; + size_t text_size = items_.empty() ? 0 : items_.back().text_end; + data_.reset(new char[text_size]); + memcpy(data_.get(), other.data_.get(), text_size); + return *this; + } + + ParsedFormatBase& operator=(ParsedFormatBase&& other) { + if (this == &other) return *this; + has_error_ = other.has_error_; + data_ = std::move(other.data_); + items_ = std::move(other.items_); + // Reset the vector to make sure the invariants hold. 
+ other.items_.clear(); + return *this; + } + + template <typename Consumer> + bool ProcessFormat(Consumer consumer) const { + const char* const base = data_.get(); + string_view text(base, 0); + for (const auto& item : items_) { + const char* const end = text.data() + text.size(); + text = string_view(end, (base + item.text_end) - end); + if (item.is_conversion) { + if (!consumer.ConvertOne(item.conv, text)) return false; + } else { + if (!consumer.Append(text)) return false; + } + } + return !has_error_; + } + + bool has_error() const { return has_error_; } + + private: + // Returns whether the conversions match and if !allow_ignored it verifies + // that all conversions are used by the format. + bool MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const; + + struct ParsedFormatConsumer; + + struct ConversionItem { + bool is_conversion; + // Points to the past-the-end location of this element in the data_ array. + size_t text_end; + UnboundConversion conv; + }; + + bool has_error_; + std::unique_ptr<char[]> data_; + std::vector<ConversionItem> items_; +}; + + +// A value type representing a preparsed format. These can be created, copied +// around, and reused to speed up formatting loops. +// The user must specify through the template arguments the conversion +// characters used in the format. This will be checked at compile time. +// +// This class uses Conv enum values to specify each argument. +// This allows for more flexibility as you can specify multiple possible +// conversion characters for each argument. +// ParsedFormat<char...> is a simplified alias for when the user only +// needs to specify a single conversion character for each argument. 
+// +// Example: +// // Extended format supports multiple characters per argument: +// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // 'format' can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +// +// This class also supports runtime format checking with the ::New() and +// ::NewAllowIgnored() factory functions. +// This is the only API that allows the user to pass a runtime specified format +// string. These factory functions will return NULL if the format does not match +// the conversions requested by the user. +template <FormatConversionCharSet... C> +class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { + public: + explicit ExtendedParsedFormat(string_view format) +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + __attribute__(( + enable_if(str_format_internal::EnsureConstexpr(format), + "Format string is not constexpr."), + enable_if(str_format_internal::ValidFormatImpl<C...>(format), + "Format specified does not match the template arguments."))) +#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER + : ExtendedParsedFormat(format, false) { + } + + // ExtendedParsedFormat factory function. + // The user still has to specify the conversion characters, but they will not + // be checked at compile time. Instead, it will be checked at runtime. + // This delays the checking to runtime, but allows the user to pass + // dynamically sourced formats. + // It returns NULL if the format does not match the conversion characters. + // The user is responsible for checking the return value before using it. + // + // The 'New' variant will check that all the specified arguments are being + // consumed by the format and return NULL if any argument is being ignored. 
+ // The 'NewAllowIgnored' variant will not verify this and will allow formats + // that ignore arguments. + static std::unique_ptr<ExtendedParsedFormat> New(string_view format) { + return New(format, false); + } + static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored( + string_view format) { + return New(format, true); + } + + private: + static std::unique_ptr<ExtendedParsedFormat> New(string_view format, + bool allow_ignored) { + std::unique_ptr<ExtendedParsedFormat> conv( + new ExtendedParsedFormat(format, allow_ignored)); + if (conv->has_error()) return nullptr; + return conv; + } + + ExtendedParsedFormat(string_view s, bool allow_ignored) + : ParsedFormatBase(s, allow_ignored, {C...}) {} +}; +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_format/parser_test.cc b/third_party/abseil_cpp/absl/strings/internal/str_format/parser_test.cc new file mode 100644 index 000000000000..a5fa1c79aaf4 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_format/parser_test.cc @@ -0,0 +1,427 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/str_format/parser.h" + +#include <string.h> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/macros.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace str_format_internal { + +namespace { + +using testing::Pair; + +TEST(LengthModTest, Names) { + struct Expectation { + int line; + LengthMod mod; + const char *name; + }; + const Expectation kExpect[] = { + {__LINE__, LengthMod::none, "" }, + {__LINE__, LengthMod::h, "h" }, + {__LINE__, LengthMod::hh, "hh"}, + {__LINE__, LengthMod::l, "l" }, + {__LINE__, LengthMod::ll, "ll"}, + {__LINE__, LengthMod::L, "L" }, + {__LINE__, LengthMod::j, "j" }, + {__LINE__, LengthMod::z, "z" }, + {__LINE__, LengthMod::t, "t" }, + {__LINE__, LengthMod::q, "q" }, + }; + EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), 10); + for (auto e : kExpect) { + SCOPED_TRACE(e.line); + EXPECT_EQ(e.name, LengthModToString(e.mod)); + } +} + +TEST(ConversionCharTest, Names) { + struct Expectation { + FormatConversionChar id; + char name; + }; + // clang-format off + const Expectation kExpect[] = { +#define X(c) {FormatConversionCharInternal::c, #c[0]} + X(c), X(s), // text + X(d), X(i), X(o), X(u), X(x), X(X), // int + X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float + X(n), X(p), // misc +#undef X + {FormatConversionCharInternal::kNone, '\0'}, + }; + // clang-format on + for (auto e : kExpect) { + SCOPED_TRACE(e.name); + FormatConversionChar v = e.id; + EXPECT_EQ(e.name, FormatConversionCharToChar(v)); + } +} + +class ConsumeUnboundConversionTest : public ::testing::Test { + public: + std::pair<string_view, string_view> Consume(string_view src) { + int next = 0; + o = UnboundConversion(); // refresh + const char* p = ConsumeUnboundConversion( + src.data(), src.data() + src.size(), &o, &next); + if (!p) return {{}, src}; + return {string_view(src.data(), p - src.data()), + string_view(p, src.data() + src.size() - p)}; + } + + bool Run(const char *fmt, bool force_positional = false) { + int 
next = force_positional ? -1 : 0; + o = UnboundConversion(); // refresh + return ConsumeUnboundConversion(fmt, fmt + strlen(fmt), &o, &next) == + fmt + strlen(fmt); + } + UnboundConversion o; +}; + +TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) { + struct Expectation { + int line; + string_view src; + string_view out; + string_view src_post; + }; + const Expectation kExpect[] = { + {__LINE__, "", "", "" }, + {__LINE__, "b", "", "b" }, // 'b' is invalid + {__LINE__, "ba", "", "ba"}, // 'b' is invalid + {__LINE__, "l", "", "l" }, // just length mod isn't okay + {__LINE__, "d", "d", "" }, // basic + {__LINE__, "d ", "d", " " }, // leave suffix + {__LINE__, "dd", "d", "d" }, // don't be greedy + {__LINE__, "d9", "d", "9" }, // leave non-space suffix + {__LINE__, "dzz", "d", "zz"}, // length mod as suffix + {__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed. + {__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width + {__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.line); + EXPECT_THAT(Consume(e.src), Pair(e.out, e.src_post)); + } +} + +TEST_F(ConsumeUnboundConversionTest, BasicConversion) { + EXPECT_FALSE(Run("")); + EXPECT_FALSE(Run("z")); + + EXPECT_FALSE(Run("dd")); // no excess allowed + + EXPECT_TRUE(Run("d")); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + EXPECT_EQ(1, o.arg_position); +} + +TEST_F(ConsumeUnboundConversionTest, ArgPosition) { + EXPECT_TRUE(Run("d")); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("3$d")); + EXPECT_EQ(3, o.arg_position); + EXPECT_TRUE(Run("1$d")); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("1$d", true)); + EXPECT_EQ(1, o.arg_position); + EXPECT_TRUE(Run("123$d")); + EXPECT_EQ(123, o.arg_position); + EXPECT_TRUE(Run("123$d", true)); + EXPECT_EQ(123, o.arg_position); 
+ EXPECT_TRUE(Run("10$d")); + EXPECT_EQ(10, o.arg_position); + EXPECT_TRUE(Run("10$d", true)); + EXPECT_EQ(10, o.arg_position); + + // Position can't be zero. + EXPECT_FALSE(Run("0$d")); + EXPECT_FALSE(Run("0$d", true)); + EXPECT_FALSE(Run("1$*0$d")); + EXPECT_FALSE(Run("1$.*0$d")); + + // Position can't start with a zero digit at all. That is not a 'decimal'. + EXPECT_FALSE(Run("01$p")); + EXPECT_FALSE(Run("01$p", true)); + EXPECT_FALSE(Run("1$*01$p")); + EXPECT_FALSE(Run("1$.*01$p")); +} + +TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) { + EXPECT_TRUE(Run("14d")); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + + EXPECT_TRUE(Run("14.d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run(".d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run(".5d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(5, o.precision.value()); + + EXPECT_TRUE(Run(".0d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(0, o.precision.value()); + + EXPECT_TRUE(Run("14.5d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(14, o.width.value()); + EXPECT_EQ(5, o.precision.value()); + + EXPECT_TRUE(Run("*.*d")); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(1, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(2, o.precision.get_from_arg()); + EXPECT_EQ(3, o.arg_position); + + EXPECT_TRUE(Run("*d")); + EXPECT_TRUE(o.width.is_from_arg()); + 
EXPECT_EQ(1, o.width.get_from_arg()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_LT(o.precision.value(), 0); + EXPECT_EQ(2, o.arg_position); + + EXPECT_TRUE(Run(".*d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_LT(o.width.value(), 0); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(1, o.precision.get_from_arg()); + EXPECT_EQ(2, o.arg_position); + + // mixed implicit and explicit: didn't specify arg position. + EXPECT_FALSE(Run("*23$.*34$d")); + + EXPECT_TRUE(Run("12$*23$.*34$d")); + EXPECT_EQ(12, o.arg_position); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(23, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(34, o.precision.get_from_arg()); + + EXPECT_TRUE(Run("2$*5$.*9$d")); + EXPECT_EQ(2, o.arg_position); + EXPECT_TRUE(o.width.is_from_arg()); + EXPECT_EQ(5, o.width.get_from_arg()); + EXPECT_TRUE(o.precision.is_from_arg()); + EXPECT_EQ(9, o.precision.get_from_arg()); + + EXPECT_FALSE(Run(".*0$d")) << "no arg 0"; + + // Large values + EXPECT_TRUE(Run("999999999.999999999d")); + EXPECT_FALSE(o.width.is_from_arg()); + EXPECT_EQ(999999999, o.width.value()); + EXPECT_FALSE(o.precision.is_from_arg()); + EXPECT_EQ(999999999, o.precision.value()); + + EXPECT_FALSE(Run("1000000000.999999999d")); + EXPECT_FALSE(Run("999999999.1000000000d")); + EXPECT_FALSE(Run("9999999999d")); + EXPECT_FALSE(Run(".9999999999d")); +} + +TEST_F(ConsumeUnboundConversionTest, Flags) { + static const char kAllFlags[] = "-+ #0"; + static const int kNumFlags = ABSL_ARRAYSIZE(kAllFlags) - 1; + for (int rev = 0; rev < 2; ++rev) { + for (int i = 0; i < 1 << kNumFlags; ++i) { + std::string fmt; + for (int k = 0; k < kNumFlags; ++k) + if ((i >> k) & 1) fmt += kAllFlags[k]; + // flag order shouldn't matter + if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); } + fmt += 'd'; + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt.c_str())); + EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left); + EXPECT_EQ(fmt.find('+') == std::string::npos, 
!o.flags.show_pos); + EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col); + EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt); + EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero); + } + } +} + +TEST_F(ConsumeUnboundConversionTest, BasicFlag) { + // Flag is on + for (const char* fmt : {"d", "llx", "G", "1$X"}) { + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt)); + EXPECT_TRUE(o.flags.basic); + } + + // Flag is off + for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { + SCOPED_TRACE(fmt); + EXPECT_TRUE(Run(fmt)); + EXPECT_FALSE(o.flags.basic); + } +} + +TEST_F(ConsumeUnboundConversionTest, LengthMod) { + EXPECT_TRUE(Run("d")); + EXPECT_EQ(LengthMod::none, o.length_mod); + EXPECT_TRUE(Run("hd")); + EXPECT_EQ(LengthMod::h, o.length_mod); + EXPECT_TRUE(Run("hhd")); + EXPECT_EQ(LengthMod::hh, o.length_mod); + EXPECT_TRUE(Run("ld")); + EXPECT_EQ(LengthMod::l, o.length_mod); + EXPECT_TRUE(Run("lld")); + EXPECT_EQ(LengthMod::ll, o.length_mod); + EXPECT_TRUE(Run("Lf")); + EXPECT_EQ(LengthMod::L, o.length_mod); + EXPECT_TRUE(Run("qf")); + EXPECT_EQ(LengthMod::q, o.length_mod); + EXPECT_TRUE(Run("jd")); + EXPECT_EQ(LengthMod::j, o.length_mod); + EXPECT_TRUE(Run("zd")); + EXPECT_EQ(LengthMod::z, o.length_mod); + EXPECT_TRUE(Run("td")); + EXPECT_EQ(LengthMod::t, o.length_mod); +} + +struct SummarizeConsumer { + std::string* out; + explicit SummarizeConsumer(std::string* out) : out(out) {} + + bool Append(string_view s) { + *out += "[" + std::string(s) + "]"; + return true; + } + + bool ConvertOne(const UnboundConversion& conv, string_view s) { + *out += "{"; + *out += std::string(s); + *out += ":"; + *out += std::to_string(conv.arg_position) + "$"; + if (conv.width.is_from_arg()) { + *out += std::to_string(conv.width.get_from_arg()) + "$*"; + } + if (conv.precision.is_from_arg()) { + *out += "." 
+ std::to_string(conv.precision.get_from_arg()) + "$*"; + } + *out += FormatConversionCharToChar(conv.conv); + *out += "}"; + return true; + } +}; + +std::string SummarizeParsedFormat(const ParsedFormatBase& pc) { + std::string out; + if (!pc.ProcessFormat(SummarizeConsumer(&out))) out += "!"; + return out; +} + +class ParsedFormatTest : public testing::Test {}; + +TEST_F(ParsedFormatTest, ValueSemantics) { + ParsedFormatBase p1({}, true, {}); // empty format + EXPECT_EQ("", SummarizeParsedFormat(p1)); + + ParsedFormatBase p2 = p1; // copy construct (empty) + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); + + p1 = ParsedFormatBase("hello%s", true, + {FormatConversionCharSetInternal::s}); // move assign + EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1)); + + ParsedFormatBase p3 = p1; // copy construct (nonempty) + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p3)); + + using std::swap; + swap(p1, p2); + EXPECT_EQ("", SummarizeParsedFormat(p1)); + EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p2)); + swap(p1, p2); // undo + + p2 = p1; // copy assign + EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); +} + +struct ExpectParse { + const char* in; + std::initializer_list<FormatConversionCharSet> conv_set; + const char* out; +}; + +TEST_F(ParsedFormatTest, Parsing) { + // Parse should be equivalent to that obtained by ConversionParseIterator. + // No need to retest the parsing edge cases here. 
+ const ExpectParse kExpect[] = { + {"", {}, ""}, + {"ab", {}, "[ab]"}, + {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"}, + {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"}, + {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"}, + {"a%b %d", {}, "[a]!"}, // stop after error + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.in); + EXPECT_EQ(e.out, + SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set))); + } +} + +TEST_F(ParsedFormatTest, ParsingFlagOrder) { + const ExpectParse kExpect[] = { + {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"}, + {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"}, + {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"}, + {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"}, + {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"}, + {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"}, + {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"}, + }; + for (const auto& e : kExpect) { + SCOPED_TRACE(e.in); + EXPECT_EQ(e.out, + SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set))); + } +} + +} // namespace +} // namespace str_format_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/str_join_internal.h b/third_party/abseil_cpp/absl/strings/internal/str_join_internal.h new file mode 100644 index 000000000000..31dbf672f0b6 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/str_join_internal.h @@ -0,0 +1,314 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Join API that are inlined/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in this file are: +// +// - A handful of default Formatters +// - JoinAlgorithm() overloads +// - JoinRange() overloads +// - JoinTupleLoop<> +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_join.h +// +// IWYU pragma: private, include "absl/strings/str_join.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ + +#include <cstring> +#include <iterator> +#include <memory> +#include <string> +#include <type_traits> +#include <utility> + +#include "absl/strings/internal/ostringstream.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/str_cat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// +// Formatter objects +// +// The following are implementation classes for standard Formatter objects. The +// factory functions that users will call to create and use these formatters are +// defined and documented in absl/strings/str_join.h. +// + +// The default formatter. Converts alpha-numeric types to strings. 
+struct AlphaNumFormatterImpl { + // This template is needed in order to support passing in a dereferenced + // vector<bool>::iterator + template <typename T> + void operator()(std::string* out, const T& t) const { + StrAppend(out, AlphaNum(t)); + } + + void operator()(std::string* out, const AlphaNum& t) const { + StrAppend(out, t); + } +}; + +// A type that's used to overload the JoinAlgorithm() function (defined below) +// for ranges that do not require additional formatting (e.g., a range of +// strings). + +struct NoFormatter : public AlphaNumFormatterImpl {}; + +// Formats types to strings using the << operator. +class StreamFormatterImpl { + public: + // The method isn't const because it mutates state. Making it const will + // render StreamFormatterImpl thread-hostile. + template <typename T> + void operator()(std::string* out, const T& t) { + // The stream is created lazily to avoid paying the relatively high cost + // of its construction when joining an empty range. + if (strm_) { + strm_->clear(); // clear the bad, fail and eof bits in case they were set + strm_->str(out); + } else { + strm_.reset(new strings_internal::OStringStream(out)); + } + *strm_ << t; + } + + private: + std::unique_ptr<strings_internal::OStringStream> strm_; +}; + +// Formats a std::pair<>. The 'first' member is formatted using f1_ and the +// 'second' member is formatted using f2_. sep_ is the separator. 
+template <typename F1, typename F2> +class PairFormatterImpl { + public: + PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2) + : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {} + + template <typename T> + void operator()(std::string* out, const T& p) { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + template <typename T> + void operator()(std::string* out, const T& p) const { + f1_(out, p.first); + out->append(sep_); + f2_(out, p.second); + } + + private: + F1 f1_; + std::string sep_; + F2 f2_; +}; + +// Wraps another formatter and dereferences the argument to operator() then +// passes the dereferenced argument to the wrapped formatter. This can be +// useful, for example, to join a std::vector<int*>. +template <typename Formatter> +class DereferenceFormatterImpl { + public: + DereferenceFormatterImpl() : f_() {} + explicit DereferenceFormatterImpl(Formatter&& f) + : f_(std::forward<Formatter>(f)) {} + + template <typename T> + void operator()(std::string* out, const T& t) { + f_(out, *t); + } + + template <typename T> + void operator()(std::string* out, const T& t) const { + f_(out, *t); + } + + private: + Formatter f_; +}; + +// DefaultFormatter<T> is a traits class that selects a default Formatter to use +// for the given type T. The ::Type member names the Formatter to use. This is +// used by the strings::Join() functions that do NOT take a Formatter argument, +// in which case a default Formatter must be chosen. +// +// AlphaNumFormatterImpl is the default in the base template, followed by +// specializations for other types. 
+template <typename ValueType> +struct DefaultFormatter { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<const char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<char*> { + typedef AlphaNumFormatterImpl Type; +}; +template <> +struct DefaultFormatter<std::string> { + typedef NoFormatter Type; +}; +template <> +struct DefaultFormatter<absl::string_view> { + typedef NoFormatter Type; +}; +template <typename ValueType> +struct DefaultFormatter<ValueType*> { + typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type> + Type; +}; + +template <typename ValueType> +struct DefaultFormatter<std::unique_ptr<ValueType>> + : public DefaultFormatter<ValueType*> {}; + +// +// JoinAlgorithm() functions +// + +// The main joining algorithm. This simply joins the elements in the given +// iterator range, each separated by the given separator, into an output string, +// and formats each element using the provided Formatter object. +template <typename Iterator, typename Formatter> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + Formatter&& f) { + std::string result; + absl::string_view sep(""); + for (Iterator it = start; it != end; ++it) { + result.append(sep.data(), sep.size()); + f(&result, *it); + sep = s; + } + return result; +} + +// A joining algorithm that's optimized for a forward iterator range of +// string-like objects that do not need any additional formatting. This is to +// optimize the common case of joining, say, a std::vector<string> or a +// std::vector<absl::string_view>. +// +// This is an overload of the previous JoinAlgorithm() function. Here the +// Formatter argument is of type NoFormatter. Since NoFormatter is an internal +// type, this overload is only invoked when strings::Join() is called with a +// range of string-like objects (e.g., std::string, absl::string_view), and an +// explicit Formatter argument was NOT specified. 
+// +// The optimization is that the needed space will be reserved in the output +// string to avoid the need to resize while appending. To do this, the iterator +// range will be traversed twice: once to calculate the total needed size, and +// then again to copy the elements and delimiters to the output string. +template <typename Iterator, + typename = typename std::enable_if<std::is_convertible< + typename std::iterator_traits<Iterator>::iterator_category, + std::forward_iterator_tag>::value>::type> +std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, + NoFormatter) { + std::string result; + if (start != end) { + // Sums size + size_t result_size = start->size(); + for (Iterator it = start; ++it != end;) { + result_size += s.size(); + result_size += it->size(); + } + + if (result_size > 0) { + STLStringResizeUninitialized(&result, result_size); + + // Joins strings + char* result_buf = &*result.begin(); + memcpy(result_buf, start->data(), start->size()); + result_buf += start->size(); + for (Iterator it = start; ++it != end;) { + memcpy(result_buf, s.data(), s.size()); + result_buf += s.size(); + memcpy(result_buf, it->data(), it->size()); + result_buf += it->size(); + } + } + } + + return result; +} + +// JoinTupleLoop implements a loop over the elements of a std::tuple, which +// are heterogeneous. The primary template matches the tuple interior case. It +// continues the iteration after appending a separator (for nonzero indices) +// and formatting an element of the tuple. The specialization for the I=N case +// matches the end-of-tuple, and terminates the iteration. 
+template <size_t I, size_t N> +struct JoinTupleLoop { + template <typename Tup, typename Formatter> + void operator()(std::string* out, const Tup& tup, absl::string_view sep, + Formatter&& fmt) { + if (I > 0) out->append(sep.data(), sep.size()); + fmt(out, std::get<I>(tup)); + JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt); + } +}; +template <size_t N> +struct JoinTupleLoop<N, N> { + template <typename Tup, typename Formatter> + void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {} +}; + +template <typename... T, typename Formatter> +std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep, + Formatter&& fmt) { + std::string result; + JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt); + return result; +} + +template <typename Iterator> +std::string JoinRange(Iterator first, Iterator last, + absl::string_view separator) { + // No formatter was explicitly given, so a default must be chosen. + typedef typename std::iterator_traits<Iterator>::value_type ValueType; + typedef typename DefaultFormatter<ValueType>::Type Formatter; + return JoinAlgorithm(first, last, separator, Formatter()); +} + +template <typename Range, typename Formatter> +std::string JoinRange(const Range& range, absl::string_view separator, + Formatter&& fmt) { + using std::begin; + using std::end; + return JoinAlgorithm(begin(range), end(range), separator, fmt); +} + +template <typename Range> +std::string JoinRange(const Range& range, absl::string_view separator) { + using std::begin; + using std::end; + return JoinRange(begin(range), end(range), separator); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/str_split_internal.h b/third_party/abseil_cpp/absl/strings/internal/str_split_internal.h new file mode 100644 index 000000000000..a2f41c153131 --- /dev/null +++ 
b/third_party/abseil_cpp/absl/strings/internal/str_split_internal.h @@ -0,0 +1,430 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This file declares INTERNAL parts of the Split API that are inline/templated +// or otherwise need to be available at compile time. The main abstractions +// defined in here are +// +// - ConvertibleToStringView +// - SplitIterator<> +// - Splitter<> +// +// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including +// absl/strings/str_split.h. +// +// IWYU pragma: private, include "absl/strings/str_split.h" + +#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ + +#include <array> +#include <initializer_list> +#include <iterator> +#include <map> +#include <type_traits> +#include <utility> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +#ifdef _GLIBCXX_DEBUG +#include "absl/strings/internal/stl_type_traits.h" +#endif // _GLIBCXX_DEBUG + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// This class is implicitly constructible from everything that absl::string_view +// is implicitly constructible from, except for rvalue strings. This means it +// can be used as a function parameter in places where passing a temporary +// string might cause memory lifetime issues. 
+class ConvertibleToStringView { + public: + ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) + ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) + : value_(s) {} + ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) + : value_(s) {} + + // Matches rvalue strings and moves their data to a member. + ConvertibleToStringView(std::string&& s) = delete; + ConvertibleToStringView(const std::string&& s) = delete; + + absl::string_view value() const { return value_; } + + private: + absl::string_view value_; +}; + +// An iterator that enumerates the parts of a string from a Splitter. The text +// to be split, the Delimiter, and the Predicate are all taken from the given +// Splitter object. Iterators may only be compared if they refer to the same +// Splitter instance. +// +// This class is NOT part of the public splitting API. +template <typename Splitter> +class SplitIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + + enum State { kInitState, kLastState, kEndState }; + SplitIterator(State state, const Splitter* splitter) + : pos_(0), + state_(state), + splitter_(splitter), + delimiter_(splitter->delimiter()), + predicate_(splitter->predicate()) { + // Hack to maintain backward compatibility. This one block makes it so an + // empty absl::string_view whose .data() happens to be nullptr behaves + // *differently* from an otherwise empty absl::string_view whose .data() is + // not nullptr. This is an undesirable difference in general, but this + // behavior is maintained to avoid breaking existing code that happens to + // depend on this old behavior/bug. Perhaps it will be fixed one day. 
The + // difference in behavior is as follows: + // Split(absl::string_view(""), '-'); // {""} + // Split(absl::string_view(), '-'); // {} + if (splitter_->text().data() == nullptr) { + state_ = kEndState; + pos_ = splitter_->text().size(); + return; + } + + if (state_ == kEndState) { + pos_ = splitter_->text().size(); + } else { + ++(*this); + } + } + + bool at_end() const { return state_ == kEndState; } + + reference operator*() const { return curr_; } + pointer operator->() const { return &curr_; } + + SplitIterator& operator++() { + do { + if (state_ == kLastState) { + state_ = kEndState; + return *this; + } + const absl::string_view text = splitter_->text(); + const absl::string_view d = delimiter_.Find(text, pos_); + if (d.data() == text.data() + text.size()) state_ = kLastState; + curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); + pos_ += curr_.size() + d.size(); + } while (!predicate_(curr_)); + return *this; + } + + SplitIterator operator++(int) { + SplitIterator old(*this); + ++(*this); + return old; + } + + friend bool operator==(const SplitIterator& a, const SplitIterator& b) { + return a.state_ == b.state_ && a.pos_ == b.pos_; + } + + friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { + return !(a == b); + } + + private: + size_t pos_; + State state_; + absl::string_view curr_; + const Splitter* splitter_; + typename Splitter::DelimiterType delimiter_; + typename Splitter::PredicateType predicate_; +}; + +// HasMappedType<T>::value is true iff there exists a type T::mapped_type. +template <typename T, typename = void> +struct HasMappedType : std::false_type {}; +template <typename T> +struct HasMappedType<T, absl::void_t<typename T::mapped_type>> + : std::true_type {}; + +// HasValueType<T>::value is true iff there exists a type T::value_type. 
+template <typename T, typename = void> +struct HasValueType : std::false_type {}; +template <typename T> +struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { +}; + +// HasConstIterator<T>::value is true iff there exists a type T::const_iterator. +template <typename T, typename = void> +struct HasConstIterator : std::false_type {}; +template <typename T> +struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> + : std::true_type {}; + +// IsInitializerList<T>::value is true iff T is an std::initializer_list. More +// details below in Splitter<> where this is used. +std::false_type IsInitializerListDispatch(...); // default: No +template <typename T> +std::true_type IsInitializerListDispatch(std::initializer_list<T>*); +template <typename T> +struct IsInitializerList + : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; + +// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition +// is true for type 'C'. +// +// Restricts conversion to container-like types (by testing for the presence of +// a const_iterator member type) and also to disable conversion to an +// std::initializer_list (which also has a const_iterator). Otherwise, code +// compiled in C++11 will get an error due to ambiguous conversion paths (in +// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> +// or an std::initializer_list<T>). 
+ +template <typename C, bool has_value_type, bool has_mapped_type> +struct SplitterIsConvertibleToImpl : std::false_type {}; + +template <typename C> +struct SplitterIsConvertibleToImpl<C, true, false> + : std::is_constructible<typename C::value_type, absl::string_view> {}; + +template <typename C> +struct SplitterIsConvertibleToImpl<C, true, true> + : absl::conjunction< + std::is_constructible<typename C::key_type, absl::string_view>, + std::is_constructible<typename C::mapped_type, absl::string_view>> {}; + +template <typename C> +struct SplitterIsConvertibleTo + : SplitterIsConvertibleToImpl< + C, +#ifdef _GLIBCXX_DEBUG + !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && +#endif // _GLIBCXX_DEBUG + !IsInitializerList< + typename std::remove_reference<C>::type>::value && + HasValueType<C>::value && HasConstIterator<C>::value, + HasMappedType<C>::value> { +}; + +// This class implements the range that is returned by absl::StrSplit(). This +// class has templated conversion operators that allow it to be implicitly +// converted to a variety of types that the caller may have specified on the +// left-hand side of an assignment. +// +// The main interface for interacting with this class is through its implicit +// conversion operators. However, this class may also be used like a container +// in that it has .begin() and .end() member functions. It may also be used +// within a range-for loop. +// +// Output containers can be collections of any type that is constructible from +// an absl::string_view. +// +// An Predicate functor may be supplied. This predicate will be used to filter +// the split strings: only strings for which the predicate returns true will be +// kept. A Predicate object is any unary functor that takes an absl::string_view +// and returns bool. 
+// +// The StringType parameter can be either string_view or string, depending on +// whether the Splitter refers to a string stored elsewhere, or if the string +// resides inside the Splitter itself. +template <typename Delimiter, typename Predicate, typename StringType> +class Splitter { + public: + using DelimiterType = Delimiter; + using PredicateType = Predicate; + using const_iterator = strings_internal::SplitIterator<Splitter>; + using value_type = typename std::iterator_traits<const_iterator>::value_type; + + Splitter(StringType input_text, Delimiter d, Predicate p) + : text_(std::move(input_text)), + delimiter_(std::move(d)), + predicate_(std::move(p)) {} + + absl::string_view text() const { return text_; } + const Delimiter& delimiter() const { return delimiter_; } + const Predicate& predicate() const { return predicate_; } + + // Range functions that iterate the split substrings as absl::string_view + // objects. These methods enable a Splitter to be used in a range-based for + // loop. + const_iterator begin() const { return {const_iterator::kInitState, this}; } + const_iterator end() const { return {const_iterator::kEndState, this}; } + + // An implicit conversion operator that is restricted to only those containers + // that the splitter is convertible to. + template <typename Container, + typename = typename std::enable_if< + SplitterIsConvertibleTo<Container>::value>::type> + operator Container() const { // NOLINT(runtime/explicit) + return ConvertToContainer<Container, typename Container::value_type, + HasMappedType<Container>::value>()(*this); + } + + // Returns a pair with its .first and .second members set to the first two + // strings returned by the begin() iterator. Either/both of .first and .second + // will be constructed with empty strings if the iterator doesn't have a + // corresponding value. 
+ template <typename First, typename Second> + operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) + absl::string_view first, second; + auto it = begin(); + if (it != end()) { + first = *it; + if (++it != end()) { + second = *it; + } + } + return {First(first), Second(second)}; + } + + private: + // ConvertToContainer is a functor converting a Splitter to the requested + // Container of ValueType. It is specialized below to optimize splitting to + // certain combinations of Container and ValueType. + // + // This base template handles the generic case of storing the split results in + // the requested non-map-like container and converting the split substrings to + // the requested type. + template <typename Container, typename ValueType, bool is_map = false> + struct ConvertToContainer { + Container operator()(const Splitter& splitter) const { + Container c; + auto it = std::inserter(c, c.end()); + for (const auto& sp : splitter) { + *it++ = ValueType(sp); + } + return c; + } + }; + + // Partial specialization for a std::vector<absl::string_view>. + // + // Optimized for the common case of splitting to a + // std::vector<absl::string_view>. In this case we first split the results to + // a small array of absl::string_view on the stack, to reduce reallocations. 
+ template <typename A> + struct ConvertToContainer<std::vector<absl::string_view, A>, + absl::string_view, false> { + std::vector<absl::string_view, A> operator()( + const Splitter& splitter) const { + struct raw_view { + const char* data; + size_t size; + operator absl::string_view() const { // NOLINT(runtime/explicit) + return {data, size}; + } + }; + std::vector<absl::string_view, A> v; + std::array<raw_view, 16> ar; + for (auto it = splitter.begin(); !it.at_end();) { + size_t index = 0; + do { + ar[index].data = it->data(); + ar[index].size = it->size(); + ++it; + } while (++index != ar.size() && !it.at_end()); + v.insert(v.end(), ar.begin(), ar.begin() + index); + } + return v; + } + }; + + // Partial specialization for a std::vector<std::string>. + // + // Optimized for the common case of splitting to a std::vector<std::string>. + // In this case we first split the results to a std::vector<absl::string_view> + // so the returned std::vector<std::string> can have space reserved to avoid + // std::string moves. + template <typename A> + struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { + std::vector<std::string, A> operator()(const Splitter& splitter) const { + const std::vector<absl::string_view> v = splitter; + return std::vector<std::string, A>(v.begin(), v.end()); + } + }; + + // Partial specialization for containers of pairs (e.g., maps). + // + // The algorithm is to insert a new pair into the map for each even-numbered + // item, with the even-numbered item as the key with a default-constructed + // value. Each odd-numbered item will then be assigned to the last pair's + // value. 
+ template <typename Container, typename First, typename Second> + struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + Container operator()(const Splitter& splitter) const { + Container m; + typename Container::iterator it; + bool insert = true; + for (const auto& sp : splitter) { + if (insert) { + it = Inserter<Container>::Insert(&m, First(sp), Second()); + } else { + it->second = Second(sp); + } + insert = !insert; + } + return m; + } + + // Inserts the key and value into the given map, returning an iterator to + // the inserted item. Specialized for std::map and std::multimap to use + // emplace() and adapt emplace()'s return value. + template <typename Map> + struct Inserter { + using M = Map; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->insert(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::map<Ts...>> { + using M = std::map<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; + } + }; + + template <typename... Ts> + struct Inserter<std::multimap<Ts...>> { + using M = std::multimap<Ts...>; + template <typename... Args> + static typename M::iterator Insert(M* m, Args&&... args) { + return m->emplace(std::make_pair(std::forward<Args>(args)...)); + } + }; + }; + + StringType text_; + Delimiter delimiter_; + Predicate predicate_; +}; + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/string_constant.h b/third_party/abseil_cpp/absl/strings/internal/string_constant.h new file mode 100644 index 000000000000..b15f1d9bcfac --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/string_constant.h @@ -0,0 +1,70 @@ +// Copyright 2020 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ +#define ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// StringConstant<T> represents a compile time string constant. +// It can be accessed via its `absl::string_view value` static member. +// It is guaranteed that the `string_view` returned has constant `.data()`, +// constant `.size()` and constant `value[i]` for all `0 <= i < .size()` +// +// The `T` is an opaque type. It is guaranteed that different string constants +// will have different values of `T`. This allows users to associate the string +// constant with other static state at compile time. +// +// Instances should be made using the `MakeStringConstant()` factory function +// below. +template <typename T> +struct StringConstant { + private: + // Returns true if `view` points to constant data. + // Otherwise, it can't be constant evaluated. 
+ static constexpr bool ValidateConstant(absl::string_view view) { + return view.empty() || 2 * view[0] != 1; + } + + public: + static constexpr absl::string_view value = T{}(); + constexpr absl::string_view operator()() const { return value; } + + static_assert(ValidateConstant(value), + "The input string_view must point to constant data."); +}; + +template <typename T> +constexpr absl::string_view StringConstant<T>::value; // NOLINT + +// Factory function for `StringConstant` instances. +// It supports callables that have a constexpr default constructor and a +// constexpr operator(). +// It must return an `absl::string_view` or `const char*` pointing to constant +// data. This is validated at compile time. +template <typename T> +constexpr StringConstant<T> MakeStringConstant(T) { + return {}; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/string_constant_test.cc b/third_party/abseil_cpp/absl/strings/internal/string_constant_test.cc new file mode 100644 index 000000000000..392833cf1592 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/string_constant_test.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/internal/string_constant.h" + +#include "absl/meta/type_traits.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +using absl::strings_internal::MakeStringConstant; + +struct Callable { + constexpr absl::string_view operator()() const { + return absl::string_view("Callable", 8); + } +}; + +TEST(StringConstant, Traits) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + + EXPECT_TRUE(std::is_empty<T>::value); + EXPECT_TRUE(std::is_trivial<T>::value); + EXPECT_TRUE(absl::is_trivially_default_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_copy_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_move_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_destructible<T>::value); +} + +TEST(StringConstant, MakeFromCallable) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str()); +} + +TEST(StringConstant, MakeFromStringConstant) { + // We want to make sure the StringConstant itself is a valid input to the + // factory function. + constexpr auto str = MakeStringConstant(Callable{}); + constexpr auto str2 = MakeStringConstant(str); + using T = decltype(str2); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str2()); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/internal/utf8.cc b/third_party/abseil_cpp/absl/strings/internal/utf8.cc new file mode 100644 index 000000000000..8fd8edc1ec6f --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/utf8.cc @@ -0,0 +1,53 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// UTF8 utilities, implemented to reduce dependencies. + +#include "absl/strings/internal/utf8.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { + if (utf8_char <= 0x7F) { + *buffer = static_cast<char>(utf8_char); + return 1; + } else if (utf8_char <= 0x7FF) { + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xC0 | utf8_char; + return 2; + } else if (utf8_char <= 0xFFFF) { + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xE0 | utf8_char; + return 3; + } else { + buffer[3] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[2] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[1] = 0x80 | (utf8_char & 0x3F); + utf8_char >>= 6; + buffer[0] = 0xF0 | utf8_char; + return 4; + } +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/internal/utf8.h b/third_party/abseil_cpp/absl/strings/internal/utf8.h new file mode 100644 index 000000000000..32fb1093bea3 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/utf8.h @@ -0,0 +1,50 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// UTF8 utilities, implemented to reduce dependencies. + +#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_ +#define ABSL_STRINGS_INTERNAL_UTF8_H_ + +#include <cstddef> +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes +// out the UTF-8 encoding into buffer, and returns the number of chars +// it wrote. +// +// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings +// are: +// 00 - 7F : 0xxxxxxx +// 80 - 7FF : 110xxxxx 10xxxxxx +// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx +// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx +// +// Values greater than 0x10FFFF are not supported and may or may not write +// characters into buffer, however never will more than kMaxEncodedUTF8Size +// bytes be written, regardless of the value of utf8_char. +enum { kMaxEncodedUTF8Size = 4 }; +size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_UTF8_H_ diff --git a/third_party/abseil_cpp/absl/strings/internal/utf8_test.cc b/third_party/abseil_cpp/absl/strings/internal/utf8_test.cc new file mode 100644 index 000000000000..88dd5036e3da --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/internal/utf8_test.cc @@ -0,0 +1,66 @@ +// Copyright 2017 The Abseil Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/utf8.h" + +#include <cstdint> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/base/port.h" + +namespace { + +#if !defined(__cpp_char8_t) +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++2a-compat" +#endif +TEST(EncodeUTF8Char, BasicFunction) { + std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"}, + {0x00A3, u8"\u00A3"}, + {0x00010000, u8"\U00010000"}, + {0x0000FFFF, u8"\U0000FFFF"}, + {0x0010FFFD, u8"\U0010FFFD"}}; + for (auto &test : tests) { + char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}; + char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'}; + char *buf0_written = + &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)]; + char *buf1_written = + &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)]; + int apparent_length = 7; + while (buf0[apparent_length - 1] == '\x00' && + buf1[apparent_length - 1] == '\xFF') { + if (--apparent_length == 0) break; + } + EXPECT_EQ(apparent_length, buf0_written - buf0); + EXPECT_EQ(apparent_length, buf1_written - buf1); + EXPECT_EQ(apparent_length, test.second.length()); + EXPECT_EQ(std::string(buf0, apparent_length), test.second); + EXPECT_EQ(std::string(buf1, apparent_length), test.second); + } + char buf[32] = "Don't Tread On Me"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 
0x00110000), + absl::strings_internal::kMaxEncodedUTF8Size); + char buf2[32] = "Negative is invalid but sane"; + EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1), + absl::strings_internal::kMaxEncodedUTF8Size); +} +#if defined(__clang__) +#pragma clang diagnostic pop +#endif +#endif // !defined(__cpp_char8_t) + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/match.cc b/third_party/abseil_cpp/absl/strings/match.cc new file mode 100644 index 000000000000..8127cb0c5e77 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/match.cc @@ -0,0 +1,40 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/match.h" + +#include "absl/strings/internal/memutil.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2) { + return (piece1.size() == piece2.size() && + 0 == absl::strings_internal::memcasecmp(piece1.data(), piece2.data(), + piece1.size())); + // memcasecmp uses absl::ascii_tolower(). 
+} + +bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix) { + return (text.size() >= prefix.size()) && + EqualsIgnoreCase(text.substr(0, prefix.size()), prefix); +} + +bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) { + return (text.size() >= suffix.size()) && + EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); +} + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/match.h b/third_party/abseil_cpp/absl/strings/match.h new file mode 100644 index 000000000000..90fca98ad2cf --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/match.h @@ -0,0 +1,90 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: match.h +// ----------------------------------------------------------------------------- +// +// This file contains simple utilities for performing string matching checks. +// All of these function parameters are specified as `absl::string_view`, +// meaning that these functions can accept `std::string`, `absl::string_view` or +// NUL-terminated C-style strings. +// +// Examples: +// std::string s = "foo"; +// absl::string_view sv = "f"; +// assert(absl::StrContains(s, sv)); +// +// Note: The order of parameters in these functions is designed to mimic the +// order an equivalent member function would exhibit; +// e.g. 
`s.Contains(x)` ==> `absl::StrContains(s, x)`. +#ifndef ABSL_STRINGS_MATCH_H_ +#define ABSL_STRINGS_MATCH_H_ + +#include <cstring> + +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// StrContains() +// +// Returns whether a given string `haystack` contains the substring `needle`. +inline bool StrContains(absl::string_view haystack, absl::string_view needle) { + return haystack.find(needle, 0) != haystack.npos; +} + +// StartsWith() +// +// Returns whether a given string `text` begins with `prefix`. +inline bool StartsWith(absl::string_view text, absl::string_view prefix) { + return prefix.empty() || + (text.size() >= prefix.size() && + memcmp(text.data(), prefix.data(), prefix.size()) == 0); +} + +// EndsWith() +// +// Returns whether a given string `text` ends with `suffix`. +inline bool EndsWith(absl::string_view text, absl::string_view suffix) { + return suffix.empty() || + (text.size() >= suffix.size() && + memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), + suffix.size()) == 0); +} + +// EqualsIgnoreCase() +// +// Returns whether given ASCII strings `piece1` and `piece2` are equal, ignoring +// case in the comparison. +bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2); + +// StartsWithIgnoreCase() +// +// Returns whether a given ASCII string `text` starts with `prefix`, +// ignoring case in the comparison. +bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix); + +// EndsWithIgnoreCase() +// +// Returns whether a given ASCII string `text` ends with `suffix`, ignoring +// case in the comparison. 
+bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix); + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_MATCH_H_ diff --git a/third_party/abseil_cpp/absl/strings/match_test.cc b/third_party/abseil_cpp/absl/strings/match_test.cc new file mode 100644 index 000000000000..4c313dda14e3 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/match_test.cc @@ -0,0 +1,110 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/match.h" + +#include "gtest/gtest.h" + +namespace { + +TEST(MatchTest, StartsWith) { + const std::string s1("123\0abc", 7); + const absl::string_view a("foobar"); + const absl::string_view b(s1); + const absl::string_view e; + EXPECT_TRUE(absl::StartsWith(a, a)); + EXPECT_TRUE(absl::StartsWith(a, "foo")); + EXPECT_TRUE(absl::StartsWith(a, e)); + EXPECT_TRUE(absl::StartsWith(b, s1)); + EXPECT_TRUE(absl::StartsWith(b, b)); + EXPECT_TRUE(absl::StartsWith(b, e)); + EXPECT_TRUE(absl::StartsWith(e, "")); + EXPECT_FALSE(absl::StartsWith(a, b)); + EXPECT_FALSE(absl::StartsWith(b, a)); + EXPECT_FALSE(absl::StartsWith(e, a)); +} + +TEST(MatchTest, EndsWith) { + const std::string s1("123\0abc", 7); + const absl::string_view a("foobar"); + const absl::string_view b(s1); + const absl::string_view e; + EXPECT_TRUE(absl::EndsWith(a, a)); + EXPECT_TRUE(absl::EndsWith(a, "bar")); + EXPECT_TRUE(absl::EndsWith(a, e)); + EXPECT_TRUE(absl::EndsWith(b, s1)); + EXPECT_TRUE(absl::EndsWith(b, b)); + EXPECT_TRUE(absl::EndsWith(b, e)); + EXPECT_TRUE(absl::EndsWith(e, "")); + EXPECT_FALSE(absl::EndsWith(a, b)); + EXPECT_FALSE(absl::EndsWith(b, a)); + EXPECT_FALSE(absl::EndsWith(e, a)); +} + +TEST(MatchTest, Contains) { + absl::string_view a("abcdefg"); + absl::string_view b("abcd"); + absl::string_view c("efg"); + absl::string_view d("gh"); + EXPECT_TRUE(absl::StrContains(a, a)); + EXPECT_TRUE(absl::StrContains(a, b)); + EXPECT_TRUE(absl::StrContains(a, c)); + EXPECT_FALSE(absl::StrContains(a, d)); + EXPECT_TRUE(absl::StrContains("", "")); + EXPECT_TRUE(absl::StrContains("abc", "")); + EXPECT_FALSE(absl::StrContains("", "a")); +} + +TEST(MatchTest, ContainsNull) { + const std::string s = "foo"; + const char* cs = "foo"; + const absl::string_view sv("foo"); + const absl::string_view sv2("foo\0bar", 4); + EXPECT_EQ(s, "foo"); + EXPECT_EQ(sv, "foo"); + EXPECT_NE(sv2, "foo"); + EXPECT_TRUE(absl::EndsWith(s, sv)); + EXPECT_TRUE(absl::StartsWith(cs, sv)); + 
EXPECT_TRUE(absl::StrContains(cs, sv)); + EXPECT_FALSE(absl::StrContains(cs, sv2)); +} + +TEST(MatchTest, EqualsIgnoreCase) { + std::string text = "the"; + absl::string_view data(text); + + EXPECT_TRUE(absl::EqualsIgnoreCase(data, "The")); + EXPECT_TRUE(absl::EqualsIgnoreCase(data, "THE")); + EXPECT_TRUE(absl::EqualsIgnoreCase(data, "the")); + EXPECT_FALSE(absl::EqualsIgnoreCase(data, "Quick")); + EXPECT_FALSE(absl::EqualsIgnoreCase(data, "then")); +} + +TEST(MatchTest, StartsWithIgnoreCase) { + EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "foo")); + EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "Fo")); + EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "")); + EXPECT_FALSE(absl::StartsWithIgnoreCase("foo", "fooo")); + EXPECT_FALSE(absl::StartsWithIgnoreCase("", "fo")); +} + +TEST(MatchTest, EndsWithIgnoreCase) { + EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "foo")); + EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "Oo")); + EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "")); + EXPECT_FALSE(absl::EndsWithIgnoreCase("foo", "fooo")); + EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo")); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/numbers.cc b/third_party/abseil_cpp/absl/strings/numbers.cc new file mode 100644 index 000000000000..3da1059c908d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/numbers.cc @@ -0,0 +1,1083 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// This file contains string processing functions related to +// numeric values. + +#include "absl/strings/numbers.h" + +#include <algorithm> +#include <cassert> +#include <cfloat> // for DBL_DIG and FLT_DIG +#include <cmath> // for HUGE_VAL +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <iterator> +#include <limits> +#include <memory> +#include <utility> + +#include "absl/base/attributes.h" +#include "absl/base/internal/bits.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/ascii.h" +#include "absl/strings/charconv.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/memutil.h" +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +bool SimpleAtof(absl::string_view str, float* out) { + *out = 0.0; + str = StripAsciiWhitespace(str); + if (!str.empty() && str[0] == '+') { + str.remove_prefix(1); + } + auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); + if (result.ec == std::errc::invalid_argument) { + return false; + } + if (result.ptr != str.data() + str.size()) { + // not all non-whitespace characters consumed + return false; + } + // from_chars() with DR 3081's current wording will return max() on + // overflow. SimpleAtof returns infinity instead. 
+ if (result.ec == std::errc::result_out_of_range) { + if (*out > 1.0) { + *out = std::numeric_limits<float>::infinity(); + } else if (*out < -1.0) { + *out = -std::numeric_limits<float>::infinity(); + } + } + return true; +} + +bool SimpleAtod(absl::string_view str, double* out) { + *out = 0.0; + str = StripAsciiWhitespace(str); + if (!str.empty() && str[0] == '+') { + str.remove_prefix(1); + } + auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); + if (result.ec == std::errc::invalid_argument) { + return false; + } + if (result.ptr != str.data() + str.size()) { + // not all non-whitespace characters consumed + return false; + } + // from_chars() with DR 3081's current wording will return max() on + // overflow. SimpleAtod returns infinity instead. + if (result.ec == std::errc::result_out_of_range) { + if (*out > 1.0) { + *out = std::numeric_limits<double>::infinity(); + } else if (*out < -1.0) { + *out = -std::numeric_limits<double>::infinity(); + } + } + return true; +} + +bool SimpleAtob(absl::string_view str, bool* out) { + ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr."); + if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") || + EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") || + EqualsIgnoreCase(str, "1")) { + *out = true; + return true; + } + if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") || + EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") || + EqualsIgnoreCase(str, "0")) { + *out = false; + return true; + } + return false; +} + +// ---------------------------------------------------------------------- +// FastIntToBuffer() overloads +// +// Like the Fast*ToBuffer() functions above, these are intended for speed. +// Unlike the Fast*ToBuffer() functions, however, these functions write +// their output to the beginning of the buffer. The caller is responsible +// for ensuring that the buffer has enough space to hold the output. 
+// +// Returns a pointer to the end of the string (i.e. the null character +// terminating the string). +// ---------------------------------------------------------------------- + +namespace { + +// Used to optimize printing a decimal number's final digit. +const char one_ASCII_final_digits[10][2] { + {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0}, + {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0}, +}; + +} // namespace + +char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) { + uint32_t digits; + // The idea of this implementation is to trim the number of divides to as few + // as possible, and also reducing memory stores and branches, by going in + // steps of two digits at a time rather than one whenever possible. + // The huge-number case is first, in the hopes that the compiler will output + // that case in one branch-free block of code, and only output conditional + // branches into it from below. + if (i >= 1000000000) { // >= 1,000,000,000 + digits = i / 100000000; // 100,000,000 + i -= digits * 100000000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt100_000_000: + digits = i / 1000000; // 1,000,000 + i -= digits * 1000000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt1_000_000: + digits = i / 10000; // 10,000 + i -= digits * 10000; + PutTwoDigits(digits, buffer); + buffer += 2; + lt10_000: + digits = i / 100; + i -= digits * 100; + PutTwoDigits(digits, buffer); + buffer += 2; + lt100: + digits = i; + PutTwoDigits(digits, buffer); + buffer += 2; + *buffer = 0; + return buffer; + } + + if (i < 100) { + digits = i; + if (i >= 10) goto lt100; + memcpy(buffer, one_ASCII_final_digits[i], 2); + return buffer + 1; + } + if (i < 10000) { // 10,000 + if (i >= 1000) goto lt10_000; + digits = i / 100; + i -= digits * 100; + *buffer++ = '0' + digits; + goto lt100; + } + if (i < 1000000) { // 1,000,000 + if (i >= 100000) goto lt1_000_000; + digits = i / 10000; // 10,000 + i -= digits * 10000; + *buffer++ = '0' + digits; + goto lt10_000; + } 
+ if (i < 100000000) { // 100,000,000 + if (i >= 10000000) goto lt100_000_000; + digits = i / 1000000; // 1,000,000 + i -= digits * 1000000; + *buffer++ = '0' + digits; + goto lt1_000_000; + } + // we already know that i < 1,000,000,000 + digits = i / 100000000; // 100,000,000 + i -= digits * 100000000; + *buffer++ = '0' + digits; + goto lt100_000_000; +} + +char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) { + uint32_t u = i; + if (i < 0) { + *buffer++ = '-'; + // We need to do the negation in modular (i.e., "unsigned") + // arithmetic; MSVC++ apparently warns for plain "-u", so + // we write the equivalent expression "0 - u" instead. + u = 0 - u; + } + return numbers_internal::FastIntToBuffer(u, buffer); +} + +char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) { + uint32_t u32 = static_cast<uint32_t>(i); + if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer); + + // Here we know i has at least 10 decimal digits. + uint64_t top_1to11 = i / 1000000000; + u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000); + uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11); + + if (top_1to11_32 == top_1to11) { + buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer); + } else { + // top_1to11 has more than 32 bits too; print it in two steps. + uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100); + uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100); + buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer); + PutTwoDigits(mid_2, buffer); + buffer += 2; + } + + // We have only 9 digits now, again the maximum uint32_t can handle fully. 
+ uint32_t digits = u32 / 10000000; // 10,000,000 + u32 -= digits * 10000000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 100000; // 100,000 + u32 -= digits * 100000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 1000; // 1,000 + u32 -= digits * 1000; + PutTwoDigits(digits, buffer); + buffer += 2; + digits = u32 / 10; + u32 -= digits * 10; + PutTwoDigits(digits, buffer); + buffer += 2; + memcpy(buffer, one_ASCII_final_digits[u32], 2); + return buffer + 1; +} + +char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) { + uint64_t u = i; + if (i < 0) { + *buffer++ = '-'; + u = 0 - u; + } + return numbers_internal::FastIntToBuffer(u, buffer); +} + +// Given a 128-bit number expressed as a pair of uint64_t, high half first, +// return that number multiplied by the given 32-bit value. If the result is +// too large to fit in a 128-bit number, divide it by 2 until it fits. +static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num, + uint32_t mul) { + uint64_t bits0_31 = num.second & 0xFFFFFFFF; + uint64_t bits32_63 = num.second >> 32; + uint64_t bits64_95 = num.first & 0xFFFFFFFF; + uint64_t bits96_127 = num.first >> 32; + + // The picture so far: each of these 64-bit values has only the lower 32 bits + // filled in. + // bits96_127: [ 00000000 xxxxxxxx ] + // bits64_95: [ 00000000 xxxxxxxx ] + // bits32_63: [ 00000000 xxxxxxxx ] + // bits0_31: [ 00000000 xxxxxxxx ] + + bits0_31 *= mul; + bits32_63 *= mul; + bits64_95 *= mul; + bits96_127 *= mul; + + // Now the top halves may also have value, though all 64 of their bits will + // never be set at the same time, since they are a result of a 32x32 bit + // multiply. This makes the carry calculation slightly easier. + // bits96_127: [ mmmmmmmm | mmmmmmmm ] + // bits64_95: [ | mmmmmmmm mmmmmmmm | ] + // bits32_63: | [ mmmmmmmm | mmmmmmmm ] + // bits0_31: | [ | mmmmmmmm mmmmmmmm ] + // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... 
] + + uint64_t bits0_63 = bits0_31 + (bits32_63 << 32); + uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) + + (bits0_63 < bits0_31); + uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95); + if (bits128_up == 0) return {bits64_127, bits0_63}; + + int shift = 64 - base_internal::CountLeadingZeros64(bits128_up); + uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift)); + uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift)); + return {hi, lo}; +} + +// Compute num * 5 ^ expfive, and return the first 128 bits of the result, +// where the first bit is always a one. So PowFive(1, 0) starts 0b100000, +// PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc. +static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) { + std::pair<uint64_t, uint64_t> result = {num, 0}; + while (expfive >= 13) { + // 5^13 is the highest power of five that will fit in a 32-bit integer. + result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5); + expfive -= 13; + } + constexpr int powers_of_five[13] = { + 1, + 5, + 5 * 5, + 5 * 5 * 5, + 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, + 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5}; + result = Mul32(result, powers_of_five[expfive & 15]); + int shift = base_internal::CountLeadingZeros64(result.first); + if (shift != 0) { + result.first = (result.first << shift) + (result.second >> (64 - shift)); + result.second = (result.second << shift); + } + return result; +} + +struct ExpDigits { + int32_t exponent; + char digits[6]; +}; + +// SplitToSix converts value, a positive double-precision floating-point number, +// into a base-10 exponent and 6 ASCII digits, where the first digit is never +// zero. 
For example, SplitToSix(1) returns an exponent of zero and a digits +// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between +// two possible representations, e.g. value = 100000.5, then "round to even" is +// performed. +static ExpDigits SplitToSix(const double value) { + ExpDigits exp_dig; + int exp = 5; + double d = value; + // First step: calculate a close approximation of the output, where the + // value d will be between 100,000 and 999,999, representing the digits + // in the output ASCII array, and exp is the base-10 exponent. It would be + // faster to use a table here, and to look up the base-2 exponent of value, + // however value is an IEEE-754 64-bit number, so the table would have 2,000 + // entries, which is not cache-friendly. + if (d >= 999999.5) { + if (d >= 1e+261) exp += 256, d *= 1e-256; + if (d >= 1e+133) exp += 128, d *= 1e-128; + if (d >= 1e+69) exp += 64, d *= 1e-64; + if (d >= 1e+37) exp += 32, d *= 1e-32; + if (d >= 1e+21) exp += 16, d *= 1e-16; + if (d >= 1e+13) exp += 8, d *= 1e-8; + if (d >= 1e+9) exp += 4, d *= 1e-4; + if (d >= 1e+7) exp += 2, d *= 1e-2; + if (d >= 1e+6) exp += 1, d *= 1e-1; + } else { + if (d < 1e-250) exp -= 256, d *= 1e256; + if (d < 1e-122) exp -= 128, d *= 1e128; + if (d < 1e-58) exp -= 64, d *= 1e64; + if (d < 1e-26) exp -= 32, d *= 1e32; + if (d < 1e-10) exp -= 16, d *= 1e16; + if (d < 1e-2) exp -= 8, d *= 1e8; + if (d < 1e+2) exp -= 4, d *= 1e4; + if (d < 1e+4) exp -= 2, d *= 1e2; + if (d < 1e+5) exp -= 1, d *= 1e1; + } + // At this point, d is in the range [99999.5..999999.5) and exp is in the + // range [-324..308]. Since we need to round d up, we want to add a half + // and truncate. + // However, the technique above may have lost some precision, due to its + // repeated multiplication by constants that each may be off by half a bit + // of precision. This only matters if we're close to the edge though. 
+ // Since we'd like to know if the fractional part of d is close to a half, + // we multiply it by 65536 and see if the fractional part is close to 32768. + // (The number doesn't have to be a power of two,but powers of two are faster) + uint64_t d64k = d * 65536; + int dddddd; // A 6-digit decimal integer. + if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) { + // OK, it's fairly likely that precision was lost above, which is + // not a surprise given only 52 mantissa bits are available. Therefore + // redo the calculation using 128-bit numbers. (64 bits are not enough). + + // Start out with digits rounded down; maybe add one below. + dddddd = static_cast<int>(d64k / 65536); + + // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual + // value we're representing, of course, is M.mmm... * 2^exp2. + int exp2; + double m = std::frexp(value, &exp2); + uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0); + // std::frexp returns an m value in the range [0.5, 1.0), however we + // can't multiply it by 2^64 and convert to an integer because some FPUs + // throw an exception when converting an number higher than 2^63 into an + // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter + // since m only has 52 significant bits anyway. + mantissa <<= 1; + exp2 -= 64; // not needed, but nice for debugging + + // OK, we are here to compare: + // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2 + // so we can round up dddddd if appropriate. Those values span the full + // range of 600 orders of magnitude of IEE 64-bit floating-point. + // Fortunately, we already know they are very close, so we don't need to + // track the base-2 exponent of both sides. This greatly simplifies the + // the math since the 2^exp2 calculation is unnecessary and the power-of-10 + // calculation can become a power-of-5 instead. 
+ + std::pair<uint64_t, uint64_t> edge, val; + if (exp >= 6) { + // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa + // Since we're tossing powers of two, 2 * dddddd + 1 is the + // same as dddddd + 0.5 + edge = PowFive(2 * dddddd + 1, exp - 5); + + val.first = mantissa; + val.second = 0; + } else { + // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did + // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to + // mantissa * 5 ^ (5 - exp) + edge = PowFive(2 * dddddd + 1, 0); + + val = PowFive(mantissa, 5 - exp); + } + // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first, + // val.second, edge.first, edge.second); + if (val > edge) { + dddddd++; + } else if (val == edge) { + dddddd += (dddddd & 1); + } + } else { + // Here, we are not close to the edge. + dddddd = static_cast<int>((d64k + 32768) / 65536); + } + if (dddddd == 1000000) { + dddddd = 100000; + exp += 1; + } + exp_dig.exponent = exp; + + int two_digits = dddddd / 10000; + dddddd -= two_digits * 10000; + numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]); + + two_digits = dddddd / 100; + dddddd -= two_digits * 100; + numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]); + + numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]); + return exp_dig; +} + +// Helper function for fast formatting of floating-point. +// The result is the same as "%g", a.k.a. "%.6g". +size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) { + static_assert(std::numeric_limits<float>::is_iec559, + "IEEE-754/IEC-559 support only"); + + char* out = buffer; // we write data to out, incrementing as we go, but + // FloatToBuffer always returns the address of the buffer + // passed in. 
+ + if (std::isnan(d)) { + strcpy(out, "nan"); // NOLINT(runtime/printf) + return 3; + } + if (d == 0) { // +0 and -0 are handled here + if (std::signbit(d)) *out++ = '-'; + *out++ = '0'; + *out = 0; + return out - buffer; + } + if (d < 0) { + *out++ = '-'; + d = -d; + } + if (std::isinf(d)) { + strcpy(out, "inf"); // NOLINT(runtime/printf) + return out + 3 - buffer; + } + + auto exp_dig = SplitToSix(d); + int exp = exp_dig.exponent; + const char* digits = exp_dig.digits; + out[0] = '0'; + out[1] = '.'; + switch (exp) { + case 5: + memcpy(out, &digits[0], 6), out += 6; + *out = 0; + return out - buffer; + case 4: + memcpy(out, &digits[0], 5), out += 5; + if (digits[5] != '0') { + *out++ = '.'; + *out++ = digits[5]; + } + *out = 0; + return out - buffer; + case 3: + memcpy(out, &digits[0], 4), out += 4; + if ((digits[5] | digits[4]) != '0') { + *out++ = '.'; + *out++ = digits[4]; + if (digits[5] != '0') *out++ = digits[5]; + } + *out = 0; + return out - buffer; + case 2: + memcpy(out, &digits[0], 3), out += 3; + *out++ = '.'; + memcpy(out, &digits[3], 3); + out += 3; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case 1: + memcpy(out, &digits[0], 2), out += 2; + *out++ = '.'; + memcpy(out, &digits[2], 4); + out += 4; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case 0: + memcpy(out, &digits[0], 1), out += 1; + *out++ = '.'; + memcpy(out, &digits[1], 5); + out += 5; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out = 0; + return out - buffer; + case -4: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -3: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -2: + out[2] = '0'; + ++out; + ABSL_FALLTHROUGH_INTENDED; + case -1: + out += 2; + memcpy(out, &digits[0], 6); + out += 6; + while (out[-1] == '0') --out; + *out = 0; + return out - buffer; + } + assert(exp < -4 || exp >= 6); + out[0] = digits[0]; + assert(out[1] == '.'); + 
out += 2; + memcpy(out, &digits[1], 5), out += 5; + while (out[-1] == '0') --out; + if (out[-1] == '.') --out; + *out++ = 'e'; + if (exp > 0) { + *out++ = '+'; + } else { + *out++ = '-'; + exp = -exp; + } + if (exp > 99) { + int dig1 = exp / 100; + exp -= dig1 * 100; + *out++ = '0' + dig1; + } + PutTwoDigits(exp, out); + out += 2; + *out = 0; + return out - buffer; +} + +namespace { +// Represents integer values of digits. +// Uses 36 to indicate an invalid character since we support +// bases up to 36. +static const int8_t kAsciiToInt[256] = { + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; + +// Parse the sign and optional hex or oct prefix in text. +inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/, + int* base_ptr /*inout*/, + bool* negative_ptr /*output*/) { + if (text->data() == nullptr) { + return false; + } + + const char* start = text->data(); + const char* end = start + text->size(); + int base = *base_ptr; + + // Consume whitespace. 
+ while (start < end && absl::ascii_isspace(start[0])) { + ++start; + } + while (start < end && absl::ascii_isspace(end[-1])) { + --end; + } + if (start >= end) { + return false; + } + + // Consume sign. + *negative_ptr = (start[0] == '-'); + if (*negative_ptr || start[0] == '+') { + ++start; + if (start >= end) { + return false; + } + } + + // Consume base-dependent prefix. + // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 + // base 16: "0x" -> base 16 + // Also validate the base. + if (base == 0) { + if (end - start >= 2 && start[0] == '0' && + (start[1] == 'x' || start[1] == 'X')) { + base = 16; + start += 2; + if (start >= end) { + // "0x" with no digits after is invalid. + return false; + } + } else if (end - start >= 1 && start[0] == '0') { + base = 8; + start += 1; + } else { + base = 10; + } + } else if (base == 16) { + if (end - start >= 2 && start[0] == '0' && + (start[1] == 'x' || start[1] == 'X')) { + start += 2; + if (start >= end) { + // "0x" with no digits after is invalid. + return false; + } + } + } else if (base >= 2 && base <= 36) { + // okay + } else { + return false; + } + *text = absl::string_view(start, end - start); + *base_ptr = base; + return true; +} + +// Consume digits. +// +// The classic loop: +// +// for each digit +// value = value * base + digit +// value *= sign +// +// The classic loop needs overflow checking. It also fails on the most +// negative integer, -2147483648 in 32-bit two's complement representation. +// +// My improved loop: +// +// if (!negative) +// for each digit +// value = value * base +// value = value + digit +// else +// for each digit +// value = value * base +// value = value - digit +// +// Overflow checking becomes simple. + +// Lookup tables per IntType: +// vmax/base and vmin/base are precomputed because division costs at least 8ns. +// TODO(junyer): Doing this per base instead (i.e. 
an array of structs, not a +// struct of arrays) would probably be better in terms of d-cache for the most +// commonly used bases. +template <typename IntType> +struct LookupTables { + ABSL_CONST_INIT static const IntType kVmaxOverBase[]; + ABSL_CONST_INIT static const IntType kVminOverBase[]; +}; + +// An array initializer macro for X/base where base in [0, 36]. +// However, note that lookups for base in [0, 1] should never happen because +// base has been validated to be in [2, 36] by safe_parse_sign_and_base(). +#define X_OVER_BASE_INITIALIZER(X) \ + { \ + 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \ + X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \ + X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \ + X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \ + X / 35, X / 36, \ + } + +// This kVmaxOverBase is generated with +// for (int base = 2; base < 37; ++base) { +// absl::uint128 max = std::numeric_limits<absl::uint128>::max(); +// auto result = max / base; +// std::cout << " MakeUint128(" << absl::Uint128High64(result) << "u, " +// << absl::Uint128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/aneYsb +// +// uint128& operator/=(uint128) is not constexpr, so hardcode the resulting +// array to avoid a static initializer. 
// Precomputed max(uint128) / base, indexed by base. Entries 0 and 1 are
// placeholders.
template<>
const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
    0,
    0,
    MakeUint128(9223372036854775807u, 18446744073709551615u),
    MakeUint128(6148914691236517205u, 6148914691236517205u),
    MakeUint128(4611686018427387903u, 18446744073709551615u),
    MakeUint128(3689348814741910323u, 3689348814741910323u),
    MakeUint128(3074457345618258602u, 12297829382473034410u),
    MakeUint128(2635249153387078802u, 5270498306774157604u),
    MakeUint128(2305843009213693951u, 18446744073709551615u),
    MakeUint128(2049638230412172401u, 14347467612885206812u),
    MakeUint128(1844674407370955161u, 11068046444225730969u),
    MakeUint128(1676976733973595601u, 8384883669867978007u),
    MakeUint128(1537228672809129301u, 6148914691236517205u),
    MakeUint128(1418980313362273201u, 4256940940086819603u),
    MakeUint128(1317624576693539401u, 2635249153387078802u),
    MakeUint128(1229782938247303441u, 1229782938247303441u),
    MakeUint128(1152921504606846975u, 18446744073709551615u),
    MakeUint128(1085102592571150095u, 1085102592571150095u),
    MakeUint128(1024819115206086200u, 16397105843297379214u),
    MakeUint128(970881267037344821u, 16504981539634861972u),
    MakeUint128(922337203685477580u, 14757395258967641292u),
    MakeUint128(878416384462359600u, 14054662151397753612u),
    MakeUint128(838488366986797800u, 13415813871788764811u),
    MakeUint128(802032351030850070u, 4812194106185100421u),
    MakeUint128(768614336404564650u, 12297829382473034410u),
    MakeUint128(737869762948382064u, 11805916207174113034u),
    MakeUint128(709490156681136600u, 11351842506898185609u),
    MakeUint128(683212743470724133u, 17080318586768103348u),
    MakeUint128(658812288346769700u, 10540996613548315209u),
    MakeUint128(636094623231363848u, 15266270957552732371u),
    MakeUint128(614891469123651720u, 9838263505978427528u),
    MakeUint128(595056260442243600u, 9520900167075897608u),
    MakeUint128(576460752303423487u, 18446744073709551615u),
    MakeUint128(558992244657865200u, 8943875914525843207u),
    MakeUint128(542551296285575047u, 9765923333140350855u),
    MakeUint128(527049830677415760u, 8432797290838652167u),
    MakeUint128(512409557603043100u, 8198552921648689607u),
};

// This kVmaxOverBase generated with
//   for (int base = 2; base < 37; ++base) {
//    absl::int128 max = std::numeric_limits<absl::int128>::max();
//    auto result = max / base;
//    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
//              << absl::Int128Low64(result) << "u),\n";
//  }
// See https://godbolt.org/z/7djYWz
//
// int128& operator/=(int128) is not constexpr, so hardcode the resulting array
// to avoid a static initializer.
//
// Indexed by base; entries 0 and 1 are placeholders.
template<>
const int128 LookupTables<int128>::kVmaxOverBase[] = {
    0,
    0,
    MakeInt128(4611686018427387903, 18446744073709551615u),
    MakeInt128(3074457345618258602, 12297829382473034410u),
    MakeInt128(2305843009213693951, 18446744073709551615u),
    MakeInt128(1844674407370955161, 11068046444225730969u),
    MakeInt128(1537228672809129301, 6148914691236517205u),
    MakeInt128(1317624576693539401, 2635249153387078802u),
    MakeInt128(1152921504606846975, 18446744073709551615u),
    MakeInt128(1024819115206086200, 16397105843297379214u),
    MakeInt128(922337203685477580, 14757395258967641292u),
    MakeInt128(838488366986797800, 13415813871788764811u),
    MakeInt128(768614336404564650, 12297829382473034410u),
    MakeInt128(709490156681136600, 11351842506898185609u),
    MakeInt128(658812288346769700, 10540996613548315209u),
    MakeInt128(614891469123651720, 9838263505978427528u),
    MakeInt128(576460752303423487, 18446744073709551615u),
    MakeInt128(542551296285575047, 9765923333140350855u),
    MakeInt128(512409557603043100, 8198552921648689607u),
    MakeInt128(485440633518672410, 17475862806672206794u),
    MakeInt128(461168601842738790, 7378697629483820646u),
    MakeInt128(439208192231179800, 7027331075698876806u),
    MakeInt128(419244183493398900, 6707906935894382405u),
    MakeInt128(401016175515425035, 2406097053092550210u),
    MakeInt128(384307168202282325, 6148914691236517205u),
    MakeInt128(368934881474191032, 5902958103587056517u),
    MakeInt128(354745078340568300, 5675921253449092804u),
    MakeInt128(341606371735362066, 17763531330238827482u),
    MakeInt128(329406144173384850, 5270498306774157604u),
    MakeInt128(318047311615681924, 7633135478776366185u),
    MakeInt128(307445734561825860, 4919131752989213764u),
    MakeInt128(297528130221121800, 4760450083537948804u),
    MakeInt128(288230376151711743, 18446744073709551615u),
    MakeInt128(279496122328932600, 4471937957262921603u),
    MakeInt128(271275648142787523, 14106333703424951235u),
    MakeInt128(263524915338707880, 4216398645419326083u),
    MakeInt128(256204778801521550, 4099276460824344803u),
};

// This kVminOverBase generated with
//  for (int base = 2; base < 37; ++base) {
//    absl::int128 min = std::numeric_limits<absl::int128>::min();
//    auto result = min / base;
//    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
//              << absl::Int128Low64(result) << "u),\n";
//  }
//
// See https://godbolt.org/z/7djYWz
//
// int128& operator/=(int128) is not constexpr, so hardcode the resulting array
// to avoid a static initializer.
// Precomputed min(int128) / base, indexed by base. Entries 0 and 1 are
// placeholders.
template<>
const int128 LookupTables<int128>::kVminOverBase[] = {
    0,
    0,
    MakeInt128(-4611686018427387904, 0u),
    MakeInt128(-3074457345618258603, 6148914691236517206u),
    MakeInt128(-2305843009213693952, 0u),
    MakeInt128(-1844674407370955162, 7378697629483820647u),
    MakeInt128(-1537228672809129302, 12297829382473034411u),
    MakeInt128(-1317624576693539402, 15811494920322472814u),
    MakeInt128(-1152921504606846976, 0u),
    MakeInt128(-1024819115206086201, 2049638230412172402u),
    MakeInt128(-922337203685477581, 3689348814741910324u),
    MakeInt128(-838488366986797801, 5030930201920786805u),
    MakeInt128(-768614336404564651, 6148914691236517206u),
    MakeInt128(-709490156681136601, 7094901566811366007u),
    MakeInt128(-658812288346769701, 7905747460161236407u),
    MakeInt128(-614891469123651721, 8608480567731124088u),
    MakeInt128(-576460752303423488, 0u),
    MakeInt128(-542551296285575048, 8680820740569200761u),
    MakeInt128(-512409557603043101, 10248191152060862009u),
    MakeInt128(-485440633518672411, 970881267037344822u),
    MakeInt128(-461168601842738791, 11068046444225730970u),
    MakeInt128(-439208192231179801, 11419412998010674810u),
    MakeInt128(-419244183493398901, 11738837137815169211u),
    MakeInt128(-401016175515425036, 16040647020617001406u),
    MakeInt128(-384307168202282326, 12297829382473034411u),
    MakeInt128(-368934881474191033, 12543785970122495099u),
    MakeInt128(-354745078340568301, 12770822820260458812u),
    MakeInt128(-341606371735362067, 683212743470724134u),
    MakeInt128(-329406144173384851, 13176245766935394012u),
    MakeInt128(-318047311615681925, 10813608594933185431u),
    MakeInt128(-307445734561825861, 13527612320720337852u),
    MakeInt128(-297528130221121801, 13686293990171602812u),
    MakeInt128(-288230376151711744, 0u),
    MakeInt128(-279496122328932601, 13974806116446630013u),
    MakeInt128(-271275648142787524, 4340410370284600381u),
    MakeInt128(-263524915338707881, 14230345428290225533u),
    MakeInt128(-256204778801521551, 14347467612885206813u),
};

// Generic tables for every other IntType: max()/base and min()/base for each
// base, expanded by X_OVER_BASE_INITIALIZER.
template <typename IntType>
const IntType LookupTables<IntType>::kVmaxOverBase[] =
    X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());

template <typename IntType>
const IntType LookupTables<IntType>::kVminOverBase[] =
    X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());

#undef X_OVER_BASE_INITIALIZER

// Parses `text` (digits only) as a non-negative number in `base`.
//
// Returns true and stores the value in `*value_p` on success. On an invalid
// digit, stores the value accumulated so far and returns false. On overflow,
// stores the maximum IntType value and returns false.
template <typename IntType>
inline bool safe_parse_positive_int(absl::string_view text, int base,
                                    IntType* value_p) {
  IntType value = 0;
  const IntType vmax = std::numeric_limits<IntType>::max();
  assert(vmax > 0);
  assert(base >= 0);
  assert(vmax >= static_cast<IntType>(base));
  const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
  assert(base < 2 ||
         std::numeric_limits<IntType>::max() / base == vmax_over_base);
  const char* start = text.data();
  const char* end = start + text.size();
  // loop over digits
  for (; start < end; ++start) {
    unsigned char c = static_cast<unsigned char>(start[0]);
    int digit = kAsciiToInt[c];
    if (digit >= base) {
      *value_p = value;
      return false;
    }
    // Guard the multiplication: value * base would exceed vmax.
    if (value > vmax_over_base) {
      *value_p = vmax;
      return false;
    }
    value *= base;
    // Guard the addition: value + digit would exceed vmax.
    if (value > vmax - digit) {
      *value_p = vmax;
      return false;
    }
    value += digit;
  }
  *value_p = value;
  return true;
}

// Parses `text` (digits only, sign already consumed) as a negative number in
// `base`, accumulating downwards from zero.
//
// Returns true and stores the value in `*value_p` on success. On an invalid
// digit, stores the value accumulated so far and returns false. On underflow,
// stores the minimum IntType value and returns false.
template <typename IntType>
inline bool safe_parse_negative_int(absl::string_view text, int base,
                                    IntType* value_p) {
  IntType value = 0;
  const IntType vmin = std::numeric_limits<IntType>::min();
  assert(vmin < 0);
  assert(vmin <= 0 - base);
  IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
  assert(base < 2 ||
         std::numeric_limits<IntType>::min() / base == vmin_over_base);
  // 2003 c++ standard [expr.mul]
  // "... the sign of the remainder is implementation-defined."
  // Although (vmin/base)*base + vmin%base is always vmin.
  // 2011 c++ standard tightens the spec but we cannot rely on it.
  // TODO(junyer): Handle this in the lookup table generation.
  if (vmin % base > 0) {
    vmin_over_base += 1;
  }
  const char* start = text.data();
  const char* end = start + text.size();
  // loop over digits
  for (; start < end; ++start) {
    unsigned char c = static_cast<unsigned char>(start[0]);
    int digit = kAsciiToInt[c];
    if (digit >= base) {
      *value_p = value;
      return false;
    }
    // Guard the multiplication: value * base would fall below vmin.
    if (value < vmin_over_base) {
      *value_p = vmin;
      return false;
    }
    value *= base;
    // Guard the subtraction: value - digit would fall below vmin.
    if (value < vmin + digit) {
      *value_p = vmin;
      return false;
    }
    value -= digit;
  }
  *value_p = value;
  return true;
}

// Input format based on POSIX.1-2008 strtol
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
//
// Parses a signed integer. `*value_p` is zeroed first, so it is 0 when the
// sign/base prefix itself is rejected.
template <typename IntType>
inline bool safe_int_internal(absl::string_view text, IntType* value_p,
                              int base) {
  *value_p = 0;
  bool negative;
  if (!safe_parse_sign_and_base(&text, &base, &negative)) {
    return false;
  }
  if (!negative) {
    return safe_parse_positive_int(text, base, value_p);
  } else {
    return safe_parse_negative_int(text, base, value_p);
  }
}

// Parses an unsigned integer. Any input with a leading '-' is rejected, and
// `*value_p` is left at 0 in that case.
template <typename IntType>
inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
                               int base) {
  *value_p = 0;
  bool negative;
  if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
    return false;
  }
  return safe_parse_positive_int(text, base, value_p);
}
}  // anonymous namespace

namespace numbers_internal {

// Digit conversion.
// The 16 lowercase hexadecimal digits, in order.
ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
    "0123456789abcdef";

// Two lowercase hex characters per byte value: the pair for byte b starts at
// offset 2 * b.
ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";

// The two ASCII decimal digits of every value in [0, 100), indexed by value.
ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = {
    {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'},
    {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'},
    {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'},
    {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'},
    {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
    {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'},
    {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
    {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'},
    {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'},
    {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
    {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'},
    {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'},
    {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'},
    {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
    {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
    {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'},
    {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}};

// Non-template entry points: each forwards to the signed or unsigned internal
// parser instantiated for its integer type.
bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
  return safe_int_internal<int32_t>(text, value, base);
}

bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
  return safe_int_internal<int64_t>(text, value, base);
}

bool safe_strto128_base(absl::string_view text, int128* value, int base) {
  return safe_int_internal<absl::int128>(text, value, base);
}

bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
  return safe_uint_internal<uint32_t>(text, value, base);
}

bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
  return safe_uint_internal<uint64_t>(text, value, base);
}

bool safe_strtou128_base(absl::string_view text, uint128* value, int base) {
  return safe_uint_internal<absl::uint128>(text, value, base);
}

}  // namespace numbers_internal
ABSL_NAMESPACE_END
}  // namespace absl
diff --git a/third_party/abseil_cpp/absl/strings/numbers.h b/third_party/abseil_cpp/absl/strings/numbers.h
new file mode 100644
index 000000000000..2e004b44f887
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/numbers.h
@@ -0,0 +1,273 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: numbers.h
// -----------------------------------------------------------------------------
//
// This package contains functions for converting strings to numbers. For
// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,
// which automatically detect and convert most number values appropriately.

#ifndef ABSL_STRINGS_NUMBERS_H_
#define ABSL_STRINGS_NUMBERS_H_

#ifdef __SSE4_2__
#include <x86intrin.h>
#endif

#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <string>
#include <type_traits>

#include "absl/base/config.h"
#include "absl/base/internal/bits.h"
#ifdef __SSE4_2__
// TODO(jorg): Remove this when we figure out the right way
// to swap bytes on SSE 4.2 that works with the compilers
// we claim to support. Also, add tests for the compiler
// that doesn't support the Intel _bswap64 intrinsic but
// does support all the SSE 4.2 intrinsics
#include "absl/base/internal/endian.h"
#endif
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

// SimpleAtoi()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into an integer value, returning `true` if successful. The string
// must reflect a base-10 integer whose value falls within the range of the
// integer type (optionally preceded by a `+` or `-`). If any errors are
// encountered, this function returns `false`, leaving `out` in an unspecified
// state.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out);

// SimpleAtof()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a float, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtof() is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);

// SimpleAtod()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a double, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtod is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out);

// SimpleAtob()
//
// Converts the given string into a boolean, returning `true` if successful.
// The following case-insensitive strings are interpreted as boolean `true`:
// "true", "t", "yes", "y", "1". The following case-insensitive strings
// are interpreted as boolean `false`: "false", "f", "no", "n", "0". If any
// errors are encountered, this function returns `false`, leaving `out` in an
// unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out);

ABSL_NAMESPACE_END
}  // namespace absl

// End of public API. Implementation details follow.

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace numbers_internal {

// Digit conversion.
ABSL_DLL extern const char kHexChar[17];  // 0123456789abcdef
ABSL_DLL extern const char
    kHexTable[513];  // 000102030405060708090a0b0c0d0e0f1011...
ABSL_DLL extern const char
    two_ASCII_digits[100][2];  // 00, 01, 02, 03...

// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
// range 0 <= i < 100, and buf must have space for two characters. Example:
//   char buf[2];
//   PutTwoDigits(42, buf);
//   // buf[0] == '4'
//   // buf[1] == '2'
// No terminating NUL is written; exactly two bytes are copied.
inline void PutTwoDigits(size_t i, char* buf) {
  assert(i < 100);
  memcpy(buf, two_ASCII_digits[i], 2);
}

// safe_strto?() functions for implementing SimpleAtoi()
// Each parses `text` as an integer in the given `base` and stores the result
// in `*value`, returning true on success.
bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
bool safe_strto128_base(absl::string_view text, absl::int128* value,
                         int base);
bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base);
bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base);
bool safe_strtou128_base(absl::string_view text, absl::uint128* value,
                         int base);

// Output buffer sizes, in bytes, for the FastIntToBuffer() overloads and for
// SixDigitsToBuffer() respectively.
static const int kFastToBufferSize = 32;
static const int kSixDigitsToBufferSize = 16;

// Helper function for fast formatting of floating-point values.
// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
// significant digits are returned, trailing zeros are removed, and numbers
// outside the range 0.0001-999999 are output using scientific notation
// (1.23456e+06). This routine is heavily optimized.
// Required buffer size is `kSixDigitsToBufferSize`.
size_t SixDigitsToBuffer(double d, char* buffer);

// These functions are intended for speed. All functions take an output buffer
// as an argument and return a pointer to the last byte they wrote, which is the
// terminating '\0'. At most `kFastToBufferSize` bytes are written.
char* FastIntToBuffer(int32_t, char*);
char* FastIntToBuffer(uint32_t, char*);
char* FastIntToBuffer(int64_t, char*);
char* FastIntToBuffer(uint64_t, char*);

// For enums and integer types that are not an exact match for the types above,
// use templates to call the appropriate one of the four overloads above.
template <typename int_type>
char* FastIntToBuffer(int_type i, char* buffer) {
  static_assert(sizeof(i) <= 64 / 8,
                "FastIntToBuffer works only with 64-bit-or-less integers.");
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  if (static_cast<int_type>(1) - 2 < 0) {  // Signed
    if (sizeof(i) > 32 / 8) {              // 33-bit to 64-bit
      return FastIntToBuffer(static_cast<int64_t>(i), buffer);
    } else {  // 32-bit or less
      return FastIntToBuffer(static_cast<int32_t>(i), buffer);
    }
  } else {                     // Unsigned
    if (sizeof(i) > 32 / 8) {  // 33-bit to 64-bit
      return FastIntToBuffer(static_cast<uint64_t>(i), buffer);
    } else {  // 32-bit or less
      return FastIntToBuffer(static_cast<uint32_t>(i), buffer);
    }
  }
}

// Implementation of SimpleAtoi, generalized to support arbitrary base (used
// with base different from 10 elsewhere in Abseil implementation).
//
// Dispatches on the signedness and size of `int_type` to one of the four
// 32/64-bit safe_strto*_base() functions and casts the parsed value back to
// `int_type`. `*out` is assigned even when parsing fails.
template <typename int_type>
ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out,
                                           int base) {
  static_assert(sizeof(*out) == 4 || sizeof(*out) == 8,
                "SimpleAtoi works only with 32-bit or 64-bit integers.");
  static_assert(!std::is_floating_point<int_type>::value,
                "Use SimpleAtof or SimpleAtod instead.");
  bool parsed;
  // TODO(jorg): This signed-ness check is used because it works correctly
  // with enums, and it also serves to check that int_type is not a pointer.
  // If one day something like std::is_signed<enum E> works, switch to it.
  if (static_cast<int_type>(1) - 2 < 0) {  // Signed
    if (sizeof(*out) == 64 / 8) {          // 64-bit
      int64_t val;
      parsed = numbers_internal::safe_strto64_base(s, &val, base);
      *out = static_cast<int_type>(val);
    } else {  // 32-bit
      int32_t val;
      parsed = numbers_internal::safe_strto32_base(s, &val, base);
      *out = static_cast<int_type>(val);
    }
  } else {                         // Unsigned
    if (sizeof(*out) == 64 / 8) {  // 64-bit
      uint64_t val;
      parsed = numbers_internal::safe_strtou64_base(s, &val, base);
      *out = static_cast<int_type>(val);
    } else {  // 32-bit
      uint32_t val;
      parsed = numbers_internal::safe_strtou32_base(s, &val, base);
      *out = static_cast<int_type>(val);
    }
  }
  return parsed;
}

// FastHexToBufferZeroPad16()
//
// Outputs `val` into `out` as if by `snprintf(out, 17, "%016" PRIx64, val)` but
// without the terminating null character. Thus `out` must be of length >= 16.
// Returns the number of non-pad digits of the output (it can never be zero
// since 0 has one digit).
inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) {
#ifdef __SSE4_2__
  uint64_t be = absl::big_endian::FromHost64(val);
  const auto kNibbleMask = _mm_set1_epi8(0xf);
  const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7',
                                        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f');
  auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be));  // load low 64 bits
  auto v4 = _mm_srli_epi64(v, 4);                            // shift 4 right
  auto il = _mm_unpacklo_epi8(v4, v);                        // interleave bytes
  auto m = _mm_and_si128(il, kNibbleMask);                   // mask out nibbles
  auto hexchars = _mm_shuffle_epi8(kHexDigits, m);           // hex chars
  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars);
#else
  // Portable path: emit two hex characters per byte, most significant byte
  // first, using the kHexTable lookup.
  for (int i = 0; i < 8; ++i) {
    auto byte = (val >> (56 - 8 * i)) & 0xFF;
    auto* hex = &absl::numbers_internal::kHexTable[byte * 2];
    std::memcpy(out + 2 * i, hex, 2);
  }
#endif
  // | 0x1 so that even 0 has 1 digit.
  return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4;
}

}  // namespace numbers_internal

// SimpleAtoi()
//
// Converts a string to an integer, using `safe_strto?()` functions for actual
// parsing, returning `true` if successful. The `safe_strto?()` functions apply
// strict checking; the string must be a base-10 integer, optionally followed or
// preceded by ASCII whitespace, with a value in the range of the corresponding
// integer type.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) {
  return numbers_internal::safe_strtoi_base(str, out, 10);
}

// 128-bit overloads: these call the 128-bit parsers directly, since
// safe_strtoi_base() only accepts 32-bit and 64-bit types.
ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str,
                                            absl::int128* out) {
  return numbers_internal::safe_strto128_base(str, out, 10);
}

ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str,
                                            absl::uint128* out) {
  return numbers_internal::safe_strtou128_base(str, out, 10);
}

ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_STRINGS_NUMBERS_H_
diff --git a/third_party/abseil_cpp/absl/strings/numbers_benchmark.cc b/third_party/abseil_cpp/absl/strings/numbers_benchmark.cc
new file mode 100644
index 000000000000..6e79b3e811ff
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/numbers_benchmark.cc
@@ -0,0 +1,286 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +#include <cstdint> +#include <random> +#include <string> +#include <type_traits> +#include <vector> + +#include "benchmark/benchmark.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "absl/strings/numbers.h" + +namespace { + +template <typename T> +void BM_FastIntToBuffer(benchmark::State& state) { + const int inc = state.range(0); + char buf[absl::numbers_internal::kFastToBufferSize]; + // Use the unsigned type to increment to take advantage of well-defined + // modular arithmetic. + typename std::make_unsigned<T>::type x = 0; + for (auto _ : state) { + absl::numbers_internal::FastIntToBuffer(static_cast<T>(x), buf); + x += inc; + } +} +BENCHMARK_TEMPLATE(BM_FastIntToBuffer, int32_t)->Range(0, 1 << 15); +BENCHMARK_TEMPLATE(BM_FastIntToBuffer, int64_t)->Range(0, 1 << 30); + +// Creates an integer that would be printed as `num_digits` repeated 7s in the +// given `base`. `base` must be greater than or equal to 8. 
+int64_t RepeatedSevens(int num_digits, int base) { + ABSL_RAW_CHECK(base >= 8, ""); + int64_t num = 7; + while (--num_digits) num = base * num + 7; + return num; +} + +void BM_safe_strto32_string(benchmark::State& state) { + const int digits = state.range(0); + const int base = state.range(1); + std::string str(digits, '7'); // valid in octal, decimal and hex + int32_t value = 0; + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::numbers_internal::safe_strto32_base(str, &value, base)); + } + ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), ""); +} +BENCHMARK(BM_safe_strto32_string) + ->ArgPair(1, 8) + ->ArgPair(1, 10) + ->ArgPair(1, 16) + ->ArgPair(2, 8) + ->ArgPair(2, 10) + ->ArgPair(2, 16) + ->ArgPair(4, 8) + ->ArgPair(4, 10) + ->ArgPair(4, 16) + ->ArgPair(8, 8) + ->ArgPair(8, 10) + ->ArgPair(8, 16) + ->ArgPair(10, 8) + ->ArgPair(9, 10); + +void BM_safe_strto64_string(benchmark::State& state) { + const int digits = state.range(0); + const int base = state.range(1); + std::string str(digits, '7'); // valid in octal, decimal and hex + int64_t value = 0; + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::numbers_internal::safe_strto64_base(str, &value, base)); + } + ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), ""); +} +BENCHMARK(BM_safe_strto64_string) + ->ArgPair(1, 8) + ->ArgPair(1, 10) + ->ArgPair(1, 16) + ->ArgPair(2, 8) + ->ArgPair(2, 10) + ->ArgPair(2, 16) + ->ArgPair(4, 8) + ->ArgPair(4, 10) + ->ArgPair(4, 16) + ->ArgPair(8, 8) + ->ArgPair(8, 10) + ->ArgPair(8, 16) + ->ArgPair(16, 8) + ->ArgPair(16, 10) + ->ArgPair(16, 16); + +void BM_safe_strtou32_string(benchmark::State& state) { + const int digits = state.range(0); + const int base = state.range(1); + std::string str(digits, '7'); // valid in octal, decimal and hex + uint32_t value = 0; + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::numbers_internal::safe_strtou32_base(str, &value, base)); + } + ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), ""); +} 
+BENCHMARK(BM_safe_strtou32_string) + ->ArgPair(1, 8) + ->ArgPair(1, 10) + ->ArgPair(1, 16) + ->ArgPair(2, 8) + ->ArgPair(2, 10) + ->ArgPair(2, 16) + ->ArgPair(4, 8) + ->ArgPair(4, 10) + ->ArgPair(4, 16) + ->ArgPair(8, 8) + ->ArgPair(8, 10) + ->ArgPair(8, 16) + ->ArgPair(10, 8) + ->ArgPair(9, 10); + +void BM_safe_strtou64_string(benchmark::State& state) { + const int digits = state.range(0); + const int base = state.range(1); + std::string str(digits, '7'); // valid in octal, decimal and hex + uint64_t value = 0; + for (auto _ : state) { + benchmark::DoNotOptimize( + absl::numbers_internal::safe_strtou64_base(str, &value, base)); + } + ABSL_RAW_CHECK(value == RepeatedSevens(digits, base), ""); +} +BENCHMARK(BM_safe_strtou64_string) + ->ArgPair(1, 8) + ->ArgPair(1, 10) + ->ArgPair(1, 16) + ->ArgPair(2, 8) + ->ArgPair(2, 10) + ->ArgPair(2, 16) + ->ArgPair(4, 8) + ->ArgPair(4, 10) + ->ArgPair(4, 16) + ->ArgPair(8, 8) + ->ArgPair(8, 10) + ->ArgPair(8, 16) + ->ArgPair(16, 8) + ->ArgPair(16, 10) + ->ArgPair(16, 16); + +// Returns a vector of `num_strings` strings. Each string represents a +// floating point number with `num_digits` digits before the decimal point and +// another `num_digits` digits after. +std::vector<std::string> MakeFloatStrings(int num_strings, int num_digits) { + // For convenience, use a random number generator to generate the test data. + // We don't actually need random properties, so use a fixed seed. 
+ std::minstd_rand0 rng(1); + std::uniform_int_distribution<int> random_digit('0', '9'); + + std::vector<std::string> float_strings(num_strings); + for (std::string& s : float_strings) { + s.reserve(2 * num_digits + 1); + for (int i = 0; i < num_digits; ++i) { + s.push_back(static_cast<char>(random_digit(rng))); + } + s.push_back('.'); + for (int i = 0; i < num_digits; ++i) { + s.push_back(static_cast<char>(random_digit(rng))); + } + } + return float_strings; +} + +template <typename StringType> +StringType GetStringAs(const std::string& s) { + return static_cast<StringType>(s); +} +template <> +const char* GetStringAs<const char*>(const std::string& s) { + return s.c_str(); +} + +template <typename StringType> +std::vector<StringType> GetStringsAs(const std::vector<std::string>& strings) { + std::vector<StringType> result; + result.reserve(strings.size()); + for (const std::string& s : strings) { + result.push_back(GetStringAs<StringType>(s)); + } + return result; +} + +template <typename T> +void BM_SimpleAtof(benchmark::State& state) { + const int num_strings = state.range(0); + const int num_digits = state.range(1); + std::vector<std::string> backing_strings = + MakeFloatStrings(num_strings, num_digits); + std::vector<T> inputs = GetStringsAs<T>(backing_strings); + float value; + for (auto _ : state) { + for (const T& input : inputs) { + benchmark::DoNotOptimize(absl::SimpleAtof(input, &value)); + } + } +} +BENCHMARK_TEMPLATE(BM_SimpleAtof, absl::string_view) + ->ArgPair(10, 1) + ->ArgPair(10, 2) + ->ArgPair(10, 4) + ->ArgPair(10, 8); +BENCHMARK_TEMPLATE(BM_SimpleAtof, const char*) + ->ArgPair(10, 1) + ->ArgPair(10, 2) + ->ArgPair(10, 4) + ->ArgPair(10, 8); +BENCHMARK_TEMPLATE(BM_SimpleAtof, std::string) + ->ArgPair(10, 1) + ->ArgPair(10, 2) + ->ArgPair(10, 4) + ->ArgPair(10, 8); + +template <typename T> +void BM_SimpleAtod(benchmark::State& state) { + const int num_strings = state.range(0); + const int num_digits = state.range(1); + std::vector<std::string> 
backing_strings = + MakeFloatStrings(num_strings, num_digits); + std::vector<T> inputs = GetStringsAs<T>(backing_strings); + double value; + for (auto _ : state) { + for (const T& input : inputs) { + benchmark::DoNotOptimize(absl::SimpleAtod(input, &value)); + } + } +} +BENCHMARK_TEMPLATE(BM_SimpleAtod, absl::string_view) + ->ArgPair(10, 1) + ->ArgPair(10, 2) + ->ArgPair(10, 4) + ->ArgPair(10, 8); +BENCHMARK_TEMPLATE(BM_SimpleAtod, const char*) + ->ArgPair(10, 1) + ->ArgPair(10, 2) + ->ArgPair(10, 4) + ->ArgPair(10, 8); +BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string) + ->ArgPair(10, 1) + ->ArgPair(10, 2) + ->ArgPair(10, 4) + ->ArgPair(10, 8); + +void BM_FastHexToBufferZeroPad16(benchmark::State& state) { + absl::BitGen rng; + std::vector<uint64_t> nums; + nums.resize(1000); + auto min = std::numeric_limits<uint64_t>::min(); + auto max = std::numeric_limits<uint64_t>::max(); + for (auto& num : nums) { + num = absl::LogUniform(rng, min, max); + } + + char buf[16]; + while (state.KeepRunningBatch(nums.size())) { + for (auto num : nums) { + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf); + benchmark::DoNotOptimize(digits); + benchmark::DoNotOptimize(buf); + } + } +} +BENCHMARK(BM_FastHexToBufferZeroPad16); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/numbers_test.cc b/third_party/abseil_cpp/absl/strings/numbers_test.cc new file mode 100644 index 000000000000..4ab67fb669be --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/numbers_test.cc @@ -0,0 +1,1356 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file tests string processing functions related to numeric values. + +#include "absl/strings/numbers.h" + +#include <sys/types.h> + +#include <cfenv> // NOLINT(build/c++11) +#include <cinttypes> +#include <climits> +#include <cmath> +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <limits> +#include <numeric> +#include <random> +#include <set> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "absl/strings/internal/numbers_test_common.h" +#include "absl/strings/internal/ostringstream.h" +#include "absl/strings/internal/pow10_helper.h" +#include "absl/strings/str_cat.h" + +namespace { + +using absl::numbers_internal::kSixDigitsToBufferSize; +using absl::numbers_internal::safe_strto32_base; +using absl::numbers_internal::safe_strto64_base; +using absl::numbers_internal::safe_strtou32_base; +using absl::numbers_internal::safe_strtou64_base; +using absl::numbers_internal::SixDigitsToBuffer; +using absl::strings_internal::Itoa; +using absl::strings_internal::strtouint32_test_cases; +using absl::strings_internal::strtouint64_test_cases; +using absl::SimpleAtoi; +using testing::Eq; +using testing::MatchesRegex; + +// Number of floats to test with. +// 5,000,000 is a reasonable default for a test that only takes a few seconds. 
+// 1,000,000,000+ triggers checking for all possible mantissa values for +// double-precision tests. 2,000,000,000+ triggers checking for every possible +// single-precision float. +const int kFloatNumCases = 5000000; + +// This is a slow, brute-force routine to compute the exact base-10 +// representation of a double-precision floating-point number. It +// is useful for debugging only. +std::string PerfectDtoa(double d) { + if (d == 0) return "0"; + if (d < 0) return "-" + PerfectDtoa(-d); + + // Basic theory: decompose d into mantissa and exp, where + // d = mantissa * 2^exp, and exp is as close to zero as possible. + int64_t mantissa, exp = 0; + while (d >= 1ULL << 63) ++exp, d *= 0.5; + while ((mantissa = d) != d) --exp, d *= 2.0; + + // Then convert mantissa to ASCII, and either double it (if + // exp > 0) or halve it (if exp < 0) repeatedly. "halve it" + // in this case means multiplying it by five and dividing by 10. + constexpr int maxlen = 1100; // worst case is actually 1030 or so. + char buf[maxlen + 5]; + for (int64_t num = mantissa, pos = maxlen; --pos >= 0;) { + buf[pos] = '0' + (num % 10); + num /= 10; + } + char* begin = &buf[0]; + char* end = buf + maxlen; + for (int i = 0; i != exp; i += (exp > 0) ? 1 : -1) { + int carry = 0; + for (char* p = end; --p != begin;) { + int dig = *p - '0'; + dig = dig * (exp > 0 ? 2 : 5) + carry; + carry = dig / 10; + dig %= 10; + *p = '0' + dig; + } + } + if (exp < 0) { + // "dividing by 10" above means we have to add the decimal point. 
+ memmove(end + 1 + exp, end + exp, 1 - exp); + end[exp] = '.'; + ++end; + } + while (*begin == '0' && begin[1] != '.') ++begin; + return {begin, end}; +} + +TEST(ToString, PerfectDtoa) { + EXPECT_THAT(PerfectDtoa(1), Eq("1")); + EXPECT_THAT(PerfectDtoa(0.1), + Eq("0.1000000000000000055511151231257827021181583404541015625")); + EXPECT_THAT(PerfectDtoa(1e24), Eq("999999999999999983222784")); + EXPECT_THAT(PerfectDtoa(5e-324), MatchesRegex("0.0000.*625")); + for (int i = 0; i < 100; ++i) { + for (double multiplier : + {1e-300, 1e-200, 1e-100, 0.1, 1.0, 10.0, 1e100, 1e300}) { + double d = multiplier * i; + std::string s = PerfectDtoa(d); + EXPECT_DOUBLE_EQ(d, strtod(s.c_str(), nullptr)); + } + } +} + +template <typename integer> +struct MyInteger { + integer i; + explicit constexpr MyInteger(integer i) : i(i) {} + constexpr operator integer() const { return i; } + + constexpr MyInteger operator+(MyInteger other) const { return i + other.i; } + constexpr MyInteger operator-(MyInteger other) const { return i - other.i; } + constexpr MyInteger operator*(MyInteger other) const { return i * other.i; } + constexpr MyInteger operator/(MyInteger other) const { return i / other.i; } + + constexpr bool operator<(MyInteger other) const { return i < other.i; } + constexpr bool operator<=(MyInteger other) const { return i <= other.i; } + constexpr bool operator==(MyInteger other) const { return i == other.i; } + constexpr bool operator>=(MyInteger other) const { return i >= other.i; } + constexpr bool operator>(MyInteger other) const { return i > other.i; } + constexpr bool operator!=(MyInteger other) const { return i != other.i; } + + integer as_integer() const { return i; } +}; + +typedef MyInteger<int64_t> MyInt64; +typedef MyInteger<uint64_t> MyUInt64; + +void CheckInt32(int32_t x) { + char buffer[absl::numbers_internal::kFastToBufferSize]; + char* actual = absl::numbers_internal::FastIntToBuffer(x, buffer); + std::string expected = std::to_string(x); + EXPECT_EQ(expected, 
std::string(buffer, actual)) << " Input " << x; + + char* generic_actual = absl::numbers_internal::FastIntToBuffer(x, buffer); + EXPECT_EQ(expected, std::string(buffer, generic_actual)) << " Input " << x; +} + +void CheckInt64(int64_t x) { + char buffer[absl::numbers_internal::kFastToBufferSize + 3]; + buffer[0] = '*'; + buffer[23] = '*'; + buffer[24] = '*'; + char* actual = absl::numbers_internal::FastIntToBuffer(x, &buffer[1]); + std::string expected = std::to_string(x); + EXPECT_EQ(expected, std::string(&buffer[1], actual)) << " Input " << x; + EXPECT_EQ(buffer[0], '*'); + EXPECT_EQ(buffer[23], '*'); + EXPECT_EQ(buffer[24], '*'); + + char* my_actual = + absl::numbers_internal::FastIntToBuffer(MyInt64(x), &buffer[1]); + EXPECT_EQ(expected, std::string(&buffer[1], my_actual)) << " Input " << x; +} + +void CheckUInt32(uint32_t x) { + char buffer[absl::numbers_internal::kFastToBufferSize]; + char* actual = absl::numbers_internal::FastIntToBuffer(x, buffer); + std::string expected = std::to_string(x); + EXPECT_EQ(expected, std::string(buffer, actual)) << " Input " << x; + + char* generic_actual = absl::numbers_internal::FastIntToBuffer(x, buffer); + EXPECT_EQ(expected, std::string(buffer, generic_actual)) << " Input " << x; +} + +void CheckUInt64(uint64_t x) { + char buffer[absl::numbers_internal::kFastToBufferSize + 1]; + char* actual = absl::numbers_internal::FastIntToBuffer(x, &buffer[1]); + std::string expected = std::to_string(x); + EXPECT_EQ(expected, std::string(&buffer[1], actual)) << " Input " << x; + + char* generic_actual = absl::numbers_internal::FastIntToBuffer(x, &buffer[1]); + EXPECT_EQ(expected, std::string(&buffer[1], generic_actual)) + << " Input " << x; + + char* my_actual = + absl::numbers_internal::FastIntToBuffer(MyUInt64(x), &buffer[1]); + EXPECT_EQ(expected, std::string(&buffer[1], my_actual)) << " Input " << x; +} + +void CheckHex64(uint64_t v) { + char expected[16 + 1]; + std::string actual = absl::StrCat(absl::Hex(v, absl::kZeroPad16)); + 
snprintf(expected, sizeof(expected), "%016" PRIx64, static_cast<uint64_t>(v)); + EXPECT_EQ(expected, actual) << " Input " << v; + actual = absl::StrCat(absl::Hex(v, absl::kSpacePad16)); + snprintf(expected, sizeof(expected), "%16" PRIx64, static_cast<uint64_t>(v)); + EXPECT_EQ(expected, actual) << " Input " << v; +} + +TEST(Numbers, TestFastPrints) { + for (int i = -100; i <= 100; i++) { + CheckInt32(i); + CheckInt64(i); + } + for (int i = 0; i <= 100; i++) { + CheckUInt32(i); + CheckUInt64(i); + } + // Test min int to make sure that works + CheckInt32(INT_MIN); + CheckInt32(INT_MAX); + CheckInt64(LONG_MIN); + CheckInt64(uint64_t{1000000000}); + CheckInt64(uint64_t{9999999999}); + CheckInt64(uint64_t{100000000000000}); + CheckInt64(uint64_t{999999999999999}); + CheckInt64(uint64_t{1000000000000000000}); + CheckInt64(uint64_t{1199999999999999999}); + CheckInt64(int64_t{-700000000000000000}); + CheckInt64(LONG_MAX); + CheckUInt32(std::numeric_limits<uint32_t>::max()); + CheckUInt64(uint64_t{1000000000}); + CheckUInt64(uint64_t{9999999999}); + CheckUInt64(uint64_t{100000000000000}); + CheckUInt64(uint64_t{999999999999999}); + CheckUInt64(uint64_t{1000000000000000000}); + CheckUInt64(uint64_t{1199999999999999999}); + CheckUInt64(std::numeric_limits<uint64_t>::max()); + + for (int i = 0; i < 10000; i++) { + CheckHex64(i); + } + CheckHex64(uint64_t{0x123456789abcdef0}); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { + std::string s; + // (u)int128 can be streamed but not StrCat'd. 
+ absl::strings_internal::OStringStream(&s) << in_value; + int_type x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleAtoi(s, &x)) + << "in_value=" << in_value << " s=" << s << " x=" << x; + EXPECT_EQ(exp_value, x); + x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleAtoi(s.c_str(), &x)); + EXPECT_EQ(exp_value, x); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleAtoiBad(in_val_type in_value) { + std::string s; + // (u)int128 can be streamed but not StrCat'd. + absl::strings_internal::OStringStream(&s) << in_value; + int_type x; + EXPECT_FALSE(SimpleAtoi(s, &x)); + EXPECT_FALSE(SimpleAtoi(s.c_str(), &x)); +} + +TEST(NumbersTest, Atoi) { + // SimpleAtoi(absl::string_view, int32_t) + VerifySimpleAtoiGood<int32_t>(0, 0); + VerifySimpleAtoiGood<int32_t>(42, 42); + VerifySimpleAtoiGood<int32_t>(-42, -42); + + VerifySimpleAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + + // SimpleAtoi(absl::string_view, uint32_t) + VerifySimpleAtoiGood<uint32_t>(0, 0); + VerifySimpleAtoiGood<uint32_t>(42, 42); + VerifySimpleAtoiBad<uint32_t>(-42); + + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleAtoi(absl::string_view, int64_t) + VerifySimpleAtoiGood<int64_t>(0, 0); + VerifySimpleAtoiGood<int64_t>(42, 42); + VerifySimpleAtoiGood<int64_t>(-42, -42); + + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(), + 
std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleAtoi(absl::string_view, uint64_t) + VerifySimpleAtoiGood<uint64_t>(0, 0); + VerifySimpleAtoiGood<uint64_t>(42, 42); + VerifySimpleAtoiBad<uint64_t>(-42); + + VerifySimpleAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + + // SimpleAtoi(absl::string_view, absl::uint128) + VerifySimpleAtoiGood<absl::uint128>(0, 0); + VerifySimpleAtoiGood<absl::uint128>(42, 42); + VerifySimpleAtoiBad<absl::uint128>(-42); + + VerifySimpleAtoiBad<absl::uint128>(std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiBad<absl::uint128>(std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + 
VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleAtoiGood<absl::uint128>( + std::numeric_limits<absl::uint128>::max(), + std::numeric_limits<absl::uint128>::max()); + + // SimpleAtoi(absl::string_view, absl::int128) + VerifySimpleAtoiGood<absl::int128>(0, 0); + VerifySimpleAtoiGood<absl::int128>(42, 42); + VerifySimpleAtoiGood<absl::int128>(-42, -42); + + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleAtoiGood<absl::int128>( + std::numeric_limits<absl::int128>::min(), + std::numeric_limits<absl::int128>::min()); + VerifySimpleAtoiGood<absl::int128>( + std::numeric_limits<absl::int128>::max(), + std::numeric_limits<absl::int128>::max()); + VerifySimpleAtoiBad<absl::int128>(std::numeric_limits<absl::uint128>::max()); + + // Some other types + VerifySimpleAtoiGood<int>(-42, -42); + VerifySimpleAtoiGood<int32_t>(-42, -42); + VerifySimpleAtoiGood<uint32_t>(42, 42); + VerifySimpleAtoiGood<unsigned int>(42, 42); + VerifySimpleAtoiGood<int64_t>(-42, -42); + VerifySimpleAtoiGood<long>(-42, -42); // NOLINT(runtime/int) + VerifySimpleAtoiGood<uint64_t>(42, 42); + VerifySimpleAtoiGood<size_t>(42, 42); + VerifySimpleAtoiGood<std::string::size_type>(42, 42); +} + +TEST(NumbersTest, Atod) { + double d; + EXPECT_TRUE(absl::SimpleAtod("nan", &d)); + 
EXPECT_TRUE(std::isnan(d)); +} + +TEST(NumbersTest, Atoenum) { + enum E01 { + E01_zero = 0, + E01_one = 1, + }; + + VerifySimpleAtoiGood<E01>(E01_zero, E01_zero); + VerifySimpleAtoiGood<E01>(E01_one, E01_one); + + enum E_101 { + E_101_minusone = -1, + E_101_zero = 0, + E_101_one = 1, + }; + + VerifySimpleAtoiGood<E_101>(E_101_minusone, E_101_minusone); + VerifySimpleAtoiGood<E_101>(E_101_zero, E_101_zero); + VerifySimpleAtoiGood<E_101>(E_101_one, E_101_one); + + enum E_bigint { + E_bigint_zero = 0, + E_bigint_one = 1, + E_bigint_max31 = static_cast<int32_t>(0x7FFFFFFF), + }; + + VerifySimpleAtoiGood<E_bigint>(E_bigint_zero, E_bigint_zero); + VerifySimpleAtoiGood<E_bigint>(E_bigint_one, E_bigint_one); + VerifySimpleAtoiGood<E_bigint>(E_bigint_max31, E_bigint_max31); + + enum E_fullint { + E_fullint_zero = 0, + E_fullint_one = 1, + E_fullint_max31 = static_cast<int32_t>(0x7FFFFFFF), + E_fullint_min32 = INT32_MIN, + }; + + VerifySimpleAtoiGood<E_fullint>(E_fullint_zero, E_fullint_zero); + VerifySimpleAtoiGood<E_fullint>(E_fullint_one, E_fullint_one); + VerifySimpleAtoiGood<E_fullint>(E_fullint_max31, E_fullint_max31); + VerifySimpleAtoiGood<E_fullint>(E_fullint_min32, E_fullint_min32); + + enum E_biguint { + E_biguint_zero = 0, + E_biguint_one = 1, + E_biguint_max31 = static_cast<uint32_t>(0x7FFFFFFF), + E_biguint_max32 = static_cast<uint32_t>(0xFFFFFFFF), + }; + + VerifySimpleAtoiGood<E_biguint>(E_biguint_zero, E_biguint_zero); + VerifySimpleAtoiGood<E_biguint>(E_biguint_one, E_biguint_one); + VerifySimpleAtoiGood<E_biguint>(E_biguint_max31, E_biguint_max31); + VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32); +} + +TEST(stringtest, safe_strto32_base) { + int32_t value; + EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + 
EXPECT_TRUE(safe_strto32_base("0", &value, 16)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto32_base(" \t\n -0x34234324", &value, 16)); + EXPECT_EQ(-0x34234324, value); + + EXPECT_TRUE(safe_strto32_base(" \t\n -34234324", &value, 16)); + EXPECT_EQ(-0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("7654321", &value, 8)); + EXPECT_EQ(07654321, value); + + EXPECT_TRUE(safe_strto32_base("-01234", &value, 8)); + EXPECT_EQ(-01234, value); + + EXPECT_FALSE(safe_strto32_base("1834", &value, 8)); + + // Autodetect base. + EXPECT_TRUE(safe_strto32_base("0", &value, 0)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto32_base("077", &value, 0)); + EXPECT_EQ(077, value); // Octal interpretation + + // Leading zero indicates octal, but then followed by invalid digit. + EXPECT_FALSE(safe_strto32_base("088", &value, 0)); + + // Leading 0x indicated hex, but then followed by invalid digit. + EXPECT_FALSE(safe_strto32_base("0xG", &value, 0)); + + // Base-10 version. + EXPECT_TRUE(safe_strto32_base("34234324", &value, 10)); + EXPECT_EQ(34234324, value); + + EXPECT_TRUE(safe_strto32_base("0", &value, 10)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto32_base(" \t\n -34234324", &value, 10)); + EXPECT_EQ(-34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324 \n\t ", &value, 10)); + EXPECT_EQ(34234324, value); + + // Invalid ints. + EXPECT_FALSE(safe_strto32_base("", &value, 10)); + EXPECT_FALSE(safe_strto32_base(" ", &value, 10)); + EXPECT_FALSE(safe_strto32_base("abc", &value, 10)); + EXPECT_FALSE(safe_strto32_base("34234324a", &value, 10)); + EXPECT_FALSE(safe_strto32_base("34234.3", &value, 10)); + + // Out of bounds. + EXPECT_FALSE(safe_strto32_base("2147483648", &value, 10)); + EXPECT_FALSE(safe_strto32_base("-2147483649", &value, 10)); + + // String version. + EXPECT_TRUE(safe_strto32_base(std::string("0x1234"), &value, 16)); + EXPECT_EQ(0x1234, value); + + // Base-10 string version. 
+ EXPECT_TRUE(safe_strto32_base("1234", &value, 10)); + EXPECT_EQ(1234, value); +} + +TEST(stringtest, safe_strto32_range) { + // These tests verify underflow/overflow behaviour. + int32_t value; + EXPECT_FALSE(safe_strto32_base("2147483648", &value, 10)); + EXPECT_EQ(std::numeric_limits<int32_t>::max(), value); + + EXPECT_TRUE(safe_strto32_base("-2147483648", &value, 10)); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), value); + + EXPECT_FALSE(safe_strto32_base("-2147483649", &value, 10)); + EXPECT_EQ(std::numeric_limits<int32_t>::min(), value); +} + +TEST(stringtest, safe_strto64_range) { + // These tests verify underflow/overflow behaviour. + int64_t value; + EXPECT_FALSE(safe_strto64_base("9223372036854775808", &value, 10)); + EXPECT_EQ(std::numeric_limits<int64_t>::max(), value); + + EXPECT_TRUE(safe_strto64_base("-9223372036854775808", &value, 10)); + EXPECT_EQ(std::numeric_limits<int64_t>::min(), value); + + EXPECT_FALSE(safe_strto64_base("-9223372036854775809", &value, 10)); + EXPECT_EQ(std::numeric_limits<int64_t>::min(), value); +} + +TEST(stringtest, safe_strto32_leading_substring) { + // These tests verify this comment in numbers.h: + // On error, returns false, and sets *value to: [...] + // conversion of leading substring if available ("123@@@" -> 123) + // 0 if no leading substring available + int32_t value; + EXPECT_FALSE(safe_strto32_base("04069@@@", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto32_base("04069@@@", &value, 8)); + EXPECT_EQ(0406, value); + + EXPECT_FALSE(safe_strto32_base("04069balloons", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto32_base("04069balloons", &value, 16)); + EXPECT_EQ(0x4069ba, value); + + EXPECT_FALSE(safe_strto32_base("@@@", &value, 10)); + EXPECT_EQ(0, value); // there was no leading substring +} + +TEST(stringtest, safe_strto64_leading_substring) { + // These tests verify this comment in numbers.h: + // On error, returns false, and sets *value to: [...] 
+ // conversion of leading substring if available ("123@@@" -> 123) + // 0 if no leading substring available + int64_t value; + EXPECT_FALSE(safe_strto64_base("04069@@@", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto64_base("04069@@@", &value, 8)); + EXPECT_EQ(0406, value); + + EXPECT_FALSE(safe_strto64_base("04069balloons", &value, 10)); + EXPECT_EQ(4069, value); + + EXPECT_FALSE(safe_strto64_base("04069balloons", &value, 16)); + EXPECT_EQ(0x4069ba, value); + + EXPECT_FALSE(safe_strto64_base("@@@", &value, 10)); + EXPECT_EQ(0, value); // there was no leading substring +} + +TEST(stringtest, safe_strto64_base) { + int64_t value; + EXPECT_TRUE(safe_strto64_base("0x3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base("3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base("0", &value, 16)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto64_base(" \t\n -0x3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{-0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base(" \t\n -3423432448783446", &value, 16)); + EXPECT_EQ(int64_t{-0x3423432448783446}, value); + + EXPECT_TRUE(safe_strto64_base("123456701234567012", &value, 8)); + EXPECT_EQ(int64_t{0123456701234567012}, value); + + EXPECT_TRUE(safe_strto64_base("-017777777777777", &value, 8)); + EXPECT_EQ(int64_t{-017777777777777}, value); + + EXPECT_FALSE(safe_strto64_base("19777777777777", &value, 8)); + + // Autodetect base. + EXPECT_TRUE(safe_strto64_base("0", &value, 0)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto64_base("077", &value, 0)); + EXPECT_EQ(077, value); // Octal interpretation + + // Leading zero indicates octal, but then followed by invalid digit. + EXPECT_FALSE(safe_strto64_base("088", &value, 0)); + + // Leading 0x indicated hex, but then followed by invalid digit. + EXPECT_FALSE(safe_strto64_base("0xG", &value, 0)); + + // Base-10 version. 
+ EXPECT_TRUE(safe_strto64_base("34234324487834466", &value, 10)); + EXPECT_EQ(int64_t{34234324487834466}, value); + + EXPECT_TRUE(safe_strto64_base("0", &value, 10)); + EXPECT_EQ(0, value); + + EXPECT_TRUE(safe_strto64_base(" \t\n -34234324487834466", &value, 10)); + EXPECT_EQ(int64_t{-34234324487834466}, value); + + EXPECT_TRUE(safe_strto64_base("34234324487834466 \n\t ", &value, 10)); + EXPECT_EQ(int64_t{34234324487834466}, value); + + // Invalid ints. + EXPECT_FALSE(safe_strto64_base("", &value, 10)); + EXPECT_FALSE(safe_strto64_base(" ", &value, 10)); + EXPECT_FALSE(safe_strto64_base("abc", &value, 10)); + EXPECT_FALSE(safe_strto64_base("34234324487834466a", &value, 10)); + EXPECT_FALSE(safe_strto64_base("34234487834466.3", &value, 10)); + + // Out of bounds. + EXPECT_FALSE(safe_strto64_base("9223372036854775808", &value, 10)); + EXPECT_FALSE(safe_strto64_base("-9223372036854775809", &value, 10)); + + // String version. + EXPECT_TRUE(safe_strto64_base(std::string("0x1234"), &value, 16)); + EXPECT_EQ(0x1234, value); + + // Base-10 string version. 
+ EXPECT_TRUE(safe_strto64_base("1234", &value, 10)); + EXPECT_EQ(1234, value); +} + +const size_t kNumRandomTests = 10000; + +template <typename IntType> +void test_random_integer_parse_base(bool (*parse_func)(absl::string_view, + IntType* value, + int base)) { + using RandomEngine = std::minstd_rand0; + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<IntType> random_int( + std::numeric_limits<IntType>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + for (size_t i = 0; i < kNumRandomTests; i++) { + IntType value = random_int(rng); + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + EXPECT_FALSE( + parse_func(absl::StrCat(std::numeric_limits<IntType>::max(), value), + &parsed_value, base)); + + // Test underflow + if (std::numeric_limits<IntType>::min() < 0) { + EXPECT_FALSE( + parse_func(absl::StrCat(std::numeric_limits<IntType>::min(), value), + &parsed_value, base)); + } else { + EXPECT_FALSE(parse_func(absl::StrCat("-", value), &parsed_value, base)); + } + } +} + +TEST(stringtest, safe_strto32_random) { + test_random_integer_parse_base<int32_t>(&safe_strto32_base); +} +TEST(stringtest, safe_strto64_random) { + test_random_integer_parse_base<int64_t>(&safe_strto64_base); +} +TEST(stringtest, safe_strtou32_random) { + test_random_integer_parse_base<uint32_t>(&safe_strtou32_base); +} +TEST(stringtest, safe_strtou64_random) { + test_random_integer_parse_base<uint64_t>(&safe_strtou64_base); +} +TEST(stringtest, safe_strtou128_random) { + // random number generators don't work for uint128, and + // uint128 can be streamed but not StrCat'd, so this code must be custom + // implemented for uint128, but is generally the same as what's above. 
+ // test_random_integer_parse_base<absl::uint128>( + // &absl::numbers_internal::safe_strtou128_base); + using RandomEngine = std::minstd_rand0; + using IntType = absl::uint128; + constexpr auto parse_func = &absl::numbers_internal::safe_strtou128_base; + + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<uint64_t> random_uint64( + std::numeric_limits<uint64_t>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + + for (size_t i = 0; i < kNumRandomTests; i++) { + IntType value = random_uint64(rng); + value = (value << 64) + random_uint64(rng); + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + std::string s; + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::max() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + + // Test underflow + s.clear(); + absl::strings_internal::OStringStream(&s) << "-" << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + } +} +TEST(stringtest, safe_strto128_random) { + // random number generators don't work for int128, and + // int128 can be streamed but not StrCat'd, so this code must be custom + // implemented for int128, but is generally the same as what's above. 
+ // test_random_integer_parse_base<absl::int128>( + // &absl::numbers_internal::safe_strto128_base); + using RandomEngine = std::minstd_rand0; + using IntType = absl::int128; + constexpr auto parse_func = &absl::numbers_internal::safe_strto128_base; + + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<int64_t> random_int64( + std::numeric_limits<int64_t>::min()); + std::uniform_int_distribution<uint64_t> random_uint64( + std::numeric_limits<uint64_t>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + + for (size_t i = 0; i < kNumRandomTests; ++i) { + int64_t high = random_int64(rng); + uint64_t low = random_uint64(rng); + IntType value = absl::MakeInt128(high, low); + + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + std::string s; + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::max() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + + // Test underflow + s.clear(); + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::min() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + } +} + +TEST(stringtest, safe_strtou32_base) { + for (int i = 0; strtouint32_test_cases()[i].str != nullptr; ++i) { + const auto& e = strtouint32_test_cases()[i]; + uint32_t value; + EXPECT_EQ(e.expect_ok, safe_strtou32_base(e.str, &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "i=" << i << " str=\"" << e.str + << "\" base=" << e.base; + } + } +} + +TEST(stringtest, safe_strtou32_base_length_delimited) { + for (int i = 0; strtouint32_test_cases()[i].str != nullptr; ++i) { + const auto& e = strtouint32_test_cases()[i]; + std::string tmp(e.str); + tmp.append("12"); // 
Adds garbage at the end. + + uint32_t value; + EXPECT_EQ(e.expect_ok, + safe_strtou32_base(absl::string_view(tmp.data(), strlen(e.str)), + &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "i=" << i << " str=" << e.str + << " base=" << e.base; + } + } +} + +TEST(stringtest, safe_strtou64_base) { + for (int i = 0; strtouint64_test_cases()[i].str != nullptr; ++i) { + const auto& e = strtouint64_test_cases()[i]; + uint64_t value; + EXPECT_EQ(e.expect_ok, safe_strtou64_base(e.str, &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "str=" << e.str << " base=" << e.base; + } + } +} + +TEST(stringtest, safe_strtou64_base_length_delimited) { + for (int i = 0; strtouint64_test_cases()[i].str != nullptr; ++i) { + const auto& e = strtouint64_test_cases()[i]; + std::string tmp(e.str); + tmp.append("12"); // Adds garbage at the end. + + uint64_t value; + EXPECT_EQ(e.expect_ok, + safe_strtou64_base(absl::string_view(tmp.data(), strlen(e.str)), + &value, e.base)) + << "str=\"" << e.str << "\" base=" << e.base; + if (e.expect_ok) { + EXPECT_EQ(e.expected, value) << "str=\"" << e.str << "\" base=" << e.base; + } + } +} + +// feenableexcept() and fedisableexcept() are extensions supported by some libc +// implementations. +#if defined(__GLIBC__) || defined(__BIONIC__) +#define ABSL_HAVE_FEENABLEEXCEPT 1 +#define ABSL_HAVE_FEDISABLEEXCEPT 1 +#endif + +class SimpleDtoaTest : public testing::Test { + protected: + void SetUp() override { + // Store the current floating point env & clear away any pending exceptions. + feholdexcept(&fp_env_); +#ifdef ABSL_HAVE_FEENABLEEXCEPT + // Turn on floating point exceptions. + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + } + + void TearDown() override { + // Restore the floating point environment to the original state. + // In theory fedisableexcept is unnecessary; fesetenv will also do it. 
+ // In practice, our toolchains have subtle bugs. +#ifdef ABSL_HAVE_FEDISABLEEXCEPT + fedisableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); +#endif + fesetenv(&fp_env_); + } + + std::string ToNineDigits(double value) { + char buffer[16]; // more than enough for %.9g + snprintf(buffer, sizeof(buffer), "%.9g", value); + return buffer; + } + + fenv_t fp_env_; +}; + +// Run the given runnable functor for "cases" test cases, chosen over the +// available range of float. pi and e and 1/e are seeded, and then all +// available integer powers of 2 and 10 are multiplied against them. In +// addition to trying all those values, we try the next higher and next lower +// float, and then we add additional test cases evenly distributed between them. +// Each test case is passed to runnable as both a positive and negative value. +template <typename R> +void ExhaustiveFloat(uint32_t cases, R&& runnable) { + runnable(0.0f); + runnable(-0.0f); + if (cases >= 2e9) { // more than 2 billion? Might as well run them all. 
+ for (float f = 0; f < std::numeric_limits<float>::max(); ) { + f = nextafterf(f, std::numeric_limits<float>::max()); + runnable(-f); + runnable(f); + } + return; + } + std::set<float> floats = {3.4028234e38f}; + for (float f : {1.0, 3.14159265, 2.718281828, 1 / 2.718281828}) { + for (float testf = f; testf != 0; testf *= 0.1f) floats.insert(testf); + for (float testf = f; testf != 0; testf *= 0.5f) floats.insert(testf); + for (float testf = f; testf < 3e38f / 2; testf *= 2.0f) + floats.insert(testf); + for (float testf = f; testf < 3e38f / 10; testf *= 10) floats.insert(testf); + } + + float last = *floats.begin(); + + runnable(last); + runnable(-last); + int iters_per_float = cases / floats.size(); + if (iters_per_float == 0) iters_per_float = 1; + for (float f : floats) { + if (f == last) continue; + float testf = std::nextafter(last, std::numeric_limits<float>::max()); + runnable(testf); + runnable(-testf); + last = testf; + if (f == last) continue; + double step = (double{f} - last) / iters_per_float; + for (double d = last + step; d < f; d += step) { + testf = d; + if (testf != last) { + runnable(testf); + runnable(-testf); + last = testf; + } + } + testf = std::nextafter(f, 0.0f); + if (testf > last) { + runnable(testf); + runnable(-testf); + last = testf; + } + if (f != last) { + runnable(f); + runnable(-f); + last = f; + } + } +} + +TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) { + uint64_t test_count = 0; + std::vector<double> mismatches; + auto checker = [&](double d) { + if (d != d) return; // rule out NaNs + ++test_count; + char sixdigitsbuf[kSixDigitsToBufferSize] = {0}; + SixDigitsToBuffer(d, sixdigitsbuf); + char snprintfbuf[kSixDigitsToBufferSize] = {0}; + snprintf(snprintfbuf, kSixDigitsToBufferSize, "%g", d); + if (strcmp(sixdigitsbuf, snprintfbuf) != 0) { + mismatches.push_back(d); + if (mismatches.size() < 10) { + ABSL_RAW_LOG(ERROR, "%s", + absl::StrCat("Six-digit failure with double. 
", "d=", d, + "=", d, " sixdigits=", sixdigitsbuf, + " printf(%g)=", snprintfbuf) + .c_str()); + } + } + }; + // Some quick sanity checks... + checker(5e-324); + checker(1e-308); + checker(1.0); + checker(1.000005); + checker(1.7976931348623157e308); + checker(0.00390625); +#ifndef _MSC_VER + // on MSVC, snprintf() rounds it to 0.00195313. SixDigitsToBuffer() rounds it + // to 0.00195312 (round half to even). + checker(0.001953125); +#endif + checker(0.005859375); + // Some cases where the rounding is very very close + checker(1.089095e-15); + checker(3.274195e-55); + checker(6.534355e-146); + checker(2.920845e+234); + + if (mismatches.empty()) { + test_count = 0; + ExhaustiveFloat(kFloatNumCases, checker); + + test_count = 0; + std::vector<int> digit_testcases{ + 100000, 100001, 100002, 100005, 100010, 100020, 100050, 100100, // misc + 195312, 195313, // 1.953125 is a case where we round down, just barely. + 200000, 500000, 800000, // misc mid-range cases + 585937, 585938, // 5.859375 is a case where we round up, just barely. + 900000, 990000, 999000, 999900, 999990, 999996, 999997, 999998, 999999}; + if (kFloatNumCases >= 1e9) { + // If at least 1 billion test cases were requested, user wants an + // exhaustive test. So let's test all mantissas, too. + constexpr int min_mantissa = 100000, max_mantissa = 999999; + digit_testcases.resize(max_mantissa - min_mantissa + 1); + std::iota(digit_testcases.begin(), digit_testcases.end(), min_mantissa); + } + + for (int exponent = -324; exponent <= 308; ++exponent) { + double powten = absl::strings_internal::Pow10(exponent); + if (powten == 0) powten = 5e-324; + if (kFloatNumCases >= 1e9) { + // The exhaustive test takes a very long time, so log progress. 
+ char buf[kSixDigitsToBufferSize]; + ABSL_RAW_LOG( + INFO, "%s", + absl::StrCat("Exp ", exponent, " powten=", powten, "(", powten, + ") (", + std::string(buf, SixDigitsToBuffer(powten, buf)), ")") + .c_str()); + } + for (int digits : digit_testcases) { + if (exponent == 308 && digits >= 179769) break; // don't overflow! + double digiform = (digits + 0.5) * 0.00001; + double testval = digiform * powten; + double pretestval = nextafter(testval, 0); + double posttestval = nextafter(testval, 1.7976931348623157e308); + checker(testval); + checker(pretestval); + checker(posttestval); + } + } + } else { + EXPECT_EQ(mismatches.size(), 0); + for (size_t i = 0; i < mismatches.size(); ++i) { + if (i > 100) i = mismatches.size() - 1; + double d = mismatches[i]; + char sixdigitsbuf[kSixDigitsToBufferSize] = {0}; + SixDigitsToBuffer(d, sixdigitsbuf); + char snprintfbuf[kSixDigitsToBufferSize] = {0}; + snprintf(snprintfbuf, kSixDigitsToBufferSize, "%g", d); + double before = nextafter(d, 0.0); + double after = nextafter(d, 1.7976931348623157e308); + char b1[32], b2[kSixDigitsToBufferSize]; + ABSL_RAW_LOG( + ERROR, "%s", + absl::StrCat( + "Mismatch #", i, " d=", d, " (", ToNineDigits(d), ")", + " sixdigits='", sixdigitsbuf, "'", " snprintf='", snprintfbuf, + "'", " Before.=", PerfectDtoa(before), " ", + (SixDigitsToBuffer(before, b2), b2), + " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", before), b1), + " Perfect=", PerfectDtoa(d), " ", (SixDigitsToBuffer(d, b2), b2), + " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", d), b1), + " After.=.", PerfectDtoa(after), " ", + (SixDigitsToBuffer(after, b2), b2), + " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", after), b1)) + .c_str()); + } + } +} + +TEST(StrToInt32, Partial) { + struct Int32TestLine { + std::string input; + bool status; + int32_t value; + }; + const int32_t int32_min = std::numeric_limits<int32_t>::min(); + const int32_t int32_max = std::numeric_limits<int32_t>::max(); + Int32TestLine int32_test_line[] = { + {"", 
false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(int32_min, int32_max), false, int32_min}, + {absl::StrCat(int32_max, int32_max), false, int32_max}, + }; + + for (const Int32TestLine& test_line : int32_test_line) { + int32_t value = -2; + bool status = safe_strto32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto32_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToUint32, Partial) { + struct Uint32TestLine { + std::string input; + bool status; + uint32_t value; + }; + const uint32_t uint32_max = std::numeric_limits<uint32_t>::max(); + Uint32TestLine uint32_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(uint32_max, uint32_max), false, uint32_max}, + }; + + for (const Uint32TestLine& test_line : uint32_test_line) { + uint32_t value = 2; + bool status = safe_strtou32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou32_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou32_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToInt64, Partial) { + struct Int64TestLine { + std::string input; + bool status; + int64_t 
value; + }; + const int64_t int64_min = std::numeric_limits<int64_t>::min(); + const int64_t int64_max = std::numeric_limits<int64_t>::max(); + Int64TestLine int64_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(int64_min, int64_max), false, int64_min}, + {absl::StrCat(int64_max, int64_max), false, int64_max}, + }; + + for (const Int64TestLine& test_line : int64_test_line) { + int64_t value = -2; + bool status = safe_strto64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = -2; + status = safe_strto64_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToUint64, Partial) { + struct Uint64TestLine { + std::string input; + bool status; + uint64_t value; + }; + const uint64_t uint64_max = std::numeric_limits<uint64_t>::max(); + Uint64TestLine uint64_test_line[] = { + {"", false, 0}, + {" ", false, 0}, + {"-", false, 0}, + {"123@@@", false, 123}, + {absl::StrCat(uint64_max, uint64_max), false, uint64_max}, + }; + + for (const Uint64TestLine& test_line : uint64_test_line) { + uint64_t value = 2; + bool status = safe_strtou64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou64_base(test_line.input, &value, 10); + EXPECT_EQ(test_line.status, status) << test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + value = 2; + status = safe_strtou64_base(absl::string_view(test_line.input), &value, 10); + EXPECT_EQ(test_line.status, status) 
<< test_line.input; + EXPECT_EQ(test_line.value, value) << test_line.input; + } +} + +TEST(StrToInt32Base, PrefixOnly) { + struct Int32TestLine { + std::string input; + bool status; + int32_t value; + }; + Int32TestLine int32_test_line[] = { + { "", false, 0 }, + { "-", false, 0 }, + { "-0", true, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + { "-0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Int32TestLine& line : int32_test_line) { + for (const int base : base_array) { + int32_t value = 2; + bool status = safe_strto32_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto32_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto32_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +TEST(StrToUint32Base, PrefixOnly) { + struct Uint32TestLine { + std::string input; + bool status; + uint32_t value; + }; + Uint32TestLine uint32_test_line[] = { + { "", false, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Uint32TestLine& line : uint32_test_line) { + for (const int base : base_array) { + uint32_t value = 2; + bool status = safe_strtou32_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou32_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = 
safe_strtou32_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +TEST(StrToInt64Base, PrefixOnly) { + struct Int64TestLine { + std::string input; + bool status; + int64_t value; + }; + Int64TestLine int64_test_line[] = { + { "", false, 0 }, + { "-", false, 0 }, + { "-0", true, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + { "-0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Int64TestLine& line : int64_test_line) { + for (const int base : base_array) { + int64_t value = 2; + bool status = safe_strto64_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto64_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strto64_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +TEST(StrToUint64Base, PrefixOnly) { + struct Uint64TestLine { + std::string input; + bool status; + uint64_t value; + }; + Uint64TestLine uint64_test_line[] = { + { "", false, 0 }, + { "0", true, 0 }, + { "0x", false, 0 }, + }; + const int base_array[] = { 0, 2, 8, 10, 16 }; + + for (const Uint64TestLine& line : uint64_test_line) { + for (const int base : base_array) { + uint64_t value = 2; + bool status = safe_strtou64_base(line.input.c_str(), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou64_base(line.input, &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + 
EXPECT_EQ(line.value, value) << line.input << " " << base; + value = 2; + status = safe_strtou64_base(absl::string_view(line.input), &value, base); + EXPECT_EQ(line.status, status) << line.input << " " << base; + EXPECT_EQ(line.value, value) << line.input << " " << base; + } + } +} + +void TestFastHexToBufferZeroPad16(uint64_t v) { + char buf[16]; + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(v, buf); + absl::string_view res(buf, 16); + char buf2[17]; + snprintf(buf2, sizeof(buf2), "%016" PRIx64, v); + EXPECT_EQ(res, buf2) << v; + size_t expected_digits = snprintf(buf2, sizeof(buf2), "%" PRIx64, v); + EXPECT_EQ(digits, expected_digits) << v; +} + +TEST(FastHexToBufferZeroPad16, Smoke) { + TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::max()); + TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::max()); + absl::BitGen rng; + for (int i = 0; i < 100000; ++i) { + TestFastHexToBufferZeroPad16( + absl::LogUniform(rng, std::numeric_limits<uint64_t>::min(), + std::numeric_limits<uint64_t>::max())); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/str_cat.cc b/third_party/abseil_cpp/absl/strings/str_cat.cc new file mode 100644 index 000000000000..dd5d25b0d6df --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_cat.cc @@ -0,0 +1,246 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_cat.h" + +#include <assert.h> + +#include <algorithm> +#include <cstdint> +#include <cstring> + +#include "absl/strings/ascii.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/numbers.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +AlphaNum::AlphaNum(Hex hex) { + static_assert(numbers_internal::kFastToBufferSize >= 32, + "This function only works when output buffer >= 32 bytes long"); + char* const end = &digits_[numbers_internal::kFastToBufferSize]; + auto real_width = + absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); + if (real_width >= hex.width) { + piece_ = absl::string_view(end - real_width, real_width); + } else { + // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and + // max pad width can be up to 20. + std::memset(end - 32, hex.fill, 16); + // Patch up everything else up to the real_width. + std::memset(end - real_width - 16, hex.fill, 16); + piece_ = absl::string_view(end - hex.width, hex.width); + } +} + +AlphaNum::AlphaNum(Dec dec) { + assert(dec.width <= numbers_internal::kFastToBufferSize); + char* const end = &digits_[numbers_internal::kFastToBufferSize]; + char* const minfill = end - dec.width; + char* writer = end; + uint64_t value = dec.value; + bool neg = dec.neg; + while (value > 9) { + *--writer = '0' + (value % 10); + value /= 10; + } + *--writer = '0' + value; + if (neg) *--writer = '-'; + + ptrdiff_t fillers = writer - minfill; + if (fillers > 0) { + // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> + // But...: if the fill character is '0', then it's <+/-><fill><digits> + bool add_sign_again = false; + if (neg && dec.fill == '0') { // If filling with '0', + ++writer; // ignore the sign we just added + add_sign_again = true; // and re-add the sign later. 
+ } + writer -= fillers; + std::fill_n(writer, fillers, dec.fill); + if (add_sign_again) *--writer = '-'; + } + + piece_ = absl::string_view(writer, end - writer); +} + +// ---------------------------------------------------------------------- +// StrCat() +// This merges the given strings or integers, with no delimiter. This +// is designed to be the fastest possible way to construct a string out +// of a mix of raw C strings, string_views, strings, and integer values. +// ---------------------------------------------------------------------- + +// Append is merely a version of memcpy that returns the address of the byte +// after the area just overwritten. +static char* Append(char* out, const AlphaNum& x) { + // memcpy is allowed to overwrite arbitrary memory, so doing this after the + // call would force an extra fetch of x.size(). + char* after = out + x.size(); + if (x.size() != 0) { + memcpy(out, x.data(), x.size()); + } + return after; +} + +std::string StrCat(const AlphaNum& a, const AlphaNum& b) { + std::string result; + absl::strings_internal::STLStringResizeUninitialized(&result, + a.size() + b.size()); + char* const begin = &result[0]; + char* out = begin; + out = Append(out, a); + out = Append(out, b); + assert(out == begin + result.size()); + return result; +} + +std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { + std::string result; + strings_internal::STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size()); + char* const begin = &result[0]; + char* out = begin; + out = Append(out, a); + out = Append(out, b); + out = Append(out, c); + assert(out == begin + result.size()); + return result; +} + +std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d) { + std::string result; + strings_internal::STLStringResizeUninitialized( + &result, a.size() + b.size() + c.size() + d.size()); + char* const begin = &result[0]; + char* out = begin; + out = Append(out, a); + out = 
Append(out, b); + out = Append(out, c); + out = Append(out, d); + assert(out == begin + result.size()); + return result; +} + +namespace strings_internal { + +// Do not call directly - these are not part of the public API. +std::string CatPieces(std::initializer_list<absl::string_view> pieces) { + std::string result; + size_t total_size = 0; + for (const absl::string_view& piece : pieces) total_size += piece.size(); + strings_internal::STLStringResizeUninitialized(&result, total_size); + + char* const begin = &result[0]; + char* out = begin; + for (const absl::string_view& piece : pieces) { + const size_t this_size = piece.size(); + if (this_size != 0) { + memcpy(out, piece.data(), this_size); + out += this_size; + } + } + assert(out == begin + result.size()); + return result; +} + +// It's possible to call StrAppend with an absl::string_view that is itself a +// fragment of the string we're appending to. However the results of this are +// random. Therefore, check for this in debug mode. Use unsigned math so we +// only have to do one comparison. Note, there's an exception case: appending an +// empty string is always allowed. 
+#define ASSERT_NO_OVERLAP(dest, src) \
+  assert(((src).size() == 0) ||      \
+         (uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size())))
+
+// Appends every element of `pieces`, in order, to `*dest`.
+//
+// The first loop asserts (debug builds only) that no piece points into `*dest`
+// and totals the sizes, so the string is resized exactly once; the second loop
+// copies each non-empty piece behind the existing contents.
+void AppendPieces(std::string* dest,
+                  std::initializer_list<absl::string_view> pieces) {
+  size_t old_size = dest->size();
+  size_t total_size = old_size;
+  for (const absl::string_view& piece : pieces) {
+    ASSERT_NO_OVERLAP(*dest, piece);
+    total_size += piece.size();
+  }
+  strings_internal::STLStringResizeUninitialized(dest, total_size);
+
+  // `begin` is taken after the resize, since growing a std::string may move
+  // its buffer.
+  char* const begin = &(*dest)[0];
+  char* out = begin + old_size;
+  for (const absl::string_view& piece : pieces) {
+    const size_t this_size = piece.size();
+    if (this_size != 0) {
+      memcpy(out, piece.data(), this_size);
+      out += this_size;
+    }
+  }
+  assert(out == begin + dest->size());
+}
+
+}  // namespace strings_internal
+
+// One-piece append: after the debug-mode overlap check this is a plain
+// std::string::append of the piece's bytes.
+void StrAppend(std::string* dest, const AlphaNum& a) {
+  ASSERT_NO_OVERLAP(*dest, a);
+  dest->append(a.data(), a.size());
+}
+
+// Two-piece append: grows `*dest` once by the combined size, then copies the
+// pieces in argument order starting at the old end of the string.
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
+  ASSERT_NO_OVERLAP(*dest, a);
+  ASSERT_NO_OVERLAP(*dest, b);
+  std::string::size_type old_size = dest->size();
+  strings_internal::STLStringResizeUninitialized(
+      dest, old_size + a.size() + b.size());
+  char* const begin = &(*dest)[0];
+  char* out = begin + old_size;
+  out = Append(out, a);
+  out = Append(out, b);
+  assert(out == begin + dest->size());
+}
+
+// Three-piece append; same scheme as the two-piece overload.
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+               const AlphaNum& c) {
+  ASSERT_NO_OVERLAP(*dest, a);
+  ASSERT_NO_OVERLAP(*dest, b);
+  ASSERT_NO_OVERLAP(*dest, c);
+  std::string::size_type old_size = dest->size();
+  strings_internal::STLStringResizeUninitialized(
+      dest, old_size + a.size() + b.size() + c.size());
+  char* const begin = &(*dest)[0];
+  char* out = begin + old_size;
+  out = Append(out, a);
+  out = Append(out, b);
+  out = Append(out, c);
+  assert(out == begin + dest->size());
+}
+
+// Four-piece append; same scheme as the two-piece overload.
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+               const AlphaNum& c, const AlphaNum& d) {
+  ASSERT_NO_OVERLAP(*dest, a);
+  ASSERT_NO_OVERLAP(*dest, b);
+  ASSERT_NO_OVERLAP(*dest, c);
+  ASSERT_NO_OVERLAP(*dest, d);
+  std::string::size_type old_size = dest->size();
+  strings_internal::STLStringResizeUninitialized(
+      dest, old_size + a.size() + b.size() + c.size() + d.size());
+  char* const begin = &(*dest)[0];
+  char* out = begin + old_size;
+  out = Append(out, a);
+  out = Append(out, b);
+  out = Append(out, c);
+  out = Append(out, d);
+  assert(out == begin + dest->size());
+}
+
+ABSL_NAMESPACE_END
+}  // namespace absl
diff --git a/third_party/abseil_cpp/absl/strings/str_cat.h b/third_party/abseil_cpp/absl/strings/str_cat.h
new file mode 100644
index 000000000000..a8a85c7322b2
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/str_cat.h
@@ -0,0 +1,408 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: str_cat.h
+// -----------------------------------------------------------------------------
+//
+// This package contains functions for efficiently concatenating and appending
+// strings: `StrCat()` and `StrAppend()`. Most of the work within these routines
+// is actually handled through use of a special AlphaNum type, which was
+// designed to be used as a parameter type that efficiently manages conversion
+// to strings and avoids copies in the above operations.
+// +// Any routine accepting either a string or a number may accept `AlphaNum`. +// The basic idea is that by accepting a `const AlphaNum &` as an argument +// to your function, your callers will automagically convert bools, integers, +// and floating point values to strings for you. +// +// NOTE: Use of `AlphaNum` outside of the //absl/strings package is unsupported +// except for the specific case of function parameters of type `AlphaNum` or +// `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a +// stack variable is not supported. +// +// Conversion from 8-bit values is not accepted because, if it were, then an +// attempt to pass ':' instead of ":" might result in a 58 ending up in your +// result. +// +// Bools convert to "0" or "1". Pointers to types other than `char *` are not +// valid inputs. No output is generated for null `char *` pointers. +// +// Floating point numbers are formatted with six-digit precision, which is +// the default for "std::cout <<" or printf "%g" (the same as "%.6g"). +// +// You can convert to hexadecimal output rather than decimal output using the +// `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to +// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using +// a `PadSpec` enum. +// +// ----------------------------------------------------------------------------- + +#ifndef ABSL_STRINGS_STR_CAT_H_ +#define ABSL_STRINGS_STR_CAT_H_ + +#include <array> +#include <cstdint> +#include <string> +#include <type_traits> +#include <vector> + +#include "absl/base/port.h" +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace strings_internal { +// AlphaNumBuffer allows a way to pass a string to StrCat without having to do +// memory allocation. It is simply a pair of a fixed-size character array, and +// a size. Please don't use outside of absl, yet. 
+template <size_t max_size>
+struct AlphaNumBuffer {
+  // Character storage; only the first `size` entries are meaningful.
+  std::array<char, max_size> data;
+  // Number of valid characters at the front of `data`.
+  size_t size;
+};
+
+}  // namespace strings_internal
+
+// Enum that specifies the number of significant digits to return in a `Hex` or
+// `Dec` conversion and fill character to use. A `kZeroPad2` value, for example,
+// would produce hexadecimal strings such as "0a","0f" and a `kSpacePad5` value
+// would produce hexadecimal strings such as "    a","    f".
+//
+// The enumerators of each family are consecutive, and each space-pad value is
+// its zero-pad counterpart plus 64; the `Hex` and `Dec` constructors rely on
+// this to derive the width by subtraction and the fill character by comparing
+// against `kSpacePad2`.
+enum PadSpec : uint8_t {
+  kNoPad = 1,
+  kZeroPad2,
+  kZeroPad3,
+  kZeroPad4,
+  kZeroPad5,
+  kZeroPad6,
+  kZeroPad7,
+  kZeroPad8,
+  kZeroPad9,
+  kZeroPad10,
+  kZeroPad11,
+  kZeroPad12,
+  kZeroPad13,
+  kZeroPad14,
+  kZeroPad15,
+  kZeroPad16,
+  kZeroPad17,
+  kZeroPad18,
+  kZeroPad19,
+  kZeroPad20,
+
+  kSpacePad2 = kZeroPad2 + 64,
+  kSpacePad3,
+  kSpacePad4,
+  kSpacePad5,
+  kSpacePad6,
+  kSpacePad7,
+  kSpacePad8,
+  kSpacePad9,
+  kSpacePad10,
+  kSpacePad11,
+  kSpacePad12,
+  kSpacePad13,
+  kSpacePad14,
+  kSpacePad15,
+  kSpacePad16,
+  kSpacePad17,
+  kSpacePad18,
+  kSpacePad19,
+  kSpacePad20,
+};
+
+// -----------------------------------------------------------------------------
+// Hex
+// -----------------------------------------------------------------------------
+//
+// `Hex` stores a set of hexadecimal string conversion parameters for use
+// within `AlphaNum` string conversions.
+struct Hex {
+  uint64_t value;  // Value to format, already widened to 64 bits.
+  uint8_t width;   // Minimum field width; 1 for kNoPad.
+  char fill;       // Padding character: ' ' for kSpacePad*, '0' otherwise.
+
+  // One constructor per integer size (1, 2, 4 and 8 bytes). Each casts through
+  // the unsigned type of the same width before widening, so a negative input
+  // keeps its own bit pattern instead of being sign-extended to 64 bits.
+  template <typename Int>
+  explicit Hex(
+      Int v, PadSpec spec = absl::kNoPad,
+      typename std::enable_if<sizeof(Int) == 1 &&
+                              !std::is_pointer<Int>::value>::type* = nullptr)
+      : Hex(spec, static_cast<uint8_t>(v)) {}
+  template <typename Int>
+  explicit Hex(
+      Int v, PadSpec spec = absl::kNoPad,
+      typename std::enable_if<sizeof(Int) == 2 &&
+                              !std::is_pointer<Int>::value>::type* = nullptr)
+      : Hex(spec, static_cast<uint16_t>(v)) {}
+  template <typename Int>
+  explicit Hex(
+      Int v, PadSpec spec = absl::kNoPad,
+      typename std::enable_if<sizeof(Int) == 4 &&
+                              !std::is_pointer<Int>::value>::type* = nullptr)
+      : Hex(spec, static_cast<uint32_t>(v)) {}
+  template <typename Int>
+  explicit Hex(
+      Int v, PadSpec spec = absl::kNoPad,
+      typename std::enable_if<sizeof(Int) == 8 &&
+                              !std::is_pointer<Int>::value>::type* = nullptr)
+      : Hex(spec, static_cast<uint64_t>(v)) {}
+  // Pointers are formatted by their address value.
+  template <typename Pointee>
+  explicit Hex(Pointee* v, PadSpec spec = absl::kNoPad)
+      : Hex(spec, reinterpret_cast<uintptr_t>(v)) {}
+
+ private:
+  // Common target of the public constructors: decodes `spec` into a width
+  // (kZeroPadN / kSpacePadN -> N, kNoPad -> 1) and a fill character.
+  Hex(PadSpec spec, uint64_t v)
+      : value(v),
+        width(spec == absl::kNoPad
+                  ? 1
+                  : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
+                                             : spec - absl::kZeroPad2 + 2),
+        fill(spec >= absl::kSpacePad2 ? ' ' : '0') {}
+};
+
+// -----------------------------------------------------------------------------
+// Dec
+// -----------------------------------------------------------------------------
+//
+// `Dec` stores a set of decimal string conversion parameters for use
+// within `AlphaNum` string conversions. Dec is slower than the default
+// integer conversion, so use it only if you need padding.
+struct Dec {
+  uint64_t value;  // Magnitude of the input (absolute value).
+  uint8_t width;   // Minimum field width; 1 for kNoPad.
+  char fill;       // Padding character: ' ' for kSpacePad*, '0' otherwise.
+  bool neg;        // True when the input was negative.
+
+  // For negative inputs the magnitude is computed as `0 - uint64_t(v)`, i.e.
+  // in unsigned arithmetic, so the most negative value does not overflow.
+  template <typename Int>
+  explicit Dec(Int v, PadSpec spec = absl::kNoPad,
+               typename std::enable_if<(sizeof(Int) <= 8)>::type* = nullptr)
+      : value(v >= 0 ? static_cast<uint64_t>(v)
+                     : uint64_t{0} - static_cast<uint64_t>(v)),
+        width(spec == absl::kNoPad
+                  ? 1
+                  : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
+                                             : spec - absl::kZeroPad2 + 2),
+        fill(spec >= absl::kSpacePad2 ? ' ' : '0'),
+        neg(v < 0) {}
+};
+
+// -----------------------------------------------------------------------------
+// AlphaNum
+// -----------------------------------------------------------------------------
+//
+// The `AlphaNum` class acts as the main parameter type for `StrCat()` and
+// `StrAppend()`, providing efficient conversion of numeric, boolean, and
+// hexadecimal values (through the `Hex` type) into strings.
+
+class AlphaNum {
+ public:
+  // No bool ctor -- bools convert to an integral type.
+  // A bool ctor would also convert incoming pointers (bletch).
+
+  // Integer constructors: the digits are written into `digits_` and `piece_`
+  // views them; the length is the distance from the start of the buffer to
+  // the pointer FastIntToBuffer returns.
+  AlphaNum(int x)  // NOLINT(runtime/explicit)
+      : piece_(digits_,
+               numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+  AlphaNum(unsigned int x)  // NOLINT(runtime/explicit)
+      : piece_(digits_,
+               numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+  AlphaNum(long x)  // NOLINT(*)
+      : piece_(digits_,
+               numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+  AlphaNum(unsigned long x)  // NOLINT(*)
+      : piece_(digits_,
+               numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+  AlphaNum(long long x)  // NOLINT(*)
+      : piece_(digits_,
+               numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+  AlphaNum(unsigned long long x)  // NOLINT(*)
+      : piece_(digits_,
+               numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+
+  // Floating-point constructors: SixDigitsToBuffer's return value is used
+  // directly as the length of the text written into `digits_`.
+  AlphaNum(float f)  // NOLINT(runtime/explicit)
+      : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
+  AlphaNum(double f)  // NOLINT(runtime/explicit)
+      : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
+
+  AlphaNum(Hex hex);  // NOLINT(runtime/explicit)
+  AlphaNum(Dec dec);  // NOLINT(runtime/explicit)
+
+  // Views the caller's buffer; nothing is copied into `digits_`.
+  template <size_t size>
+  AlphaNum(  // NOLINT(runtime/explicit)
+      const strings_internal::AlphaNumBuffer<size>& buf)
+      : piece_(&buf.data[0], buf.size) {}
+
+  // String-like constructors also just view the caller's storage.
+  AlphaNum(const char* c_str) : piece_(c_str) {}  // NOLINT(runtime/explicit)
+  AlphaNum(absl::string_view pc) : piece_(pc) {}  // NOLINT(runtime/explicit)
+
+  template <typename Allocator>
+  AlphaNum(  // NOLINT(runtime/explicit)
+      const std::basic_string<char, std::char_traits<char>, Allocator>& str)
+      : piece_(str) {}
+
+  // Use string literals ":" instead of character literals ':'.
+  AlphaNum(char c) = delete;  // NOLINT(runtime/explicit)
+
+  // NOTE(review): copying is disabled presumably because `piece_` may point
+  // into this object's own `digits_`, which a member-wise copy would not
+  // re-target -- confirm.
+  AlphaNum(const AlphaNum&) = delete;
+  AlphaNum& operator=(const AlphaNum&) = delete;
+
+  absl::string_view::size_type size() const { return piece_.size(); }
+  const char* data() const { return piece_.data(); }
+  absl::string_view Piece() const { return piece_; }
+
+  // Normal enums are already handled by the integer formatters.
+  // This overload matches only scoped enums.
+  template <typename T,
+            typename = typename std::enable_if<
+                std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
+  AlphaNum(T e)  // NOLINT(runtime/explicit)
+      : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {}
+
+  // vector<bool>::reference and const_reference require special help to
+  // convert to `AlphaNum` because it requires two user defined conversions.
+  template <
+      typename T,
+      typename std::enable_if<
+          std::is_class<T>::value &&
+          (std::is_same<T, std::vector<bool>::reference>::value ||
+           std::is_same<T, std::vector<bool>::const_reference>::value)>::type* =
+          nullptr>
+  AlphaNum(T e) : AlphaNum(static_cast<bool>(e)) {}  // NOLINT(runtime/explicit)
+
+ private:
+  // The text this AlphaNum stands for: either a view of `digits_` (numeric
+  // constructors) or of storage owned by the caller (string constructors).
+  absl::string_view piece_;
+  // Scratch space the numeric constructors format into.
+  char digits_[numbers_internal::kFastToBufferSize];
+};
+
+// -----------------------------------------------------------------------------
+// StrCat()
+// -----------------------------------------------------------------------------
+//
+// Merges given strings or numbers, using no delimiter(s), returning the merged
+// result as a string.
+//
+// `StrCat()` is designed to be the fastest possible way to construct a string
+// out of a mix of raw C strings, string_views, strings, bool values,
+// and numeric values.
+//
+// Don't use `StrCat()` for user-visible strings. The localization process
+// works poorly on strings built up out of fragments.
+//
+// For clarity and performance, don't use `StrCat()` when appending to a
+// string. Use `StrAppend()` instead. In particular, avoid using any of these
+// (anti-)patterns:
+//
+//   str.append(StrCat(...))
+//   str += StrCat(...)
+//   str = StrCat(str, ...)
+//
+// The last case is the worst, with a potential to change a loop
+// from a linear time operation with O(1) dynamic allocations into a
+// quadratic time operation with O(n) dynamic allocations.
+//
+// See `StrAppend()` below for more information.
+
+namespace strings_internal {
+
+// Do not call directly - this is not part of the public API.
+std::string CatPieces(std::initializer_list<absl::string_view> pieces);
+void AppendPieces(std::string* dest,
+                  std::initializer_list<absl::string_view> pieces);
+
+}  // namespace strings_internal
+
+// Zero arguments: the empty string.
+ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
+
+// One argument: a copy of that argument's text.
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) {
+  return std::string(a.data(), a.size());
+}
+
+// Two to four arguments are declared here and defined out of line.
+ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
+ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
+                                        const AlphaNum& c);
+ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
+                                        const AlphaNum& c, const AlphaNum& d);
+
+// Support 5 or more arguments
+//
+// Each extra argument is converted to `const AlphaNum&` by the static_cast;
+// any temporary AlphaNum this creates lives until the end of the full
+// expression, so the views handed to CatPieces stay valid for the call.
+template <typename... AV>
+ABSL_MUST_USE_RESULT inline std::string StrCat(
+    const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, const AlphaNum& d,
+    const AlphaNum& e, const AV&... args) {
+  return strings_internal::CatPieces(
+      {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
+       static_cast<const AlphaNum&>(args).Piece()...});
+}
+
+// -----------------------------------------------------------------------------
+// StrAppend()
+// -----------------------------------------------------------------------------
+//
+// Appends a string or set of strings to an existing string, in a similar
+// fashion to `StrCat()`.
+//
+// WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the
+// a, b, c, parameters be a reference into str. For speed, `StrAppend()` does
+// not try to check each of its input arguments to be sure that they are not
+// a subset of the string being appended to. That is, while this will work:
+//
+//   std::string s = "foo";
+//   s += s;
+//
+// This output is undefined:
+//
+//   std::string s = "foo";
+//   StrAppend(&s, s);
+//
+// This output is undefined as well, since `absl::string_view` does not own its
+// data:
+//
+//   std::string s = "foobar";
+//   absl::string_view p = s;
+//   StrAppend(&s, p);
+
+// Nothing to append: leaves the string untouched.
+inline void StrAppend(std::string*) {}
+// One to four arguments are declared here and defined out of line.
+void StrAppend(std::string* dest, const AlphaNum& a);
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+               const AlphaNum& c);
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+               const AlphaNum& c, const AlphaNum& d);
+
+// Support 5 or more arguments
+//
+// Same conversion scheme as the variadic StrCat(): every argument becomes a
+// string_view and the whole list is passed to AppendPieces in one call.
+template <typename... AV>
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+                      const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
+                      const AV&... args) {
+  strings_internal::AppendPieces(
+      dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
+             static_cast<const AlphaNum&>(args).Piece()...});
+}
+
+// Helper function for the future StrCat default floating-point format, %.6g
+// This is fast.
+inline strings_internal::AlphaNumBuffer<
+    numbers_internal::kSixDigitsToBufferSize>
+SixDigits(double d) {
+  strings_internal::AlphaNumBuffer<numbers_internal::kSixDigitsToBufferSize>
+      result;
+  // Format straight into the buffer being returned; the value returned by
+  // SixDigitsToBuffer is stored as the buffer's size.
+  result.size = numbers_internal::SixDigitsToBuffer(d, &result.data[0]);
+  return result;
+}
+
+ABSL_NAMESPACE_END
+}  // namespace absl
+
+#endif  // ABSL_STRINGS_STR_CAT_H_
diff --git a/third_party/abseil_cpp/absl/strings/str_cat_benchmark.cc b/third_party/abseil_cpp/absl/strings/str_cat_benchmark.cc
new file mode 100644
index 000000000000..02c4dbe6d8f1
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/str_cat_benchmark.cc
@@ -0,0 +1,187 @@
+// Copyright 2018 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/str_cat.h"
+
+// NOTE(review): this file calls strcpy, snprintf and std::abort, but
+// <cstring>, <cstdio> and <cstdlib> are not included here; presumably they
+// arrive transitively -- consider including them directly.
+#include <cstdint>
+#include <string>
+
+#include "benchmark/benchmark.h"
+#include "absl/strings/substitute.h"
+
+namespace {
+
+const char kStringOne[] = "Once Upon A Time, ";
+const char kStringTwo[] = "There was a string benchmark";
+
+// We want to include negative numbers in the benchmark, so this function
+// is used to count 0, 1, -1, 2, -2, 3, -3, ...
+inline int IncrementAlternatingSign(int i) {
+  return i > 0 ? -i : 1 - i;
+}
+
+// Builds a string with StrCat, copies it into a stack buffer and sums its
+// bytes, so the concatenated text is actually consumed.
+void BM_Sum_By_StrCat(benchmark::State& state) {
+  int i = 0;
+  char foo[100];
+  for (auto _ : state) {
+    // NOLINTNEXTLINE(runtime/printf)
+    strcpy(foo, absl::StrCat(kStringOne, i, kStringTwo, i * 65536ULL).c_str());
+    int sum = 0;
+    for (char* f = &foo[0]; *f != 0; ++f) {
+      sum += *f;
+    }
+    benchmark::DoNotOptimize(sum);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_Sum_By_StrCat);
+
+// Baseline: formats "<one> <two>:<i>" with snprintf into a stack buffer.
+void BM_StrCat_By_snprintf(benchmark::State& state) {
+  int i = 0;
+  char on_stack[1000];
+  for (auto _ : state) {
+    snprintf(on_stack, sizeof(on_stack), "%s %s:%d", kStringOne, kStringTwo, i);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_StrCat_By_snprintf);
+
+// Baseline: builds the same text with a chain of std::string operator+.
+void BM_StrCat_By_Strings(benchmark::State& state) {
+  int i = 0;
+  for (auto _ : state) {
+    std::string result =
+        std::string(kStringOne) + " " + kStringTwo + ":" + absl::StrCat(i);
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_StrCat_By_Strings);
+
+// Baseline: builds the same text with repeated std::string operator+=.
+void BM_StrCat_By_StringOpPlus(benchmark::State& state) {
+  int i = 0;
+  for (auto _ : state) {
+    std::string result = kStringOne;
+    result += " ";
+    result += kStringTwo;
+    result += ":";
+    result += absl::StrCat(i);
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_StrCat_By_StringOpPlus);
+
+// The same text built by a single five-argument StrCat call.
+void BM_StrCat_By_StrCat(benchmark::State& state) {
+  int i = 0;
+  for (auto _ : state) {
+    std::string result = absl::StrCat(kStringOne, " ", kStringTwo, ":", i);
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_StrCat_By_StrCat);
+
+// Hexadecimal formatting through absl::Hex inside StrCat.
+void BM_HexCat_By_StrCat(benchmark::State& state) {
+  int i = 0;
+  for (auto _ : state) {
+    std::string result =
+        absl::StrCat(kStringOne, " ", absl::Hex(int64_t{i} + 0x10000000));
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_HexCat_By_StrCat);
+
+// Counterpart of BM_HexCat_By_StrCat that passes the value to Substitute as a
+// void* instead of wrapping it in absl::Hex.
+void BM_HexCat_By_Substitute(benchmark::State& state) {
+  int i = 0;
+  for (auto _ : state) {
+    std::string result = absl::Substitute(
+        "$0 $1", kStringOne, reinterpret_cast<void*>(int64_t{i} + 0x10000000));
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_HexCat_By_Substitute);
+
+// Passes a float directly to StrCat (AlphaNum's float constructor).
+void BM_FloatToString_By_StrCat(benchmark::State& state) {
+  int i = 0;
+  float foo = 0.0f;
+  for (auto _ : state) {
+    std::string result = absl::StrCat(foo += 1.001f, " != ", int64_t{i});
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_FloatToString_By_StrCat);
+
+// Formats a double through the explicit absl::SixDigits helper.
+void BM_DoubleToString_By_SixDigits(benchmark::State& state) {
+  int i = 0;
+  double foo = 0.0;
+  for (auto _ : state) {
+    std::string result =
+        absl::StrCat(absl::SixDigits(foo += 1.001), " != ", int64_t{i});
+    benchmark::DoNotOptimize(result);
+    i = IncrementAlternatingSign(i);
+  }
+}
+BENCHMARK(BM_DoubleToString_By_SixDigits);
+
+// Per iteration, starts from an empty string and keeps appending `chunks...`
+// with one StrAppend call until the string holds at least `total_bytes` bytes.
+template <typename... Chunks>
+void BM_StrAppendImpl(benchmark::State& state, size_t total_bytes,
+                      Chunks... chunks) {
+  for (auto s : state) {
+    std::string result;
+    while (result.size() < total_bytes) {
+      absl::StrAppend(&result, chunks...);
+      benchmark::DoNotOptimize(result);
+    }
+  }
+}
+
+// Dispatches on the second benchmark argument (chunks per StrAppend call) to
+// the matching BM_StrAppendImpl instantiation; the first argument is the
+// target size in bytes. Any chunk count other than 1, 2, 4 or 8 aborts.
+void BM_StrAppend(benchmark::State& state) {
+  const int total_bytes = state.range(0);
+  const int chunks_at_a_time = state.range(1);
+  const absl::string_view kChunk = "0123456789";
+
+  switch (chunks_at_a_time) {
+    case 1:
+      return BM_StrAppendImpl(state, total_bytes, kChunk);
+    case 2:
+      return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk);
+    case 4:
+      return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk, kChunk,
+                              kChunk);
+    case 8:
+      return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk, kChunk,
+                              kChunk, kChunk, kChunk, kChunk, kChunk);
+    default:
+      std::abort();
+  }
+}
+
+// Registers the (bytes, chunks) argument pairs for BM_StrAppend. The factor 10
+// in the divisibility test is the length of the chunk string used above.
+template <typename B>
+void StrAppendConfig(B* benchmark) {
+  for (int bytes : {10, 100, 1000, 10000}) {
+    for (int chunks : {1, 2, 4, 8}) {
+      // Only add the ones that divide properly. Otherwise we are over counting.
+      if (bytes % (10 * chunks) == 0) {
+        benchmark->Args({bytes, chunks});
+      }
+    }
+  }
+}
+
+BENCHMARK(BM_StrAppend)->Apply(StrAppendConfig);
+
+}  // namespace
diff --git a/third_party/abseil_cpp/absl/strings/str_cat_test.cc b/third_party/abseil_cpp/absl/strings/str_cat_test.cc
new file mode 100644
index 000000000000..f3770dc076f0
--- /dev/null
+++ b/third_party/abseil_cpp/absl/strings/str_cat_test.cc
@@ -0,0 +1,610 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Unit tests for all str_cat.h functions
+
+#include "absl/strings/str_cat.h"
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/strings/substitute.h"
+
+#ifdef __ANDROID__
+// Android assert messages only go to system log, so death tests cannot inspect
+// the message for matching.
+#define ABSL_EXPECT_DEBUG_DEATH(statement, regex) \
+  EXPECT_DEBUG_DEATH(statement, ".*")
+#else
+#define ABSL_EXPECT_DEBUG_DEATH(statement, regex) \
+  EXPECT_DEBUG_DEATH(statement, regex)
+#endif
+
+namespace {
+
+// Test absl::StrCat of ints and longs of various sizes and signedness.
+TEST(StrCat, Ints) {
+  const short s = -1;  // NOLINT(runtime/int)
+  const uint16_t us = 2;
+  const int i = -3;
+  const unsigned int ui = 4;
+  const long l = -5;                 // NOLINT(runtime/int)
+  const unsigned long ul = 6;        // NOLINT(runtime/int)
+  const long long ll = -7;           // NOLINT(runtime/int)
+  const unsigned long long ull = 8;  // NOLINT(runtime/int)
+  const ptrdiff_t ptrdiff = -9;
+  const size_t size = 10;
+  const intptr_t intptr = -12;
+  const uintptr_t uintptr = 13;
+  std::string answer;
+  answer = absl::StrCat(s, us);
+  EXPECT_EQ(answer, "-12");
+  answer = absl::StrCat(i, ui);
+  EXPECT_EQ(answer, "-34");
+  answer = absl::StrCat(l, ul);
+  EXPECT_EQ(answer, "-56");
+  answer = absl::StrCat(ll, ull);
+  EXPECT_EQ(answer, "-78");
+  answer = absl::StrCat(ptrdiff, size);
+  EXPECT_EQ(answer, "-910");
+  answer = absl::StrCat(ptrdiff, intptr);
+  EXPECT_EQ(answer, "-9-12");
+  answer = absl::StrCat(uintptr, 0);
+  EXPECT_EQ(answer, "130");
+}
+
+// Covers an unscoped enum and scoped enums with default, 64-bit unsigned,
+// 32-bit signed and 32-bit unsigned underlying types, including values that
+// do not fit in `int`.
+TEST(StrCat, Enums) {
+  enum SmallNumbers { One = 1, Ten = 10 } e = Ten;
+  EXPECT_EQ("10", absl::StrCat(e));
+  EXPECT_EQ("-5", absl::StrCat(SmallNumbers(-5)));
+
+  enum class Option { Boxers = 1, Briefs = -1 };
+
+  EXPECT_EQ("-1", absl::StrCat(Option::Briefs));
+
+  enum class Airplane : uint64_t {
+    Airbus = 1,
+    Boeing = 1000,
+    Canary = 10000000000  // too big for "int"
+  };
+
+  EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
+
+  enum class TwoGig : int32_t {
+    TwoToTheZero = 1,
+    TwoToTheSixteenth = 1 << 16,
+    TwoToTheThirtyFirst = INT32_MIN
+  };
+  EXPECT_EQ("65536", absl::StrCat(TwoGig::TwoToTheSixteenth));
+  EXPECT_EQ("-2147483648", absl::StrCat(TwoGig::TwoToTheThirtyFirst));
+  EXPECT_EQ("-1", absl::StrCat(static_cast<TwoGig>(-1)));
+
+  enum class FourGig : uint32_t {
+    TwoToTheZero = 1,
+    TwoToTheSixteenth = 1 << 16,
+    TwoToTheThirtyFirst = 1U << 31  // too big for "int"
+  };
+  EXPECT_EQ("65536", absl::StrCat(FourGig::TwoToTheSixteenth));
+  EXPECT_EQ("2147483648", absl::StrCat(FourGig::TwoToTheThirtyFirst));
+  EXPECT_EQ("4294967295", absl::StrCat(static_cast<FourGig>(-1)));
+
+  EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
+}
+
+// Mixes std::string, string_view, C strings, bools, integers and SixDigits
+// values across different argument counts.
+TEST(StrCat, Basics) {
+  std::string result;
+
+  std::string strs[] = {"Hello", "Cruel", "World"};
+
+  std::string stdstrs[] = {
+    "std::Hello",
+    "std::Cruel",
+    "std::World"
+  };
+
+  absl::string_view pieces[] = {"Hello", "Cruel", "World"};
+
+  const char* c_strs[] = {
+    "Hello",
+    "Cruel",
+    "World"
+  };
+
+  int32_t i32s[] = {'H', 'C', 'W'};
+  uint64_t ui64s[] = {12345678910LL, 10987654321LL};
+
+  EXPECT_EQ(absl::StrCat(), "");
+
+  result = absl::StrCat(false, true, 2, 3);
+  EXPECT_EQ(result, "0123");
+
+  result = absl::StrCat(-1);
+  EXPECT_EQ(result, "-1");
+
+  result = absl::StrCat(absl::SixDigits(0.5));
+  EXPECT_EQ(result, "0.5");
+
+  result = absl::StrCat(strs[1], pieces[2]);
+  EXPECT_EQ(result, "CruelWorld");
+
+  result = absl::StrCat(stdstrs[1], " ", stdstrs[2]);
+  EXPECT_EQ(result, "std::Cruel std::World");
+
+  result = absl::StrCat(strs[0], ", ", pieces[2]);
+  EXPECT_EQ(result, "Hello, World");
+
+  result = absl::StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
+  EXPECT_EQ(result, "Hello, Cruel World!");
+
+  result = absl::StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
+  EXPECT_EQ(result, "Hello, Cruel World");
+
+  result = absl::StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+  EXPECT_EQ(result, "Hello, Cruel World");
+
+  result = absl::StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+  EXPECT_EQ(result, "ASCII 72, 67 87!");
+
+  result = absl::StrCat(ui64s[0], ", ", ui64s[1], "!");
+  EXPECT_EQ(result, "12345678910, 10987654321!");
+
+  std::string one =
+      "1";  // Actually, it's the size of this string that we want; a
+            // 64-bit build distinguishes between size_t and uint64_t,
+            // even though they're both unsigned 64-bit values.
+  result = absl::StrCat("And a ", one.size(), " and a ",
+                        &result[2] - &result[0], " and a ", one, " 2 3 4", "!");
+  EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!");
+
+  // result = absl::StrCat("Single chars won't compile", '!');
+  // result = absl::StrCat("Neither will nullptrs", nullptr);
+  result =
+      absl::StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0);
+  EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33");
+
+  float f = 100000.5;
+  result = absl::StrCat("A hundred K and a half is ", absl::SixDigits(f));
+  EXPECT_EQ(result, "A hundred K and a half is 100000");
+
+  f = 100001.5;
+  result =
+      absl::StrCat("A hundred K and one and a half is ", absl::SixDigits(f));
+  EXPECT_EQ(result, "A hundred K and one and a half is 100002");
+
+  double d = 100000.5;
+  d *= d;
+  result =
+      absl::StrCat("A hundred K and a half squared is ", absl::SixDigits(d));
+  EXPECT_EQ(result, "A hundred K and a half squared is 1.00001e+10");
+
+  result = absl::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888,
+                        999999999);
+  EXPECT_EQ(result, "12333444455555666666777777788888888999999999");
+}
+
+// Empty-string arguments at one to five arguments, which exercises the inline
+// one-argument form, the 2/3/4-argument overloads and the variadic template.
+TEST(StrCat, CornerCases) {
+  std::string result;
+
+  result = absl::StrCat("");  // NOLINT
+  EXPECT_EQ(result, "");
+  result = absl::StrCat("", "");
+  EXPECT_EQ(result, "");
+  result = absl::StrCat("", "", "");
+  EXPECT_EQ(result, "");
+  result = absl::StrCat("", "", "", "");
+  EXPECT_EQ(result, "");
+  result = absl::StrCat("", "", "", "", "");
+  EXPECT_EQ(result, "");
+}
+
+// A minimal allocator that uses malloc().
+template <typename T>
+struct Mallocator {
+  // NOTE(review): std::numeric_limits, std::malloc and std::free are used
+  // below, but the include list at the top of this file names neither
+  // <limits> nor <cstdlib>; presumably they arrive transitively -- confirm.
+  typedef T value_type;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+
+  size_type max_size() const {
+    return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type);
+  }
+  template <typename U>
+  struct rebind {
+    typedef Mallocator<U> other;
+  };
+  Mallocator() = default;
+  template <class U>
+  Mallocator(const Mallocator<U>&) {}  // NOLINT(runtime/explicit)
+
+  // The malloc result is returned unchecked, so a failed allocation yields a
+  // null pointer rather than an exception.
+  T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); }
+  void deallocate(T* p, size_t) { std::free(p); }
+};
+// The allocator holds no state, so any two instances compare equal.
+template <typename T, typename U>
+bool operator==(const Mallocator<T>&, const Mallocator<U>&) {
+  return true;
+}
+template <typename T, typename U>
+bool operator!=(const Mallocator<T>&, const Mallocator<U>&) {
+  return false;
+}
+
+// StrCat must accept std::basic_string instantiated with a non-default
+// allocator (AlphaNum's constructor is templated on the allocator type).
+TEST(StrCat, CustomAllocator) {
+  using mstring =
+      std::basic_string<char, std::char_traits<char>, Mallocator<char>>;
+  const mstring str1("PARACHUTE OFF A BLIMP INTO MOSCONE!!");
+
+  const mstring str2("Read this book about coffee tables");
+
+  std::string result = absl::StrCat(str1, str2);
+  EXPECT_EQ(result,
+            "PARACHUTE OFF A BLIMP INTO MOSCONE!!"
+            "Read this book about coffee tables");
+}
+
+// Walks the argument count upward one at a time, then finishes with a single
+// call that takes 62 arguments.
+TEST(StrCat, MaxArgs) {
+  std::string result;
+  // Test 10 up to 26 arguments, the old maximum
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
+  EXPECT_EQ(result, "123456789a");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
+  EXPECT_EQ(result, "123456789ab");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
+  EXPECT_EQ(result, "123456789abc");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d");
+  EXPECT_EQ(result, "123456789abcd");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e");
+  EXPECT_EQ(result, "123456789abcde");
+  result =
+      absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f");
+  EXPECT_EQ(result, "123456789abcdef");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g");
+  EXPECT_EQ(result, "123456789abcdefg");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h");
+  EXPECT_EQ(result, "123456789abcdefgh");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i");
+  EXPECT_EQ(result, "123456789abcdefghi");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j");
+  EXPECT_EQ(result, "123456789abcdefghij");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k");
+  EXPECT_EQ(result, "123456789abcdefghijk");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k", "l");
+  EXPECT_EQ(result, "123456789abcdefghijkl");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k", "l", "m");
+  EXPECT_EQ(result, "123456789abcdefghijklm");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k", "l", "m", "n");
+  EXPECT_EQ(result, "123456789abcdefghijklmn");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k", "l", "m", "n", "o");
+  EXPECT_EQ(result, "123456789abcdefghijklmno");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k", "l", "m", "n", "o", "p");
+  EXPECT_EQ(result, "123456789abcdefghijklmnop");
+  result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+                        "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q");
+  EXPECT_EQ(result, "123456789abcdefghijklmnopq");
+  // No limit thanks to C++11's variadic templates
+  result = absl::StrCat(
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g", "h",
+      "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+      "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
+      "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
+  EXPECT_EQ(result,
+            "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+}
+
+TEST(StrAppend, Basics) {
+  std::string result = "existing text";
+
+  std::string strs[] = {"Hello", "Cruel", "World"};
+
+  std::string stdstrs[] = {
+    "std::Hello",
+    "std::Cruel",
+    "std::World"
+  };
+
+  absl::string_view pieces[] = {"Hello", "Cruel", "World"};
+
+  const char* c_strs[] = {
+    "Hello",
+    "Cruel",
+    "World"
+  };
+
+  int32_t i32s[] = {'H', 'C', 'W'};
+  uint64_t ui64s[] = {12345678910LL, 10987654321LL};
+
+  std::string::size_type old_size = result.size();
+  absl::StrAppend(&result);
+  EXPECT_EQ(result.size(), old_size);
+
+  old_size = result.size();
+  absl::StrAppend(&result, strs[0]);
+  EXPECT_EQ(result.substr(old_size), "Hello");
+
+  old_size = result.size();
+  absl::StrAppend(&result, strs[1], pieces[2]);
+  EXPECT_EQ(result.substr(old_size), "CruelWorld");
+
+  old_size = result.size();
+  absl::StrAppend(&result, stdstrs[0], ", ", pieces[2]);
+  EXPECT_EQ(result.substr(old_size), "std::Hello, World");
+
+  old_size = result.size();
+  absl::StrAppend(&result, strs[0], ", ", stdstrs[1], " ", strs[2], "!");
+  EXPECT_EQ(result.substr(old_size), "Hello, std::Cruel World!");
+
+  old_size = result.size();
+  absl::StrAppend(&result, pieces[0], ", ", pieces[1], " ", pieces[2]);
+  EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
+
+  old_size = result.size();
+  absl::StrAppend(&result, c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+  EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
+
+  old_size = result.size();
+  absl::StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+  EXPECT_EQ(result.substr(old_size), "ASCII 72, 67 87!");
+
+  old_size = result.size();
+  absl::StrAppend(&result, ui64s[0], ", ", ui64s[1], "!");
+  EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!");
+
+  std::string one =
+      "1";  // Actually, it's the size of this string that we want; a
+            // 64-bit build distinguishes between size_t and uint64_t,
+            // even though they're both unsigned 64-bit values.
+  old_size = result.size();
+  absl::StrAppend(&result, "And a ", one.size(), " and a ",
+                  &result[2] - &result[0], " and a ", one, " 2 3 4", "!");
+  EXPECT_EQ(result.substr(old_size), "And a 1 and a 2 and a 1 2 3 4!");
+
+  // result = absl::StrCat("Single chars won't compile", '!');
+  // result = absl::StrCat("Neither will nullptrs", nullptr);
+  old_size = result.size();
+  absl::StrAppend(&result,
+                  "To output a char by ASCII/numeric value, use +: ", '!'
+ 0); + EXPECT_EQ(result.substr(old_size), + "To output a char by ASCII/numeric value, use +: 33"); + + // Test 9 arguments, the old maximum + old_size = result.size(); + absl::StrAppend(&result, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888, + 9); + EXPECT_EQ(result.substr(old_size), "1223334444555556666667777777888888889"); + + // No limit thanks to C++11's variadic templates + old_size = result.size(); + absl::StrAppend( + &result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", // + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", // + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", // + "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", // + "No limit thanks to C++11's variadic templates"); + EXPECT_EQ(result.substr(old_size), + "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "No limit thanks to C++11's variadic templates"); +} + +TEST(StrCat, VectorBoolReferenceTypes) { + std::vector<bool> v; + v.push_back(true); + v.push_back(false); + std::vector<bool> const& cv = v; + // Test that vector<bool>::reference and vector<bool>::const_reference + // are handled as if they were really bool types and not the proxy types + // they really are. + std::string result = absl::StrCat(v[0], v[1], cv[0], cv[1]); // NOLINT + EXPECT_EQ(result, "1010"); +} + +// Passing nullptr to memcpy is undefined behavior and this test +// provides coverage of codepaths that handle empty strings with nullptrs. +TEST(StrCat, AvoidsMemcpyWithNullptr) { + EXPECT_EQ(absl::StrCat(42, absl::string_view{}), "42"); + + // Cover CatPieces code. + EXPECT_EQ(absl::StrCat(1, 2, 3, 4, 5, absl::string_view{}), "12345"); + + // Cover AppendPieces. 
+ std::string result; + absl::StrAppend(&result, 1, 2, 3, 4, 5, absl::string_view{}); + EXPECT_EQ(result, "12345"); +} + +#ifdef GTEST_HAS_DEATH_TEST +TEST(StrAppend, Death) { + std::string s = "self"; + // on linux it's "assertion", on mac it's "Assertion", + // on chromiumos it's "Assertion ... failed". + ABSL_EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s.c_str() + 1), + "ssertion.*failed"); + ABSL_EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s), "ssertion.*failed"); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST(StrAppend, CornerCases) { + std::string result; + absl::StrAppend(&result, ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", "", ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", "", "", ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", "", "", "", ""); + EXPECT_EQ(result, ""); +} + +TEST(StrAppend, CornerCasesNonEmptyAppend) { + for (std::string result : {"hello", "a string too long to fit in the SSO"}) { + const std::string expected = result; + absl::StrAppend(&result, ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", "", ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", "", "", ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", "", "", "", ""); + EXPECT_EQ(result, expected); + } +} + +template <typename IntType> +void CheckHex(IntType v, const char* nopad_format, const char* zeropad_format, + const char* spacepad_format) { + char expected[256]; + + std::string actual = absl::StrCat(absl::Hex(v, absl::kNoPad)); + snprintf(expected, sizeof(expected), nopad_format, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + + for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad20; ++spec) { + std::string actual = + absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec))); + snprintf(expected, sizeof(expected), zeropad_format, + spec - absl::kZeroPad2 + 
2, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + } + + for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad20; ++spec) { + std::string actual = + absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec))); + snprintf(expected, sizeof(expected), spacepad_format, + spec - absl::kSpacePad2 + 2, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + } +} + +template <typename IntType> +void CheckDec(IntType v, const char* nopad_format, const char* zeropad_format, + const char* spacepad_format) { + char expected[256]; + + std::string actual = absl::StrCat(absl::Dec(v, absl::kNoPad)); + snprintf(expected, sizeof(expected), nopad_format, v); + EXPECT_EQ(expected, actual) << " decimal value " << v; + + for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad20; ++spec) { + std::string actual = + absl::StrCat(absl::Dec(v, static_cast<absl::PadSpec>(spec))); + snprintf(expected, sizeof(expected), zeropad_format, + spec - absl::kZeroPad2 + 2, v); + EXPECT_EQ(expected, actual) + << " decimal value " << v << " format '" << zeropad_format + << "' digits " << (spec - absl::kZeroPad2 + 2); + } + + for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad20; ++spec) { + std::string actual = + absl::StrCat(absl::Dec(v, static_cast<absl::PadSpec>(spec))); + snprintf(expected, sizeof(expected), spacepad_format, + spec - absl::kSpacePad2 + 2, v); + EXPECT_EQ(expected, actual) + << " decimal value " << v << " format '" << spacepad_format + << "' digits " << (spec - absl::kSpacePad2 + 2); + } +} + +void CheckHexDec64(uint64_t v) { + unsigned long long ullv = v; // NOLINT(runtime/int) + + CheckHex(ullv, "%llx", "%0*llx", "%*llx"); + CheckDec(ullv, "%llu", "%0*llu", "%*llu"); + + long long llv = static_cast<long long>(ullv); // NOLINT(runtime/int) + CheckDec(llv, "%lld", "%0*lld", "%*lld"); + + if (sizeof(v) == sizeof(&v)) { + auto uintptr = static_cast<uintptr_t>(v); + void* ptr = reinterpret_cast<void*>(uintptr); + CheckHex(ptr, "%llx", "%0*llx", "%*llx"); 
+ } +} + +void CheckHexDec32(uint32_t uv) { + CheckHex(uv, "%x", "%0*x", "%*x"); + CheckDec(uv, "%u", "%0*u", "%*u"); + int32_t v = static_cast<int32_t>(uv); + CheckDec(v, "%d", "%0*d", "%*d"); + + if (sizeof(v) == sizeof(&v)) { + auto uintptr = static_cast<uintptr_t>(v); + void* ptr = reinterpret_cast<void*>(uintptr); + CheckHex(ptr, "%x", "%0*x", "%*x"); + } +} + +void CheckAll(uint64_t v) { + CheckHexDec64(v); + CheckHexDec32(static_cast<uint32_t>(v)); +} + +void TestFastPrints() { + // Test all small ints; there aren't many and they're common. + for (int i = 0; i < 10000; i++) { + CheckAll(i); + } + + CheckAll(std::numeric_limits<uint64_t>::max()); + CheckAll(std::numeric_limits<uint64_t>::max() - 1); + CheckAll(std::numeric_limits<int64_t>::min()); + CheckAll(std::numeric_limits<int64_t>::min() + 1); + CheckAll(std::numeric_limits<uint32_t>::max()); + CheckAll(std::numeric_limits<uint32_t>::max() - 1); + CheckAll(std::numeric_limits<int32_t>::min()); + CheckAll(std::numeric_limits<int32_t>::min() + 1); + CheckAll(999999999); // fits in 32 bits + CheckAll(1000000000); // fits in 32 bits + CheckAll(9999999999); // doesn't fit in 32 bits + CheckAll(10000000000); // doesn't fit in 32 bits + CheckAll(999999999999999999); // fits in signed 64-bit + CheckAll(9999999999999999999u); // fits in unsigned 64-bit, but not signed. + CheckAll(1000000000000000000); // fits in signed 64-bit + CheckAll(10000000000000000000u); // fits in unsigned 64-bit, but not signed. + + CheckAll(999999999876543210); // check all decimal digits, signed + CheckAll(9999999999876543210u); // check all decimal digits, unsigned. 
+ CheckAll(0x123456789abcdef0); // check all hex digits + CheckAll(0x12345678); + + int8_t minus_one_8bit = -1; + EXPECT_EQ("ff", absl::StrCat(absl::Hex(minus_one_8bit))); + + int16_t minus_one_16bit = -1; + EXPECT_EQ("ffff", absl::StrCat(absl::Hex(minus_one_16bit))); +} + +TEST(Numbers, TestFunctionsMovedOverFromNumbersMain) { + TestFastPrints(); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/str_format.h b/third_party/abseil_cpp/absl/strings/str_format.h new file mode 100644 index 000000000000..01465107e105 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_format.h @@ -0,0 +1,813 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_format.h +// ----------------------------------------------------------------------------- +// +// The `str_format` library is a typesafe replacement for the family of +// `printf()` string formatting routines within the `<cstdio>` standard library +// header. Like the `printf` family, `str_format` uses a "format string" to +// perform argument substitutions based on types. See the `FormatSpec` section +// below for format string documentation. 
+// +// Example: +// +// std::string s = absl::StrFormat( +// "%s %s You have $%d!", "Hello", name, dollars); +// +// The library consists of the following basic utilities: +// +// * `absl::StrFormat()`, a type-safe replacement for `std::sprintf()`, to +// write a format string to a `string` value. +// * `absl::StrAppendFormat()` to append a format string to a `string`. +// * `absl::StreamFormat()` to more efficiently write a format string to a +// stream, such as `std::cout`. +// * `absl::PrintF()`, `absl::FPrintF()` and `absl::SNPrintF()` as +// replacements for `std::printf()`, `std::fprintf()` and `std::snprintf()`. +// +// Note: a version of `std::sprintf()` is not supported as it is +// generally unsafe due to buffer overflows. +// +// Additionally, you can provide a format string (and its associated arguments) +// using one of the following abstractions: +// +// * A `FormatSpec` class template fully encapsulates a format string and its +// type arguments and is usually provided to `str_format` functions as a +// variadic argument of type `FormatSpec<Args...>`. The `FormatSpec<Args...>` +// template is evaluated at compile-time, providing type safety. +// * A `ParsedFormat` instance, which encapsulates a specific, pre-compiled +// format string for a specific set of type(s), and which can be passed +// between API boundaries. (The `FormatSpec` type should not be used +// directly except as an argument type for wrapper functions.) +// +// The `str_format` library provides the ability to output its format strings to +// arbitrary sink types: +// +// * A generic `Format()` function to write outputs to arbitrary sink types, +// which must implement a `FormatRawSink` interface. +// +// * A `FormatUntyped()` function that is similar to `Format()` except it is +// loosely typed. `FormatUntyped()` is not a template and does not perform +// any compile-time checking of the format string; instead, it returns a +// boolean from a runtime check. 
+// +// In addition, the `str_format` library provides extension points for +// augmenting formatting to new types. See "StrFormat Extensions" below. + +#ifndef ABSL_STRINGS_STR_FORMAT_H_ +#define ABSL_STRINGS_STR_FORMAT_H_ + +#include <cstdio> +#include <string> + +#include "absl/strings/internal/str_format/arg.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/bind.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/checker.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/extension.h" // IWYU pragma: export +#include "absl/strings/internal/str_format/parser.h" // IWYU pragma: export + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// UntypedFormatSpec +// +// A type-erased class that can be used directly within untyped API entry +// points. An `UntypedFormatSpec` is specifically used as an argument to +// `FormatUntyped()`. +// +// Example: +// +// absl::UntypedFormatSpec format("%d"); +// std::string out; +// CHECK(absl::FormatUntyped(&out, format, {absl::FormatArg(1)})); +class UntypedFormatSpec { + public: + UntypedFormatSpec() = delete; + UntypedFormatSpec(const UntypedFormatSpec&) = delete; + UntypedFormatSpec& operator=(const UntypedFormatSpec&) = delete; + + explicit UntypedFormatSpec(string_view s) : spec_(s) {} + + protected: + explicit UntypedFormatSpec(const str_format_internal::ParsedFormatBase* pc) + : spec_(pc) {} + + private: + friend str_format_internal::UntypedFormatSpecImpl; + str_format_internal::UntypedFormatSpecImpl spec_; +}; + +// FormatStreamed() +// +// Takes a streamable argument and returns an object that can print it +// with '%s'. Allows printing of types that have an `operator<<` but no +// intrinsic type support within `StrFormat()` itself. 
+// +// Example: +// +// absl::StrFormat("%s", absl::FormatStreamed(obj)); +template <typename T> +str_format_internal::StreamedWrapper<T> FormatStreamed(const T& v) { + return str_format_internal::StreamedWrapper<T>(v); +} + +// FormatCountCapture +// +// This class provides a way to safely wrap `StrFormat()` captures of `%n` +// conversions, which denote the number of characters written by a formatting +// operation to this point, into an integer value. +// +// This wrapper is designed to allow safe usage of `%n` within `StrFormat()`; in +// the `printf()` family of functions, `%n` is not safe to use, as the `int *` +// buffer can be used to capture arbitrary data. +// +// Example: +// +// int n = 0; +// std::string s = absl::StrFormat("%s%d%n", "hello", 123, +// absl::FormatCountCapture(&n)); +// EXPECT_EQ(8, n); +class FormatCountCapture { + public: + explicit FormatCountCapture(int* p) : p_(p) {} + + private: + // FormatCountCaptureHelper is used to define FormatConvertImpl() for this + // class. + friend struct str_format_internal::FormatCountCaptureHelper; + // Unused() is here because of the false positive from -Wunused-private-field; + // p_ is used in the templated function of the friend FormatCountCaptureHelper + // class. + int* Unused() { return p_; } + int* p_; +}; + +// FormatSpec +// +// The `FormatSpec` type defines the makeup of a format string within the +// `str_format` library. It is a variadic class template that is evaluated at +// compile-time, according to the format string and arguments that are passed to +// it. +// +// You should not need to manipulate this type directly. You should only name it +// if you are writing wrapper functions which accept format arguments that will +// be provided unmodified to functions in this library. Such a wrapper function +// might be a class method that provides format arguments and/or internally uses +// the result of formatting. 
+// +// For a `FormatSpec` to be valid at compile-time, it must be provided as +// either: +// +// * A `constexpr` literal or `absl::string_view`, which is how it is most +// often used. +// * A `ParsedFormat` instantiation, which ensures the format string is +// valid before use. (See below.) +// +// Example: +// +// // Provided as a string literal. +// absl::StrFormat("Welcome to %s, Number %d!", "The Village", 6); +// +// // Provided as a constexpr absl::string_view. +// constexpr absl::string_view formatString = "Welcome to %s, Number %d!"; +// absl::StrFormat(formatString, "The Village", 6); +// +// // Provided as a pre-compiled ParsedFormat object. +// // Note that this example is useful only for illustration purposes. +// absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!"); +// absl::StrFormat(formatString, "TheVillage", 6); +// +// A format string generally follows the POSIX syntax as used within the POSIX +// `printf` specification. +// +// (See http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html.) +// +// Specifically, the `FormatSpec` supports the following type specifiers: +// * `c` for characters +// * `s` for strings +// * `d` or `i` for integers +// * `o` for unsigned integer conversions into octal +// * `x` or `X` for unsigned integer conversions into hex +// * `u` for unsigned integers +// * `f` or `F` for floating point values into decimal notation +// * `e` or `E` for floating point values into exponential notation +// * `a` or `A` for floating point values into hex exponential notation +// * `g` or `G` for floating point values into decimal or exponential +// notation based on their precision +// * `p` for pointer address values +// * `n` for the special case of writing out the number of characters +// written to this point. The resulting value must be captured within an +// `absl::FormatCountCapture` type. 
+// +// Implementation-defined behavior: +// * A null pointer provided to "%s" or "%p" is output as "(nil)". +// * A non-null pointer provided to "%p" is output in hex as if by %#x or +// %#lx. +// +// NOTE: `o`, `x`/`X` and `u` will convert signed values to their unsigned +// counterpart before formatting. +// +// Examples: +// "%c", 'a' -> "a" +// "%c", 32 -> " " +// "%s", "C" -> "C" +// "%s", std::string("C++") -> "C++" +// "%d", -10 -> "-10" +// "%o", 10 -> "12" +// "%x", 16 -> "10" +// "%f", 123456789 -> "123456789.000000" +// "%e", .01 -> "1.000000e-02" +// "%a", -3.0 -> "-0x1.8p+1" +// "%g", .01 -> "0.01" +// "%p", (void*)&value -> "0x7ffdeb6ad2a4" +// +// int n = 0; +// std::string s = absl::StrFormat( +// "%s%d%n", "hello", 123, absl::FormatCountCapture(&n)); +// EXPECT_EQ(8, n); +// +// The `FormatSpec` intrinsically supports all of these fundamental C++ types: +// +// * Characters: `char`, `signed char`, `unsigned char` +// * Integers: `int`, `short`, `unsigned short`, `unsigned`, `long`, +// `unsigned long`, `long long`, `unsigned long long` +// * Floating-point: `float`, `double`, `long double` +// +// However, in the `str_format` library, a format conversion specifies a broader +// C++ conceptual category instead of an exact type. For example, `%s` binds to +// any string-like argument, so `std::string`, `absl::string_view`, and +// `const char*` are all accepted. Likewise, `%d` accepts any integer-like +// argument, etc. + +template <typename... Args> +using FormatSpec = str_format_internal::FormatSpecTemplate< + str_format_internal::ArgumentToConv<Args>()...>; + +// ParsedFormat +// +// A `ParsedFormat` is a class template representing a preparsed `FormatSpec`, +// with template arguments specifying the conversion characters used within the +// format string. Such characters must be valid format type specifiers, and +// these type specifiers are checked at compile-time. 
+// +// Instances of `ParsedFormat` can be created, copied, and reused to speed up +// formatting loops. A `ParsedFormat` may either be constructed statically, or +// dynamically through its `New()` factory function, which only constructs a +// runtime object if the format is valid at that time. +// +// Example: +// +// // Verified at compile time. +// absl::ParsedFormat<'s', 'd'> formatString("Welcome to %s, Number %d!"); +// absl::StrFormat(formatString, "TheVillage", 6); +// +// // Verified at runtime. +// auto format_runtime = absl::ParsedFormat<'d'>::New(format_string); +// if (format_runtime) { +// value = absl::StrFormat(*format_runtime, i); +// } else { +// ... error case ... +// } + +#if defined(__cpp_nontype_template_parameter_auto) +// If C++17 is available, an 'extended' format is also allowed that can specify +// multiple conversion characters per format argument, using a combination of +// `absl::FormatConversionCharSet` enum values (logically a set union) +// via the `|` operator. (Single character-based arguments are still accepted, +// but cannot be combined.) Some common conversions also have predefined enum +// values, such as `absl::FormatConversionCharSet::kIntegral`. +// +// Example: +// // Extended format supports multiple conversion characters per argument, +// // specified via a combination of `FormatConversionCharSet` enums. +// using MyFormat = absl::ParsedFormat<absl::FormatConversionCharSet::d | +// absl::FormatConversionCharSet::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // `format` can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = absl::StrFormat(format, i); +template <auto... Conv> +using ParsedFormat = absl::str_format_internal::ExtendedParsedFormat< + absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#else +template <char... 
Conv> +using ParsedFormat = str_format_internal::ExtendedParsedFormat< + absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#endif // defined(__cpp_nontype_template_parameter_auto) + +// StrFormat() +// +// Returns a `string` given a `printf()`-style format string and zero or more +// additional arguments. Use it as you would `sprintf()`. `StrFormat()` is the +// primary formatting function within the `str_format` library, and should be +// used in most cases where you need type-safe conversion of types into +// formatted strings. +// +// The format string generally consists of ordinary character data along with +// one or more format conversion specifiers (denoted by the `%` character). +// Ordinary character data is returned unchanged into the result string, while +// each conversion specification performs a type substitution from +// `StrFormat()`'s other arguments. See the comments for `FormatSpec` for full +// information on the makeup of this format string. +// +// Example: +// +// std::string s = absl::StrFormat( +// "Welcome to %s, Number %d!", "The Village", 6); +// EXPECT_EQ("Welcome to The Village, Number 6!", s); +// +// Returns an empty string in case of error. +template <typename... Args> +ABSL_MUST_USE_RESULT std::string StrFormat(const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// StrAppendFormat() +// +// Appends to a `dst` string given a format string, and zero or more additional +// arguments, returning `*dst` as a convenience for chaining purposes. Appends +// nothing in case of error (but possibly alters its capacity). +// +// Example: +// +// std::string orig("For example PI is approximately "); +// std::cout << StrAppendFormat(&orig, "%12.6f", 3.14); +template <typename... 
Args> +std::string& StrAppendFormat(std::string* dst, + const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::AppendPack( + dst, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// StreamFormat() +// +// Writes to an output stream given a format string and zero or more arguments, +// generally in a manner that is more efficient than streaming the result of +// `absl::StrFormat()`. The returned object must be streamed before the full +// expression ends. +// +// Example: +// +// std::cout << absl::StreamFormat("%12.6f", 3.14); +template <typename... Args> +ABSL_MUST_USE_RESULT str_format_internal::Streamable StreamFormat( + const FormatSpec<Args...>& format, const Args&... args) { + return str_format_internal::Streamable( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// PrintF() +// +// Writes to stdout given a format string and zero or more arguments. This +// function is functionally equivalent to `std::printf()` (and type-safe); +// prefer `absl::PrintF()` over `std::printf()`. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// absl::PrintF("The capital of Mongolia is %s", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int PrintF(const FormatSpec<Args...>& format, const Args&... args) { + return str_format_internal::FprintF( + stdout, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// FPrintF() +// +// Writes to a file given a format string and zero or more arguments. This +// function is functionally equivalent to `std::fprintf()` (and type-safe); +// prefer `absl::FPrintF()` over `std::fprintf()`. 
+// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// absl::FPrintF(stdout, "The capital of Mongolia is %s", s); +// +// Outputs: "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int FPrintF(std::FILE* output, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::FprintF( + output, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// SNPrintF() +// +// Writes to a sized buffer given a format string and zero or more arguments. +// This function is functionally equivalent to `std::snprintf()` (and +// type-safe); prefer `absl::SNPrintF()` over `std::snprintf()`. +// +// In particular, a successful call to `absl::SNPrintF()` writes at most `size` +// bytes of the formatted output to `output`, including a NUL-terminator, and +// returns the number of bytes that would have been written if truncation did +// not occur. In the event of an error, a negative value is returned and `errno` +// is set. +// +// Example: +// +// std::string_view s = "Ulaanbaatar"; +// char output[128]; +// absl::SNPrintF(output, sizeof(output), +// "The capital of Mongolia is %s", s); +// +// Post-condition: output == "The capital of Mongolia is Ulaanbaatar" +// +template <typename... Args> +int SNPrintF(char* output, std::size_t size, const FormatSpec<Args...>& format, + const Args&... args) { + return str_format_internal::SnprintF( + output, size, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// ----------------------------------------------------------------------------- +// Custom Output Formatting Functions +// ----------------------------------------------------------------------------- + +// FormatRawSink +// +// FormatRawSink is a type erased wrapper around arbitrary sink objects +// specifically used as an argument to `Format()`. 
+// +// All the object has to do is define an overload of `AbslFormatFlush()` for the +// sink, usually by adding an ADL-based free function in the same namespace as +// the sink: +// +// void AbslFormatFlush(MySink* dest, absl::string_view part); +// +// where `dest` is the pointer passed to `absl::Format()`. The function should +// append `part` to `dest`. +// +// FormatRawSink does not own the passed sink object. The passed object must +// outlive the FormatRawSink. +class FormatRawSink { + public: + // Implicitly convert from any type that provides the hook function as + // described above. + template <typename T, + typename = typename std::enable_if<std::is_constructible< + str_format_internal::FormatRawSinkImpl, T*>::value>::type> + FormatRawSink(T* raw) // NOLINT + : sink_(raw) {} + + private: + friend str_format_internal::FormatRawSinkImpl; + str_format_internal::FormatRawSinkImpl sink_; +}; + +// Format() +// +// Writes a formatted string to an arbitrary sink object (implementing the +// `absl::FormatRawSink` interface), using a format string and zero or more +// additional arguments. +// +// By default, `std::string`, `std::ostream`, and `absl::Cord` are supported as +// destination objects. If a `std::string` is used the formatted string is +// appended to it. +// +// `absl::Format()` is a generic version of `absl::StrAppendFormat()`, for +// custom sinks. The format string, like format strings for `StrFormat()`, is +// checked at compile-time. +// +// On failure, this function returns `false` and the state of the sink is +// unspecified. +template <typename... Args> +bool Format(FormatRawSink raw_sink, const FormatSpec<Args...>& format, + const Args&... 
args) { + return str_format_internal::FormatUntyped( + str_format_internal::FormatRawSinkImpl::Extract(raw_sink), + str_format_internal::UntypedFormatSpecImpl::Extract(format), + {str_format_internal::FormatArgImpl(args)...}); +} + +// FormatArg +// +// A type-erased handle to a format argument specifically used as an argument to +// `FormatUntyped()`. You may construct `FormatArg` by passing a +// reference-to-const of any printable type. `FormatArg` is both copyable and +// assignable. The source data must outlive the `FormatArg` instance. See +// example below. +// +using FormatArg = str_format_internal::FormatArgImpl; + +// FormatUntyped() +// +// Writes a formatted string to an arbitrary sink object (implementing the +// `absl::FormatRawSink` interface), using an `UntypedFormatSpec` and zero or +// more additional arguments. +// +// This function acts as the most generic formatting function in the +// `str_format` library. The caller provides a raw sink, an unchecked format +// string, and (usually) a runtime specified list of arguments; no compile-time +// checking of formatting is performed within this function. As a result, a +// caller should check the return value to verify that no error occurred. +// On failure, this function returns `false` and the state of the sink is +// unspecified. +// +// The arguments are provided in an `absl::Span<const absl::FormatArg>`. +// Each `absl::FormatArg` object binds to a single argument and keeps a +// reference to it. The values used to create the `FormatArg` objects must +// outlive this function call. (See `str_format_arg.h` for information on +// the `FormatArg` class.) +// +// Example: +// +// std::optional<std::string> FormatDynamic( +// const std::string& in_format, +// const std::vector<std::string>& in_args) { +// std::string out; +// std::vector<absl::FormatArg> args; +// for (const auto& v : in_args) { +// // It is important that 'v' is a reference to the objects in in_args. 
+// // The values we pass to FormatArg must outlive the call to +// // FormatUntyped. +// args.emplace_back(v); +// } +// absl::UntypedFormatSpec format(in_format); +// if (!absl::FormatUntyped(&out, format, args)) { +// return std::nullopt; +// } +// return std::move(out); +// } +// +ABSL_MUST_USE_RESULT inline bool FormatUntyped( + FormatRawSink raw_sink, const UntypedFormatSpec& format, + absl::Span<const FormatArg> args) { + return str_format_internal::FormatUntyped( + str_format_internal::FormatRawSinkImpl::Extract(raw_sink), + str_format_internal::UntypedFormatSpecImpl::Extract(format), args); +} + +//------------------------------------------------------------------------------ +// StrFormat Extensions +//------------------------------------------------------------------------------ +// +// AbslFormatConvert() +// +// The StrFormat library provides a customization API for formatting +// user-defined types using absl::StrFormat(). The API relies on detecting an +// overload in the user-defined type's namespace of a free (non-member) +// `AbslFormatConvert()` function, usually as a friend definition with the +// following signature: +// +// absl::FormatConvertResult<...> AbslFormatConvert( +// const X& value, +// const absl::FormatConversionSpec& spec, +// absl::FormatSink *sink); +// +// An `AbslFormatConvert()` overload for a type should only be declared in the +// same file and namespace as said type. +// +// The abstractions within this definition include: +// +// * An `absl::FormatConversionSpec` to specify the fields to pull from a +// user-defined type's format string +// * An `absl::FormatSink` to hold the converted string data during the +// conversion process. +// * An `absl::FormatConvertResult` to hold the status of the returned +// formatting operation +// +// The return type encodes all the conversion characters that your +// AbslFormatConvert() routine accepts. The return value should be {true}. 
+// A return value of {false} will result in `StrFormat()` returning +// an empty string. This result will be propagated to the result of +// `FormatUntyped`. +// +// Example: +// +// struct Point { +// // To add formatting support to `Point`, we simply need to add a free +// // (non-member) function `AbslFormatConvert()`. This function interprets +// // `spec` to print in the requested format. The allowed conversion characters +// // can be restricted via the type of the result; in this example, +// // string and integral formatting are allowed (but not, for instance, +// // floating-point conversions like "%f"). You can add such a free function +// // using a friend declaration within the body of the class: +// friend absl::FormatConvertResult<absl::FormatConversionCharSet::kString | +// absl::FormatConversionCharSet::kIntegral> +// AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, +// absl::FormatSink* s) { +// if (spec.conversion_char() == absl::FormatConversionChar::s) { +// s->Append(absl::StrCat("x=", p.x, " y=", p.y)); +// } else { +// s->Append(absl::StrCat(p.x, ",", p.y)); +// } +// return {true}; +// } +// +// int x; +// int y; +// }; + +// clang-format off + +// FormatConversionChar +// +// Specifies the formatting character provided in the format string +// passed to `StrFormat()`. +enum class FormatConversionChar : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p // misc +}; +// clang-format on + +// FormatConversionSpec +// +// Specifies modifications to the conversion of the format string, through use +// of one or more format flags in the source format string. +class FormatConversionSpec { + public: + // FormatConversionSpec::is_basic() + // + // Indicates that width and precision are not specified, and no additional + // flags are set for this conversion character in the format string. 
+ bool is_basic() const { return impl_.is_basic(); } + + // FormatConversionSpec::has_left_flag() + // + // Indicates whether the result should be left-justified for this conversion + // character in the format string. This flag is set through use of a '-' + // character in the format string. E.g. "%-s" + bool has_left_flag() const { return impl_.has_left_flag(); } + + // FormatConversionSpec::has_show_pos_flag() + // + // Indicates whether a sign column is prepended to the result for this + // conversion character in the format string, even if the result is positive. + // This flag is set through use of a '+' character in the format string. + // E.g. "%+d" + bool has_show_pos_flag() const { return impl_.has_show_pos_flag(); } + + // FormatConversionSpec::has_sign_col_flag() + // + // Indicates whether a mandatory sign column is added to the result for this + // conversion character. This flag is set through use of a space character + // (' ') in the format string. E.g. "% i" + bool has_sign_col_flag() const { return impl_.has_sign_col_flag(); } + + // FormatConversionSpec::has_alt_flag() + // + // Indicates whether an "alternate" format is applied to the result for this + // conversion character. Alternative forms depend on the type of conversion + // character, and unallowed alternatives are undefined. This flag is set + // through use of a '#' character in the format string. E.g. "%#x" + bool has_alt_flag() const { return impl_.has_alt_flag(); } + + // FormatConversionSpec::has_zero_flag() + // + // Indicates whether zeroes should be prepended to the result for this + // conversion character instead of spaces. This flag is set through use of the + // '0' character in the format string. E.g. "%0f" + bool has_zero_flag() const { return impl_.has_zero_flag(); } + + // FormatConversionSpec::conversion_char() + // + // Returns the underlying conversion character. 
+ FormatConversionChar conversion_char() const { + return impl_.conversion_char(); + } + + // FormatConversionSpec::width() + // + // Returns the specified width (indicated through use of a non-zero integer + // value or '*' character) of the conversion character. If width is + // unspecified, it returns a negative value. + int width() const { return impl_.width(); } + + // FormatConversionSpec::precision() + // + // Returns the specified precision (through use of the '.' character followed + // by a non-zero integer value or '*' character) of the conversion character. + // If precision is unspecified, it returns a negative value. + int precision() const { return impl_.precision(); } + + private: + explicit FormatConversionSpec( + str_format_internal::FormatConversionSpecImpl impl) + : impl_(impl) {} + + friend str_format_internal::FormatConversionSpecImpl; + + absl::str_format_internal::FormatConversionSpecImpl impl_; +}; + +// Type safe OR operator for FormatConversionCharSet to allow accepting multiple +// conversion chars in custom format converters. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return static_cast<FormatConversionCharSet>(static_cast<uint64_t>(a) | + static_cast<uint64_t>(b)); +} + +// FormatConversionCharSet +// +// Specifies the _accepted_ conversion types as a template parameter to +// FormatConvertResult for custom implementations of `AbslFormatConvert`. +// Note the helper predefined alias definitions (kIntegral, etc.) below. 
+enum class FormatConversionCharSet : uint64_t { + // text + c = str_format_internal::FormatConversionCharToConvInt('c'), + s = str_format_internal::FormatConversionCharToConvInt('s'), + // integer + d = str_format_internal::FormatConversionCharToConvInt('d'), + i = str_format_internal::FormatConversionCharToConvInt('i'), + o = str_format_internal::FormatConversionCharToConvInt('o'), + u = str_format_internal::FormatConversionCharToConvInt('u'), + x = str_format_internal::FormatConversionCharToConvInt('x'), + X = str_format_internal::FormatConversionCharToConvInt('X'), + // Float + f = str_format_internal::FormatConversionCharToConvInt('f'), + F = str_format_internal::FormatConversionCharToConvInt('F'), + e = str_format_internal::FormatConversionCharToConvInt('e'), + E = str_format_internal::FormatConversionCharToConvInt('E'), + g = str_format_internal::FormatConversionCharToConvInt('g'), + G = str_format_internal::FormatConversionCharToConvInt('G'), + a = str_format_internal::FormatConversionCharToConvInt('a'), + A = str_format_internal::FormatConversionCharToConvInt('A'), + // misc + n = str_format_internal::FormatConversionCharToConvInt('n'), + p = str_format_internal::FormatConversionCharToConvInt('p'), + + // Used for width/precision '*' specification. + kStar = static_cast<uint64_t>( + absl::str_format_internal::FormatConversionCharSetInternal::kStar), + // Some predefined values: + kIntegral = d | i | u | o | x | X, + kFloating = a | e | f | g | A | E | F | G, + kNumeric = kIntegral | kFloating, + kString = s, + kPointer = p, +}; + +// FormatSink +// +// An abstraction to which conversions write their string data. +// +class FormatSink { + public: + // Appends `count` copies of `ch`. + void Append(size_t count, char ch) { sink_->Append(count, ch); } + + void Append(string_view v) { sink_->Append(v); } + + // Appends the first `precision` bytes of `v`. 
If this is less than + // `width`, spaces will be appended first (if `left` is false), or + // after (if `left` is true) to ensure the total amount appended is + // at least `width`. + bool PutPaddedString(string_view v, int width, int precision, bool left) { + return sink_->PutPaddedString(v, width, precision, left); + } + + private: + friend str_format_internal::FormatSinkImpl; + explicit FormatSink(str_format_internal::FormatSinkImpl* s) : sink_(s) {} + str_format_internal::FormatSinkImpl* sink_; +}; + +// FormatConvertResult +// +// Indicates whether a call to AbslFormatConvert() was successful. +// This return type informs the StrFormat extension framework (through +// ADL but using the return type) of what conversion characters are supported. +// It is strongly discouraged to return {false}, as this will result in an +// empty string in StrFormat. +template <FormatConversionCharSet C> +struct FormatConvertResult { + bool value; +}; + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_STR_FORMAT_H_ diff --git a/third_party/abseil_cpp/absl/strings/str_format_test.cc b/third_party/abseil_cpp/absl/strings/str_format_test.cc new file mode 100644 index 000000000000..c60027ad297d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_format_test.cc @@ -0,0 +1,774 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/str_format.h" + +#include <cstdarg> +#include <cstdint> +#include <cstdio> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/cord.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { +using str_format_internal::FormatArgImpl; + +using FormatEntryPointTest = ::testing::Test; + +TEST_F(FormatEntryPointTest, Format) { + std::string sink; + EXPECT_TRUE(Format(&sink, "A format %d", 123)); + EXPECT_EQ("A format 123", sink); + sink.clear(); + + ParsedFormat<'d'> pc("A format %d"); + EXPECT_TRUE(Format(&sink, pc, 123)); + EXPECT_EQ("A format 123", sink); +} +TEST_F(FormatEntryPointTest, UntypedFormat) { + constexpr const char* formats[] = { + "", + "a", + "%80d", +#if !defined(_MSC_VER) && !defined(__ANDROID__) && !defined(__native_client__) + // MSVC, NaCL and Android don't support positional syntax. + "complicated multipart %% %1$d format %1$0999d", +#endif // _MSC_VER + }; + for (const char* fmt : formats) { + std::string actual; + int i = 123; + FormatArgImpl arg_123(i); + absl::Span<const FormatArgImpl> args(&arg_123, 1); + UntypedFormatSpec format(fmt); + + EXPECT_TRUE(FormatUntyped(&actual, format, args)); + char buf[4096]{}; + snprintf(buf, sizeof(buf), fmt, 123); + EXPECT_EQ( + str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), args), + buf); + EXPECT_EQ(actual, buf); + } + // The internal version works with a preparsed format. 
+ ParsedFormat<'d'> pc("A format %d"); + int i = 345; + FormatArg arg(i); + std::string out; + EXPECT_TRUE(str_format_internal::FormatUntyped( + &out, str_format_internal::UntypedFormatSpecImpl(&pc), {&arg, 1})); + EXPECT_EQ("A format 345", out); +} + +TEST_F(FormatEntryPointTest, StringFormat) { + EXPECT_EQ("123", StrFormat("%d", 123)); + constexpr absl::string_view view("=%d=", 4); + EXPECT_EQ("=123=", StrFormat(view, 123)); +} + +TEST_F(FormatEntryPointTest, AppendFormat) { + std::string s; + std::string& r = StrAppendFormat(&s, "%d", 123); + EXPECT_EQ(&s, &r); // should be same object + EXPECT_EQ("123", r); +} + +TEST_F(FormatEntryPointTest, AppendFormatFail) { + std::string s = "orig"; + + UntypedFormatSpec format(" more %d"); + FormatArgImpl arg("not an int"); + + EXPECT_EQ("orig", + str_format_internal::AppendPack( + &s, str_format_internal::UntypedFormatSpecImpl::Extract(format), + {&arg, 1})); +} + + +TEST_F(FormatEntryPointTest, ManyArgs) { + EXPECT_EQ("24", StrFormat("%24$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); + EXPECT_EQ("60", StrFormat("%60$d", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60)); +} + +TEST_F(FormatEntryPointTest, Preparsed) { + ParsedFormat<'d'> pc("%d"); + EXPECT_EQ("123", StrFormat(pc, 123)); + // rvalue ok? 
+ EXPECT_EQ("123", StrFormat(ParsedFormat<'d'>("%d"), 123)); + constexpr absl::string_view view("=%d=", 4); + EXPECT_EQ("=123=", StrFormat(ParsedFormat<'d'>(view), 123)); +} + +TEST_F(FormatEntryPointTest, FormatCountCapture) { + int n = 0; + EXPECT_EQ("", StrFormat("%n", FormatCountCapture(&n))); + EXPECT_EQ(0, n); + EXPECT_EQ("123", StrFormat("%d%n", 123, FormatCountCapture(&n))); + EXPECT_EQ(3, n); +} + +TEST_F(FormatEntryPointTest, FormatCountCaptureWrongType) { + // Should reject int*. + int n = 0; + UntypedFormatSpec format("%d%n"); + int i = 123, *ip = &n; + FormatArgImpl args[2] = {FormatArgImpl(i), FormatArgImpl(ip)}; + + EXPECT_EQ("", str_format_internal::FormatPack( + str_format_internal::UntypedFormatSpecImpl::Extract(format), + absl::MakeSpan(args))); +} + +TEST_F(FormatEntryPointTest, FormatCountCaptureMultiple) { + int n1 = 0; + int n2 = 0; + EXPECT_EQ(" 1 2", + StrFormat("%5d%n%10d%n", 1, FormatCountCapture(&n1), 2, + FormatCountCapture(&n2))); + EXPECT_EQ(5, n1); + EXPECT_EQ(15, n2); +} + +TEST_F(FormatEntryPointTest, FormatCountCaptureExample) { + int n; + std::string s; + StrAppendFormat(&s, "%s: %n%s\n", "(1,1)", FormatCountCapture(&n), "(1,2)"); + StrAppendFormat(&s, "%*s%s\n", n, "", "(2,2)"); + EXPECT_EQ(7, n); + EXPECT_EQ( + "(1,1): (1,2)\n" + " (2,2)\n", + s); +} + +TEST_F(FormatEntryPointTest, Stream) { + const std::string formats[] = { + "", + "a", + "%80d", + "%d %u %c %s %f %g", +#if !defined(_MSC_VER) && !defined(__ANDROID__) && !defined(__native_client__) + // MSVC, NaCL and Android don't support positional syntax. 
+ "complicated multipart %% %1$d format %1$080d", +#endif // _MSC_VER + }; + std::string buf(4096, '\0'); + for (const auto& fmt : formats) { + const auto parsed = + ParsedFormat<'d', 'u', 'c', 's', 'f', 'g'>::NewAllowIgnored(fmt); + std::ostringstream oss; + oss << StreamFormat(*parsed, 123, 3, 49, "multistreaming!!!", 1.01, 1.01); + int fmt_result = snprintf(&*buf.begin(), buf.size(), fmt.c_str(), // + 123, 3, 49, "multistreaming!!!", 1.01, 1.01); + ASSERT_TRUE(oss) << fmt; + ASSERT_TRUE(fmt_result >= 0 && static_cast<size_t>(fmt_result) < buf.size()) + << fmt_result; + EXPECT_EQ(buf.c_str(), oss.str()); + } +} + +TEST_F(FormatEntryPointTest, StreamOk) { + std::ostringstream oss; + oss << StreamFormat("hello %d", 123); + EXPECT_EQ("hello 123", oss.str()); + EXPECT_TRUE(oss.good()); +} + +TEST_F(FormatEntryPointTest, StreamFail) { + std::ostringstream oss; + UntypedFormatSpec format("hello %d"); + FormatArgImpl arg("non-numeric"); + oss << str_format_internal::Streamable( + str_format_internal::UntypedFormatSpecImpl::Extract(format), {&arg, 1}); + EXPECT_EQ("hello ", oss.str()); // partial write + EXPECT_TRUE(oss.fail()); +} + +std::string WithSnprintf(const char* fmt, ...) { + std::string buf; + buf.resize(128); + va_list va; + va_start(va, fmt); + int r = vsnprintf(&*buf.begin(), buf.size(), fmt, va); + va_end(va); + EXPECT_GE(r, 0); + EXPECT_LT(r, buf.size()); + buf.resize(r); + return buf; +} + +TEST_F(FormatEntryPointTest, FloatPrecisionArg) { + // Test that positional parameters for width and precision + // are indexed to precede the value. + // Also sanity check the same formats against snprintf. 
+ EXPECT_EQ("0.1", StrFormat("%.1f", 0.1)); + EXPECT_EQ("0.1", WithSnprintf("%.1f", 0.1)); + EXPECT_EQ(" 0.1", StrFormat("%*.1f", 5, 0.1)); + EXPECT_EQ(" 0.1", WithSnprintf("%*.1f", 5, 0.1)); + EXPECT_EQ("0.1", StrFormat("%.*f", 1, 0.1)); + EXPECT_EQ("0.1", WithSnprintf("%.*f", 1, 0.1)); + EXPECT_EQ(" 0.1", StrFormat("%*.*f", 5, 1, 0.1)); + EXPECT_EQ(" 0.1", WithSnprintf("%*.*f", 5, 1, 0.1)); +} +namespace streamed_test { +struct X {}; +std::ostream& operator<<(std::ostream& os, const X&) { + return os << "X"; +} +} // namespace streamed_test + +TEST_F(FormatEntryPointTest, FormatStreamed) { + EXPECT_EQ("123", StrFormat("%s", FormatStreamed(123))); + EXPECT_EQ(" 123", StrFormat("%5s", FormatStreamed(123))); + EXPECT_EQ("123 ", StrFormat("%-5s", FormatStreamed(123))); + EXPECT_EQ("X", StrFormat("%s", FormatStreamed(streamed_test::X()))); + EXPECT_EQ("123", StrFormat("%s", FormatStreamed(StreamFormat("%d", 123)))); +} + +// Helper class that creates a temporary file and exposes a FILE* to it. +// It will close the file on destruction. +// NOTE(review): the result of std::tmpfile() is stored and later passed to +// std::fclose() without a null check; confirm tmpfile() cannot fail here. +class TempFile { + public: + TempFile() : file_(std::tmpfile()) {} + ~TempFile() { std::fclose(file_); } + + std::FILE* file() const { return file_; } + + // Read the file into a string. 
+ std::string ReadFile() { + std::fseek(file_, 0, SEEK_END); + int size = std::ftell(file_); + EXPECT_GT(size, 0); + std::rewind(file_); + std::string str(2 * size, ' '); + int read_bytes = std::fread(&str[0], 1, str.size(), file_); + EXPECT_EQ(read_bytes, size); + str.resize(read_bytes); + EXPECT_TRUE(std::feof(file_)); + return str; + } + + private: + std::FILE* file_; +}; + +TEST_F(FormatEntryPointTest, FPrintF) { + TempFile tmp; + int result = + FPrintF(tmp.file(), "STRING: %s NUMBER: %010d", std::string("ABC"), -19); + EXPECT_EQ(result, 30); + EXPECT_EQ(tmp.ReadFile(), "STRING: ABC NUMBER: -000000019"); +} + +TEST_F(FormatEntryPointTest, FPrintFError) { + errno = 0; + int result = FPrintF(stdin, "ABC"); + EXPECT_LT(result, 0); + EXPECT_EQ(errno, EBADF); +} + +#ifdef __GLIBC__ +TEST_F(FormatEntryPointTest, FprintfTooLarge) { + std::FILE* f = std::fopen("/dev/null", "w"); + int width = 2000000000; + errno = 0; + int result = FPrintF(f, "%*d %*d", width, 0, width, 0); + EXPECT_LT(result, 0); + EXPECT_EQ(errno, EFBIG); + std::fclose(f); +} + +TEST_F(FormatEntryPointTest, PrintF) { + int stdout_tmp = dup(STDOUT_FILENO); + + TempFile tmp; + std::fflush(stdout); + dup2(fileno(tmp.file()), STDOUT_FILENO); + + int result = PrintF("STRING: %s NUMBER: %010d", std::string("ABC"), -19); + + std::fflush(stdout); + dup2(stdout_tmp, STDOUT_FILENO); + close(stdout_tmp); + + EXPECT_EQ(result, 30); + EXPECT_EQ(tmp.ReadFile(), "STRING: ABC NUMBER: -000000019"); +} +#endif // __GLIBC__ + +TEST_F(FormatEntryPointTest, SNPrintF) { + char buffer[16]; + int result = + SNPrintF(buffer, sizeof(buffer), "STRING: %s", std::string("ABC")); + EXPECT_EQ(result, 11); + EXPECT_EQ(std::string(buffer), "STRING: ABC"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 123456); + EXPECT_EQ(result, 14); + EXPECT_EQ(std::string(buffer), "NUMBER: 123456"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 1234567); + EXPECT_EQ(result, 15); + EXPECT_EQ(std::string(buffer), "NUMBER: 
1234567"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 12345678); + EXPECT_EQ(result, 16); + EXPECT_EQ(std::string(buffer), "NUMBER: 1234567"); + + result = SNPrintF(buffer, sizeof(buffer), "NUMBER: %d", 123456789); + EXPECT_EQ(result, 17); + EXPECT_EQ(std::string(buffer), "NUMBER: 1234567"); + + result = SNPrintF(nullptr, 0, "Just checking the %s of the output.", "size"); + EXPECT_EQ(result, 37); +} + +TEST(StrFormat, BehavesAsDocumented) { + std::string s = absl::StrFormat("%s, %d!", "Hello", 123); + EXPECT_EQ("Hello, 123!", s); + // The format of a replacement is + // '%'[position][flags][width]['.'precision][length_modifier][format] + EXPECT_EQ(absl::StrFormat("%1$+3.2Lf", 1.1), "+1.10"); + // Text conversion: + // "c" - Character. Eg: 'a' -> "a", 0x20 -> " " + EXPECT_EQ(StrFormat("%c", 'a'), "a"); + EXPECT_EQ(StrFormat("%c", 0x20), " "); + // Formats char and integral types: int, long, uint64_t, etc. + EXPECT_EQ(StrFormat("%c", int{'a'}), "a"); + EXPECT_EQ(StrFormat("%c", long{'a'}), "a"); // NOLINT + EXPECT_EQ(StrFormat("%c", uint64_t{'a'}), "a"); + // "s" - string Eg: "C" -> "C", std::string("C++") -> "C++" + // Formats std::string, char*, string_view, and Cord. + EXPECT_EQ(StrFormat("%s", "C"), "C"); + EXPECT_EQ(StrFormat("%s", std::string("C++")), "C++"); + EXPECT_EQ(StrFormat("%s", string_view("view")), "view"); + EXPECT_EQ(StrFormat("%s", absl::Cord("cord")), "cord"); + // Integral Conversion + // These format integral types: char, int, long, uint64_t, etc. 
+ EXPECT_EQ(StrFormat("%d", char{10}), "10"); + EXPECT_EQ(StrFormat("%d", int{10}), "10"); + EXPECT_EQ(StrFormat("%d", long{10}), "10"); // NOLINT + EXPECT_EQ(StrFormat("%d", uint64_t{10}), "10"); + // d,i - signed decimal Eg: -10 -> "-10" + EXPECT_EQ(StrFormat("%d", -10), "-10"); + EXPECT_EQ(StrFormat("%i", -10), "-10"); + // o - octal Eg: 10 -> "12" + EXPECT_EQ(StrFormat("%o", 10), "12"); + // u - unsigned decimal Eg: 10 -> "10" + EXPECT_EQ(StrFormat("%u", 10), "10"); + // x/X - lower,upper case hex Eg: 10 -> "a"/"A" + EXPECT_EQ(StrFormat("%x", 10), "a"); + EXPECT_EQ(StrFormat("%X", 10), "A"); + // Floating-point, with upper/lower-case output. + // These format floating-point types: float, double, long double, etc. + EXPECT_EQ(StrFormat("%.1f", float{1}), "1.0"); + EXPECT_EQ(StrFormat("%.1f", double{1}), "1.0"); + const long double long_double = 1.0; + EXPECT_EQ(StrFormat("%.1f", long_double), "1.0"); + // These also format integral types: char, int, long, uint64_t, etc.: + EXPECT_EQ(StrFormat("%.1f", char{1}), "1.0"); + EXPECT_EQ(StrFormat("%.1f", int{1}), "1.0"); + EXPECT_EQ(StrFormat("%.1f", long{1}), "1.0"); // NOLINT + EXPECT_EQ(StrFormat("%.1f", uint64_t{1}), "1.0"); + // f/F - decimal. Eg: 123456789 -> "123456789.000000" + EXPECT_EQ(StrFormat("%f", 123456789), "123456789.000000"); + EXPECT_EQ(StrFormat("%F", 123456789), "123456789.000000"); + // e/E - exponentiated Eg: .01 -> "1.000000e-02"/"1.000000E-02" + EXPECT_EQ(StrFormat("%e", .01), "1.000000e-02"); + EXPECT_EQ(StrFormat("%E", .01), "1.000000E-02"); + // g/G - exponentiate to fit Eg: .01 -> "0.01", 1e10 -> "1e+10"/"1E+10" + EXPECT_EQ(StrFormat("%g", .01), "0.01"); + EXPECT_EQ(StrFormat("%g", 1e10), "1e+10"); + EXPECT_EQ(StrFormat("%G", 1e10), "1E+10"); + // a/A - lower,upper case hex Eg: -3.0 -> "-0x1.8p+1"/"-0X1.8P+1" + +// On Android platform <=21, there is a regression in hexfloat formatting. 
+#if !defined(__ANDROID_API__) || __ANDROID_API__ > 21 + EXPECT_EQ(StrFormat("%.1a", -3.0), "-0x1.8p+1"); // .1 to fix MSVC output + EXPECT_EQ(StrFormat("%.1A", -3.0), "-0X1.8P+1"); // .1 to fix MSVC output +#endif + + // Other conversion + int64_t value = 0x7ffdeb4; + auto ptr_value = static_cast<uintptr_t>(value); + const int& something = *reinterpret_cast<const int*>(ptr_value); + EXPECT_EQ(StrFormat("%p", &something), StrFormat("0x%x", ptr_value)); + + // Output widths are supported, with optional flags. + EXPECT_EQ(StrFormat("%3d", 1), " 1"); + EXPECT_EQ(StrFormat("%3d", 123456), "123456"); + EXPECT_EQ(StrFormat("%06.2f", 1.234), "001.23"); + EXPECT_EQ(StrFormat("%+d", 1), "+1"); + EXPECT_EQ(StrFormat("% d", 1), " 1"); + EXPECT_EQ(StrFormat("%-4d", -1), "-1 "); + EXPECT_EQ(StrFormat("%#o", 10), "012"); + EXPECT_EQ(StrFormat("%#x", 15), "0xf"); + EXPECT_EQ(StrFormat("%04d", 8), "0008"); + // Posix positional substitution. + EXPECT_EQ(absl::StrFormat("%2$s, %3$s, %1$s!", "vici", "veni", "vidi"), + "veni, vidi, vici!"); + // Length modifiers are ignored. 
+ EXPECT_EQ(StrFormat("%hhd", int{1}), "1"); + EXPECT_EQ(StrFormat("%hd", int{1}), "1"); + EXPECT_EQ(StrFormat("%ld", int{1}), "1"); + EXPECT_EQ(StrFormat("%lld", int{1}), "1"); + EXPECT_EQ(StrFormat("%Ld", int{1}), "1"); + EXPECT_EQ(StrFormat("%jd", int{1}), "1"); + EXPECT_EQ(StrFormat("%zd", int{1}), "1"); + EXPECT_EQ(StrFormat("%td", int{1}), "1"); + EXPECT_EQ(StrFormat("%qd", int{1}), "1"); +} + +using str_format_internal::ExtendedParsedFormat; +using str_format_internal::ParsedFormatBase; + +struct SummarizeConsumer { + std::string* out; + explicit SummarizeConsumer(std::string* out) : out(out) {} + + bool Append(string_view s) { + *out += "[" + std::string(s) + "]"; + return true; + } + + bool ConvertOne(const str_format_internal::UnboundConversion& conv, + string_view s) { + *out += "{"; + *out += std::string(s); + *out += ":"; + *out += std::to_string(conv.arg_position) + "$"; + if (conv.width.is_from_arg()) { + *out += std::to_string(conv.width.get_from_arg()) + "$*"; + } + if (conv.precision.is_from_arg()) { + *out += "." 
+ std::to_string(conv.precision.get_from_arg()) + "$*"; + } + *out += str_format_internal::FormatConversionCharToChar(conv.conv); + *out += "}"; + return true; + } +}; + +std::string SummarizeParsedFormat(const ParsedFormatBase& pc) { + std::string out; + if (!pc.ProcessFormat(SummarizeConsumer(&out))) out += "!"; + return out; +} + +using ParsedFormatTest = ::testing::Test; + +TEST_F(ParsedFormatTest, SimpleChecked) { + EXPECT_EQ("[ABC]{d:1$d}[DEF]", + SummarizeParsedFormat(ParsedFormat<'d'>("ABC%dDEF"))); + EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", + SummarizeParsedFormat(ParsedFormat<'s', 'd', 'f'>("%sFFF%dZZZ%f"))); + EXPECT_EQ("{s:1$s}[ ]{.*d:3$.2$*d}", + SummarizeParsedFormat(ParsedFormat<'s', '*', 'd'>("%s %.*d"))); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedCorrect) { + auto f = ParsedFormat<'d'>::New("ABC%dDEF"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*f)); + + std::string format = "%sFFF%dZZZ%f"; + auto f2 = ParsedFormat<'s', 'd', 'f'>::New(format); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*f2)); + + f2 = ParsedFormat<'s', 'd', 'f'>::New("%s %d %f"); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*f2)); + + auto star = ParsedFormat<'*', 'd'>::New("%*d"); + ASSERT_TRUE(star); + EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star)); + + auto dollar = ParsedFormat<'d', 's'>::New("%2$s %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); + // with reuse + dollar = ParsedFormat<'d', 's'>::New("%2$s %1$d %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}", + SummarizeParsedFormat(*dollar)); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedIgnoredArgs) { + EXPECT_FALSE((ParsedFormat<'d', 's'>::New("ABC"))); + EXPECT_FALSE((ParsedFormat<'d', 's'>::New("%dABC"))); + EXPECT_FALSE((ParsedFormat<'d', 's'>::New("ABC%2$s"))); + auto f = ParsedFormat<'d', 
's'>::NewAllowIgnored("ABC"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]", SummarizeParsedFormat(*f)); + f = ParsedFormat<'d', 's'>::NewAllowIgnored("%dABC"); + ASSERT_TRUE(f); + EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*f)); + f = ParsedFormat<'d', 's'>::NewAllowIgnored("ABC%2$s"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*f)); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedUnsupported) { + EXPECT_FALSE(ParsedFormat<'d'>::New("%1$d %1$x")); + EXPECT_FALSE(ParsedFormat<'x'>::New("%1$d %1$x")); +} + +TEST_F(ParsedFormatTest, SimpleUncheckedIncorrect) { + EXPECT_FALSE(ParsedFormat<'d'>::New("")); + + EXPECT_FALSE(ParsedFormat<'d'>::New("ABC%dDEF%d")); + + std::string format = "%sFFF%dZZZ%f"; + EXPECT_FALSE((ParsedFormat<'s', 'd', 'g'>::New(format))); +} + +#if defined(__cpp_nontype_template_parameter_auto) + +template <auto T> +std::true_type IsValidParsedFormatArgTest(ParsedFormat<T>*); + +template <auto T> +std::false_type IsValidParsedFormatArgTest(...); + +template <auto T> +using IsValidParsedFormatArg = decltype(IsValidParsedFormatArgTest<T>(nullptr)); + +TEST_F(ParsedFormatTest, OnlyValidTypesAllowed) { + ASSERT_TRUE(IsValidParsedFormatArg<'c'>::value); + + ASSERT_TRUE(IsValidParsedFormatArg<FormatConversionCharSet::d>::value); + + ASSERT_TRUE(IsValidParsedFormatArg<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::value); + ASSERT_TRUE( + IsValidParsedFormatArg<absl::FormatConversionCharSet::kIntegral>::value); + + // This is an easy mistake to make, however, this will reduce to an integer + // which has no meaning, so we need to ensure it doesn't compile. 
+ ASSERT_FALSE(IsValidParsedFormatArg<'x' | 'd'>::value); + + // For now, we disallow construction based on ConversionChar (rather than + // CharSet) + ASSERT_FALSE(IsValidParsedFormatArg<absl::FormatConversionChar::d>::value); +} + +TEST_F(ParsedFormatTest, ExtendedTyping) { + EXPECT_FALSE(ParsedFormat<FormatConversionCharSet::d>::New("")); + ASSERT_TRUE(ParsedFormat<absl::FormatConversionCharSet::d>::New("%d")); + auto v1 = ParsedFormat<'d', absl::FormatConversionCharSet::s>::New("%d%s"); + ASSERT_TRUE(v1); + auto v2 = ParsedFormat<absl::FormatConversionCharSet::d, 's'>::New("%d%s"); + ASSERT_TRUE(v2); + auto v3 = ParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::s, + 's'>::New("%d%s"); + ASSERT_TRUE(v3); + auto v4 = ParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::s, + 's'>::New("%s%s"); + ASSERT_TRUE(v4); +} +#endif + +TEST_F(ParsedFormatTest, UncheckedCorrect) { + auto f = + ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New("ABC%dDEF"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*f)); + + std::string format = "%sFFF%dZZZ%f"; + auto f2 = ExtendedParsedFormat< + absl::FormatConversionCharSet::kString, absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::kFloating>::New(format); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*f2)); + + f2 = ExtendedParsedFormat< + absl::FormatConversionCharSet::kString, absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::kFloating>::New("%s %d %f"); + + ASSERT_TRUE(f2); + EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*f2)); + + auto star = + ExtendedParsedFormat<absl::FormatConversionCharSet::kStar, + absl::FormatConversionCharSet::d>::New("%*d"); + ASSERT_TRUE(star); + EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star)); + + auto dollar = + ExtendedParsedFormat<absl::FormatConversionCharSet::d, + 
absl::FormatConversionCharSet::s>::New("%2$s %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); + // with reuse + dollar = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%2$s %1$d %1$d"); + ASSERT_TRUE(dollar); + EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}", + SummarizeParsedFormat(*dollar)); +} + +TEST_F(ParsedFormatTest, UncheckedIgnoredArgs) { + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("ABC"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%dABC"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("ABC%2$s"))); + auto f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("ABC"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]", SummarizeParsedFormat(*f)); + f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("%dABC"); + ASSERT_TRUE(f); + EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*f)); + f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("ABC%2$s"); + ASSERT_TRUE(f); + EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*f)); +} + +TEST_F(ParsedFormatTest, UncheckedMultipleTypes) { + auto dx = + ExtendedParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::New("%1$d %1$x"); + EXPECT_TRUE(dx); + EXPECT_EQ("{1$d:1$d}[ ]{1$x:1$x}", SummarizeParsedFormat(*dx)); + + dx = ExtendedParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::New("%1$d"); + EXPECT_TRUE(dx); + EXPECT_EQ("{1$d:1$d}", SummarizeParsedFormat(*dx)); +} + +TEST_F(ParsedFormatTest, UncheckedIncorrect) { + 
EXPECT_FALSE(ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New("")); + + EXPECT_FALSE(ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New( + "ABC%dDEF%d")); + + std::string format = "%sFFF%dZZZ%f"; + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::s, + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::g>::New(format))); +} + +TEST_F(ParsedFormatTest, RegressionMixPositional) { + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::o>::New("%1$d %o"))); +} + +using FormatWrapperTest = ::testing::Test; + +// Plain wrapper for StrFormat. +template <typename... Args> +std::string WrappedFormat(const absl::FormatSpec<Args...>& format, + const Args&... args) { + return StrFormat(format, args...); +} + +TEST_F(FormatWrapperTest, ConstexprStringFormat) { + EXPECT_EQ(WrappedFormat("%s there", "hello"), "hello there"); +} + +TEST_F(FormatWrapperTest, ParsedFormat) { + ParsedFormat<'s'> format("%s there"); + EXPECT_EQ(WrappedFormat(format, "hello"), "hello there"); +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl + +using FormatExtensionTest = ::testing::Test; + +struct Point { + friend absl::FormatConvertResult<absl::FormatConversionCharSet::kString | + absl::FormatConversionCharSet::kIntegral> + AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, + absl::FormatSink* s) { + if (spec.conversion_char() == absl::FormatConversionChar::s) { + s->Append(absl::StrCat("x=", p.x, " y=", p.y)); + } else { + s->Append(absl::StrCat(p.x, ",", p.y)); + } + return {true}; + } + + int x = 10; + int y = 20; +}; + +TEST_F(FormatExtensionTest, AbslFormatConvertExample) { + Point p; + EXPECT_EQ(absl::StrFormat("a %s z", p), "a x=10 y=20 z"); + EXPECT_EQ(absl::StrFormat("a %d z", p), "a 10,20 z"); + + // Typed formatting will fail to compile an invalid format. + // StrFormat("%f", p); // Does not compile. 
+ std::string actual; + absl::UntypedFormatSpec f1("%f"); + // FormatUntyped will return false for bad character. + EXPECT_FALSE(absl::FormatUntyped(&actual, f1, {absl::FormatArg(p)})); +} + +// Some codegen thunks that we can use to easily dump the generated assembly for +// different StrFormat calls. + +std::string CodegenAbslStrFormatInt(int i) { // NOLINT + return absl::StrFormat("%d", i); +} + +std::string CodegenAbslStrFormatIntStringInt64(int i, const std::string& s, + int64_t i64) { // NOLINT + return absl::StrFormat("%d %s %d", i, s, i64); +} + +void CodegenAbslStrAppendFormatInt(std::string* out, int i) { // NOLINT + absl::StrAppendFormat(out, "%d", i); +} + +void CodegenAbslStrAppendFormatIntStringInt64(std::string* out, int i, + const std::string& s, + int64_t i64) { // NOLINT + absl::StrAppendFormat(out, "%d %s %d", i, s, i64); +} diff --git a/third_party/abseil_cpp/absl/strings/str_join.h b/third_party/abseil_cpp/absl/strings/str_join.h new file mode 100644 index 000000000000..ae5731a42b3d --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_join.h @@ -0,0 +1,293 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// ----------------------------------------------------------------------------- +// File: str_join.h +// ----------------------------------------------------------------------------- +// +// This header file contains functions for joining a range of elements and +// returning the result as a std::string. StrJoin operations are specified by +// passing a range, a separator string to use between the elements joined, and +// an optional Formatter responsible for converting each argument in the range +// to a string. If omitted, a default `AlphaNumFormatter()` is called on the +// elements to be joined, using the same formatting that `absl::StrCat()` uses. +// This package defines a number of default formatters, and you can define your +// own implementations. +// +// Ranges are specified by passing a container with `std::begin()` and +// `std::end()` iterators, container-specific `begin()` and `end()` iterators, a +// brace-initialized `std::initializer_list`, or a `std::tuple` of heterogeneous +// objects. The separator string is specified as an `absl::string_view`. +// +// Because the default formatter uses the `absl::AlphaNum` class, +// `absl::StrJoin()`, like `absl::StrCat()`, will work out-of-the-box on +// collections of strings, ints, floats, doubles, etc. +// +// Example: +// +// std::vector<std::string> v = {"foo", "bar", "baz"}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// See comments on the `absl::StrJoin()` function for more examples. 
+ +#ifndef ABSL_STRINGS_STR_JOIN_H_ +#define ABSL_STRINGS_STR_JOIN_H_ + +#include <cstdio> +#include <cstring> +#include <initializer_list> +#include <iterator> +#include <string> +#include <tuple> +#include <type_traits> +#include <utility> + +#include "absl/base/macros.h" +#include "absl/strings/internal/str_join_internal.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// ----------------------------------------------------------------------------- +// Concept: Formatter +// ----------------------------------------------------------------------------- +// +// A Formatter is a function object that is responsible for formatting its +// argument as a string and appending it to a given output std::string. +// Formatters may be implemented as function objects, lambdas, or normal +// functions. You may provide your own Formatter to enable `absl::StrJoin()` to +// work with arbitrary types. +// +// The following is an example of a custom Formatter that simply uses +// `std::to_string()` to format an integer as a std::string. +// +// struct MyFormatter { +// void operator()(std::string* out, int i) const { +// out->append(std::to_string(i)); +// } +// }; +// +// You would use the above formatter by passing an instance of it as the final +// argument to `absl::StrJoin()`: +// +// std::vector<int> v = {1, 2, 3, 4}; +// std::string s = absl::StrJoin(v, "-", MyFormatter()); +// EXPECT_EQ("1-2-3-4", s); +// +// The following standard formatters are provided within this file: +// +// - `AlphaNumFormatter()` (the default) +// - `StreamFormatter()` +// - `PairFormatter()` +// - `DereferenceFormatter()` + +// AlphaNumFormatter() +// +// Default formatter used if none is specified. Uses `absl::AlphaNum` to convert +// numeric arguments to strings. 
+inline strings_internal::AlphaNumFormatterImpl AlphaNumFormatter() { + return strings_internal::AlphaNumFormatterImpl(); +} + +// StreamFormatter() +// +// Formats its argument using the << operator. +inline strings_internal::StreamFormatterImpl StreamFormatter() { + return strings_internal::StreamFormatterImpl(); +} + +// Function Template: PairFormatter(Formatter, absl::string_view, Formatter) +// +// Formats a `std::pair` by putting a given separator between the pair's +// `.first` and `.second` members. This formatter allows you to specify +// custom Formatters for both the first and second member of each pair. +template <typename FirstFormatter, typename SecondFormatter> +inline strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter> +PairFormatter(FirstFormatter f1, absl::string_view sep, SecondFormatter f2) { + return strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>( + std::move(f1), sep, std::move(f2)); +} + +// Function overload of PairFormatter() for using a default +// `AlphaNumFormatter()` for each Formatter in the pair. +inline strings_internal::PairFormatterImpl< + strings_internal::AlphaNumFormatterImpl, + strings_internal::AlphaNumFormatterImpl> +PairFormatter(absl::string_view sep) { + return PairFormatter(AlphaNumFormatter(), sep, AlphaNumFormatter()); +} + +// Function Template: DereferenceFormatter(Formatter) +// +// Formats its argument by dereferencing it and then applying the given +// formatter. This formatter is useful for formatting a container of +// pointer-to-T. This pattern often shows up when joining repeated fields in +// protocol buffers. +template <typename Formatter> +strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter( + Formatter&& f) { + return strings_internal::DereferenceFormatterImpl<Formatter>( + std::forward<Formatter>(f)); +} + +// Function overload of `DereferenceFormatter()` for using a default +// `AlphaNumFormatter()`. 
+inline strings_internal::DereferenceFormatterImpl< + strings_internal::AlphaNumFormatterImpl> +DereferenceFormatter() { + return strings_internal::DereferenceFormatterImpl< + strings_internal::AlphaNumFormatterImpl>(AlphaNumFormatter()); +} + +// ----------------------------------------------------------------------------- +// StrJoin() +// ----------------------------------------------------------------------------- +// +// Joins a range of elements and returns the result as a std::string. +// `absl::StrJoin()` takes a range, a separator string to use between the +// elements joined, and an optional Formatter responsible for converting each +// argument in the range to a string. +// +// If omitted, the default `AlphaNumFormatter()` is called on the elements to be +// joined. +// +// Example 1: +// // Joins a collection of strings. This pattern also works with a collection +// // of `absl::string_view` or even `const char*`. +// std::vector<std::string> v = {"foo", "bar", "baz"}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// Example 2: +// // Joins the values in the given `std::initializer_list<>` specified using +// // brace initialization. This pattern also works with an initializer_list +// // of ints or `absl::string_view` -- any `AlphaNum`-compatible type. +// std::string s = absl::StrJoin({"foo", "bar", "baz"}, "-"); +// EXPECT_EQ("foo-bar-baz", s); +// +// Example 3: +// // Joins a collection of ints. This pattern also works with floats, +// // doubles, int64s -- any `StrCat()`-compatible type. +// std::vector<int> v = {1, 2, 3, -4}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3--4", s); +// +// Example 4: +// // Joins a collection of pointer-to-int. 
By default, pointers are +// // dereferenced and the pointee is formatted using the default format for +// // that type; such dereferencing occurs for all levels of indirection, so +// // this pattern works just as well for `std::vector<int**>` as for +// // `std::vector<int*>`. +// int x = 1, y = 2, z = 3; +// std::vector<int*> v = {&x, &y, &z}; +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3", s); +// +// Example 5: +// // Dereferencing of `std::unique_ptr<>` is also supported: +// std::vector<std::unique_ptr<int>> v; +// v.emplace_back(new int(1)); +// v.emplace_back(new int(2)); +// v.emplace_back(new int(3)); +// std::string s = absl::StrJoin(v, "-"); +// EXPECT_EQ("1-2-3", s); +// +// Example 6: +// // Joins a `std::map`, with each key-value pair separated by an equals +// // sign. This pattern would also work with, say, a +// // `std::vector<std::pair<>>`. +// std::map<std::string, int> m = { +// std::make_pair("a", 1), +// std::make_pair("b", 2), +// std::make_pair("c", 3)}; +// std::string s = absl::StrJoin(m, ",", absl::PairFormatter("=")); +// EXPECT_EQ("a=1,b=2,c=3", s); +// +// Example 7: +// // These examples show how `absl::StrJoin()` handles a few common edge +// // cases: +// std::vector<std::string> v_empty; +// EXPECT_EQ("", absl::StrJoin(v_empty, "-")); +// +// std::vector<std::string> v_one_item = {"foo"}; +// EXPECT_EQ("foo", absl::StrJoin(v_one_item, "-")); +// +// std::vector<std::string> v_empty_string = {""}; +// EXPECT_EQ("", absl::StrJoin(v_empty_string, "-")); +// +// std::vector<std::string> v_one_item_empty_string = {"a", ""}; +// EXPECT_EQ("a-", absl::StrJoin(v_one_item_empty_string, "-")); +// +// std::vector<std::string> v_two_empty_string = {"", ""}; +// EXPECT_EQ("-", absl::StrJoin(v_two_empty_string, "-")); +// +// Example 8: +// // Joins a `std::tuple<T...>` of heterogeneous types, converting each to +// // a std::string using the `absl::AlphaNum` class. 
+// std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-"); +// EXPECT_EQ("123-abc-0.456", s); + +template <typename Iterator, typename Formatter> +std::string StrJoin(Iterator start, Iterator end, absl::string_view sep, + Formatter&& fmt) { + return strings_internal::JoinAlgorithm(start, end, sep, fmt); +} + +template <typename Range, typename Formatter> +std::string StrJoin(const Range& range, absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinRange(range, separator, fmt); +} + +template <typename T, typename Formatter> +std::string StrJoin(std::initializer_list<T> il, absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinRange(il, separator, fmt); +} + +template <typename... T, typename Formatter> +std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator, + Formatter&& fmt) { + return strings_internal::JoinAlgorithm(value, separator, fmt); +} + +template <typename Iterator> +std::string StrJoin(Iterator start, Iterator end, absl::string_view separator) { + return strings_internal::JoinRange(start, end, separator); +} + +template <typename Range> +std::string StrJoin(const Range& range, absl::string_view separator) { + return strings_internal::JoinRange(range, separator); +} + +template <typename T> +std::string StrJoin(std::initializer_list<T> il, + absl::string_view separator) { + return strings_internal::JoinRange(il, separator); +} + +template <typename... 
T> +std::string StrJoin(const std::tuple<T...>& value, + absl::string_view separator) { + return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter()); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_STR_JOIN_H_ diff --git a/third_party/abseil_cpp/absl/strings/str_join_benchmark.cc b/third_party/abseil_cpp/absl/strings/str_join_benchmark.cc new file mode 100644 index 000000000000..d6f689ff300b --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_join_benchmark.cc @@ -0,0 +1,97 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/str_join.h" + +#include <string> +#include <vector> +#include <utility> + +#include "benchmark/benchmark.h" + +namespace { + +void BM_Join2_Strings(benchmark::State& state) { + const int string_len = state.range(0); + const int num_strings = state.range(1); + const std::string s(string_len, 'x'); + const std::vector<std::string> v(num_strings, s); + for (auto _ : state) { + std::string s = absl::StrJoin(v, "-"); + benchmark::DoNotOptimize(s); + } +} +BENCHMARK(BM_Join2_Strings) + ->ArgPair(1 << 0, 1 << 3) + ->ArgPair(1 << 10, 1 << 3) + ->ArgPair(1 << 13, 1 << 3) + ->ArgPair(1 << 0, 1 << 10) + ->ArgPair(1 << 10, 1 << 10) + ->ArgPair(1 << 13, 1 << 10) + ->ArgPair(1 << 0, 1 << 13) + ->ArgPair(1 << 10, 1 << 13) + ->ArgPair(1 << 13, 1 << 13); + +void BM_Join2_Ints(benchmark::State& state) { + const int num_ints = state.range(0); + const std::vector<int> v(num_ints, 42); + for (auto _ : state) { + std::string s = absl::StrJoin(v, "-"); + benchmark::DoNotOptimize(s); + } +} +BENCHMARK(BM_Join2_Ints)->Range(0, 1 << 13); + +void BM_Join2_KeysAndValues(benchmark::State& state) { + const int string_len = state.range(0); + const int num_pairs = state.range(1); + const std::string s(string_len, 'x'); + const std::vector<std::pair<std::string, int>> v(num_pairs, + std::make_pair(s, 42)); + for (auto _ : state) { + std::string s = absl::StrJoin(v, ",", absl::PairFormatter("=")); + benchmark::DoNotOptimize(s); + } +} +BENCHMARK(BM_Join2_KeysAndValues) + ->ArgPair(1 << 0, 1 << 3) + ->ArgPair(1 << 10, 1 << 3) + ->ArgPair(1 << 13, 1 << 3) + ->ArgPair(1 << 0, 1 << 10) + ->ArgPair(1 << 10, 1 << 10) + ->ArgPair(1 << 13, 1 << 10) + ->ArgPair(1 << 0, 1 << 13) + ->ArgPair(1 << 10, 1 << 13) + ->ArgPair(1 << 13, 1 << 13); + +void BM_JoinStreamable(benchmark::State& state) { + const int string_len = state.range(0); + const int num_strings = state.range(1); + const std::vector<std::string> v(num_strings, std::string(string_len, 'x')); + for (auto _ : state) { + 
std::string s = absl::StrJoin(v, "", absl::StreamFormatter()); + benchmark::DoNotOptimize(s); + } +} +BENCHMARK(BM_JoinStreamable) + ->ArgPair(0, 0) + ->ArgPair(16, 1) + ->ArgPair(256, 1) + ->ArgPair(16, 16) + ->ArgPair(256, 16) + ->ArgPair(16, 256) + ->ArgPair(256, 256); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/str_join_test.cc b/third_party/abseil_cpp/absl/strings/str_join_test.cc new file mode 100644 index 000000000000..2be6256e43f7 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_join_test.cc @@ -0,0 +1,474 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Unit tests for all join.h functions + +#include "absl/strings/str_join.h" + +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <functional> +#include <initializer_list> +#include <map> +#include <memory> +#include <ostream> +#include <tuple> +#include <type_traits> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/base/macros.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" + +namespace { + +TEST(StrJoin, APIExamples) { + { + // Collection of strings + std::vector<std::string> v = {"foo", "bar", "baz"}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of absl::string_view + std::vector<absl::string_view> v = {"foo", "bar", "baz"}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of const char* + std::vector<const char*> v = {"foo", "bar", "baz"}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of non-const char* + std::string a = "foo", b = "bar", c = "baz"; + std::vector<char*> v = {&a[0], &b[0], &c[0]}; + EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-")); + } + + { + // Collection of ints + std::vector<int> v = {1, 2, 3, -4}; + EXPECT_EQ("1-2-3--4", absl::StrJoin(v, "-")); + } + + { + // Literals passed as a std::initializer_list + std::string s = absl::StrJoin({"a", "b", "c"}, "-"); + EXPECT_EQ("a-b-c", s); + } + { + // Join a std::tuple<T...>. 
+ std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-"); + EXPECT_EQ("123-abc-0.456", s); + } + + { + // Collection of unique_ptrs + std::vector<std::unique_ptr<int>> v; + v.emplace_back(new int(1)); + v.emplace_back(new int(2)); + v.emplace_back(new int(3)); + EXPECT_EQ("1-2-3", absl::StrJoin(v, "-")); + } + + { + // Array of ints + const int a[] = {1, 2, 3, -4}; + EXPECT_EQ("1-2-3--4", absl::StrJoin(a, a + ABSL_ARRAYSIZE(a), "-")); + } + + { + // Collection of pointers + int x = 1, y = 2, z = 3; + std::vector<int*> v = {&x, &y, &z}; + EXPECT_EQ("1-2-3", absl::StrJoin(v, "-")); + } + + { + // Collection of pointers to pointers + int x = 1, y = 2, z = 3; + int *px = &x, *py = &y, *pz = &z; + std::vector<int**> v = {&px, &py, &pz}; + EXPECT_EQ("1-2-3", absl::StrJoin(v, "-")); + } + + { + // Collection of pointers to std::string + std::string a("a"), b("b"); + std::vector<std::string*> v = {&a, &b}; + EXPECT_EQ("a-b", absl::StrJoin(v, "-")); + } + + { + // A std::map, which is a collection of std::pair<>s. + std::map<std::string, int> m = {{"a", 1}, {"b", 2}, {"c", 3}}; + EXPECT_EQ("a=1,b=2,c=3", absl::StrJoin(m, ",", absl::PairFormatter("="))); + } + + { + // Shows absl::StrSplit and absl::StrJoin working together. This example is + // equivalent to s/=/-/g. + const std::string s = "a=b=c=d"; + EXPECT_EQ("a-b-c-d", absl::StrJoin(absl::StrSplit(s, "="), "-")); + } + + // + // A few examples of edge cases + // + + { + // Empty range yields an empty string. + std::vector<std::string> v; + EXPECT_EQ("", absl::StrJoin(v, "-")); + } + + { + // A range of 1 element gives a string with that element but no + // separator. 
+ std::vector<std::string> v = {"foo"}; + EXPECT_EQ("foo", absl::StrJoin(v, "-")); + } + + { + // A range with a single empty string element + std::vector<std::string> v = {""}; + EXPECT_EQ("", absl::StrJoin(v, "-")); + } + + { + // A range with 2 elements, one of which is an empty string + std::vector<std::string> v = {"a", ""}; + EXPECT_EQ("a-", absl::StrJoin(v, "-")); + } + + { + // A range with 2 empty elements. + std::vector<std::string> v = {"", ""}; + EXPECT_EQ("-", absl::StrJoin(v, "-")); + } + + { + // A std::vector of bool. + std::vector<bool> v = {true, false, true}; + EXPECT_EQ("1-0-1", absl::StrJoin(v, "-")); + } +} + +TEST(StrJoin, CustomFormatter) { + std::vector<std::string> v{"One", "Two", "Three"}; + { + std::string joined = + absl::StrJoin(v, "", [](std::string* out, const std::string& in) { + absl::StrAppend(out, "(", in, ")"); + }); + EXPECT_EQ("(One)(Two)(Three)", joined); + } + { + class ImmovableFormatter { + public: + void operator()(std::string* out, const std::string& in) { + absl::StrAppend(out, "(", in, ")"); + } + ImmovableFormatter() {} + ImmovableFormatter(const ImmovableFormatter&) = delete; + }; + EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", ImmovableFormatter())); + } + { + class OverloadedFormatter { + public: + void operator()(std::string* out, const std::string& in) { + absl::StrAppend(out, "(", in, ")"); + } + void operator()(std::string* out, const std::string& in) const { + absl::StrAppend(out, "[", in, "]"); + } + }; + EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", OverloadedFormatter())); + const OverloadedFormatter fmt = {}; + EXPECT_EQ("[One][Two][Three]", absl::StrJoin(v, "", fmt)); + } +} + +// +// Tests the Formatters +// + +TEST(AlphaNumFormatter, FormatterAPI) { + // Not an exhaustive test. See strings/strcat_test.h for the exhaustive test + // of what AlphaNum can convert. 
+ auto f = absl::AlphaNumFormatter(); + std::string s; + f(&s, "Testing: "); + f(&s, static_cast<int>(1)); + f(&s, static_cast<int16_t>(2)); + f(&s, static_cast<int64_t>(3)); + f(&s, static_cast<float>(4)); + f(&s, static_cast<double>(5)); + f(&s, static_cast<unsigned>(6)); + f(&s, static_cast<size_t>(7)); + f(&s, absl::string_view(" OK")); + EXPECT_EQ("Testing: 1234567 OK", s); +} + +// Make sure people who are mistakenly using std::vector<bool> even though +// they're not memory-constrained can use absl::AlphaNumFormatter(). +TEST(AlphaNumFormatter, VectorOfBool) { + auto f = absl::AlphaNumFormatter(); + std::string s; + std::vector<bool> v = {true, false, true}; + f(&s, *v.cbegin()); + f(&s, *v.begin()); + f(&s, v[1]); + EXPECT_EQ("110", s); +} + +TEST(AlphaNumFormatter, AlphaNum) { + auto f = absl::AlphaNumFormatter(); + std::string s; + f(&s, absl::AlphaNum("hello")); + EXPECT_EQ("hello", s); +} + +struct StreamableType { + std::string contents; +}; +inline std::ostream& operator<<(std::ostream& os, const StreamableType& t) { + os << "Streamable:" << t.contents; + return os; +} + +TEST(StreamFormatter, FormatterAPI) { + auto f = absl::StreamFormatter(); + std::string s; + f(&s, "Testing: "); + f(&s, static_cast<int>(1)); + f(&s, static_cast<int16_t>(2)); + f(&s, static_cast<int64_t>(3)); + f(&s, static_cast<float>(4)); + f(&s, static_cast<double>(5)); + f(&s, static_cast<unsigned>(6)); + f(&s, static_cast<size_t>(7)); + f(&s, absl::string_view(" OK ")); + StreamableType streamable = {"object"}; + f(&s, streamable); + EXPECT_EQ("Testing: 1234567 OK Streamable:object", s); +} + +// A dummy formatter that wraps each element in parens. Used in some tests +// below. +struct TestingParenFormatter { + template <typename T> + void operator()(std::string* s, const T& t) { + absl::StrAppend(s, "(", t, ")"); + } +}; + +TEST(PairFormatter, FormatterAPI) { + { + // Tests default PairFormatter(sep) that uses AlphaNumFormatter for the + // 'first' and 'second' members. 
+ const auto f = absl::PairFormatter("="); + std::string s; + f(&s, std::make_pair("a", "b")); + f(&s, std::make_pair(1, 2)); + EXPECT_EQ("a=b1=2", s); + } + + { + // Tests using a custom formatter for the 'first' and 'second' members. + auto f = absl::PairFormatter(TestingParenFormatter(), "=", + TestingParenFormatter()); + std::string s; + f(&s, std::make_pair("a", "b")); + f(&s, std::make_pair(1, 2)); + EXPECT_EQ("(a)=(b)(1)=(2)", s); + } +} + +TEST(DereferenceFormatter, FormatterAPI) { + { + // Tests wrapping the default AlphaNumFormatter. + const absl::strings_internal::DereferenceFormatterImpl< + absl::strings_internal::AlphaNumFormatterImpl> + f; + int x = 1, y = 2, z = 3; + std::string s; + f(&s, &x); + f(&s, &y); + f(&s, &z); + EXPECT_EQ("123", s); + } + + { + // Tests wrapping std::string's default formatter. + absl::strings_internal::DereferenceFormatterImpl< + absl::strings_internal::DefaultFormatter<std::string>::Type> + f; + + std::string x = "x"; + std::string y = "y"; + std::string z = "z"; + std::string s; + f(&s, &x); + f(&s, &y); + f(&s, &z); + EXPECT_EQ(s, "xyz"); + } + + { + // Tests wrapping a custom formatter. + auto f = absl::DereferenceFormatter(TestingParenFormatter()); + int x = 1, y = 2, z = 3; + std::string s; + f(&s, &x); + f(&s, &y); + f(&s, &z); + EXPECT_EQ("(1)(2)(3)", s); + } + + { + absl::strings_internal::DereferenceFormatterImpl< + absl::strings_internal::AlphaNumFormatterImpl> + f; + auto x = std::unique_ptr<int>(new int(1)); + auto y = std::unique_ptr<int>(new int(2)); + auto z = std::unique_ptr<int>(new int(3)); + std::string s; + f(&s, x); + f(&s, y); + f(&s, z); + EXPECT_EQ("123", s); + } +} + +// +// Tests the interfaces for the 4 public Join function overloads. The semantics +// of the algorithm is covered in the above APIExamples test. 
+// +TEST(StrJoin, PublicAPIOverloads) { + std::vector<std::string> v = {"a", "b", "c"}; + + // Iterators + formatter + EXPECT_EQ("a-b-c", + absl::StrJoin(v.begin(), v.end(), "-", absl::AlphaNumFormatter())); + // Range + formatter + EXPECT_EQ("a-b-c", absl::StrJoin(v, "-", absl::AlphaNumFormatter())); + // Iterators, no formatter + EXPECT_EQ("a-b-c", absl::StrJoin(v.begin(), v.end(), "-")); + // Range, no formatter + EXPECT_EQ("a-b-c", absl::StrJoin(v, "-")); +} + +TEST(StrJoin, Array) { + const absl::string_view a[] = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); +} + +TEST(StrJoin, InitializerList) { + { EXPECT_EQ("a-b-c", absl::StrJoin({"a", "b", "c"}, "-")); } + + { + auto a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + std::initializer_list<const char*> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + std::initializer_list<std::string> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + std::initializer_list<absl::string_view> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", absl::StrJoin(a, "-")); + } + + { + // Tests initializer_list with a non-default formatter + auto a = {"a", "b", "c"}; + TestingParenFormatter f; + EXPECT_EQ("(a)-(b)-(c)", absl::StrJoin(a, "-", f)); + } + + { + // initializer_list of ints + EXPECT_EQ("1-2-3", absl::StrJoin({1, 2, 3}, "-")); + } + + { + // Tests initializer_list of ints with a non-default formatter + auto a = {1, 2, 3}; + TestingParenFormatter f; + EXPECT_EQ("(1)-(2)-(3)", absl::StrJoin(a, "-", f)); + } +} + +TEST(StrJoin, Tuple) { + EXPECT_EQ("", absl::StrJoin(std::make_tuple(), "-")); + EXPECT_EQ("hello", absl::StrJoin(std::make_tuple("hello"), "-")); + + int x(10); + std::string y("hello"); + double z(3.14); + EXPECT_EQ("10-hello-3.14", absl::StrJoin(std::make_tuple(x, y, z), "-")); + + // Faster! Faster!! 
+ EXPECT_EQ("10-hello-3.14", + absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-")); + + struct TestFormatter { + char buffer[128]; + void operator()(std::string* out, int v) { + snprintf(buffer, sizeof(buffer), "%#.8x", v); + out->append(buffer); + } + void operator()(std::string* out, double v) { + snprintf(buffer, sizeof(buffer), "%#.0f", v); + out->append(buffer); + } + void operator()(std::string* out, const std::string& v) { + snprintf(buffer, sizeof(buffer), "%.4s", v.c_str()); + out->append(buffer); + } + }; + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(x, y, z), "-", TestFormatter())); + EXPECT_EQ( + "0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-", TestFormatter())); + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(&x, &y, &z), "-", + absl::DereferenceFormatter(TestFormatter()))); + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), + absl::make_unique<std::string>(y), + absl::make_unique<double>(z)), + "-", absl::DereferenceFormatter(TestFormatter()))); + EXPECT_EQ("0x0000000a-hell-3.", + absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), &y, &z), + "-", absl::DereferenceFormatter(TestFormatter()))); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/str_replace.cc b/third_party/abseil_cpp/absl/strings/str_replace.cc new file mode 100644 index 000000000000..2bd5fa98218c --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_replace.cc @@ -0,0 +1,82 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_replace.h" + +#include "absl/strings/str_cat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +using FixedMapping = + std::initializer_list<std::pair<absl::string_view, absl::string_view>>; + +// Applies the ViableSubstitutions in subs_ptr to the absl::string_view s, and +// stores the result in *result_ptr. Returns the number of substitutions that +// occurred. +int ApplySubstitutions( + absl::string_view s, + std::vector<strings_internal::ViableSubstitution>* subs_ptr, + std::string* result_ptr) { + auto& subs = *subs_ptr; + int substitutions = 0; + size_t pos = 0; + while (!subs.empty()) { + auto& sub = subs.back(); + if (sub.offset >= pos) { + if (pos <= s.size()) { + StrAppend(result_ptr, s.substr(pos, sub.offset - pos), sub.replacement); + } + pos = sub.offset + sub.old.size(); + substitutions += 1; + } + sub.offset = s.find(sub.old, pos); + if (sub.offset == s.npos) { + subs.pop_back(); + } else { + // Insertion sort to ensure the last ViableSubstitution continues to be + // before all the others. 
+ size_t index = subs.size(); + while (--index && subs[index - 1].OccursBefore(subs[index])) { + std::swap(subs[index], subs[index - 1]); + } + } + } + result_ptr->append(s.data() + pos, s.size() - pos); + return substitutions; +} + +} // namespace strings_internal + +// We can implement this in terms of the generic StrReplaceAll, but +// we must specify the template overload because C++ cannot deduce the type +// of an initializer_list parameter to a function, and also if we don't specify +// the type, we just call ourselves. +// +// Note that we implement them here, rather than in the header, so that they +// aren't inlined. + +std::string StrReplaceAll(absl::string_view s, + strings_internal::FixedMapping replacements) { + return StrReplaceAll<strings_internal::FixedMapping>(s, replacements); +} + +int StrReplaceAll(strings_internal::FixedMapping replacements, + std::string* target) { + return StrReplaceAll<strings_internal::FixedMapping>(replacements, target); +} + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/str_replace.h b/third_party/abseil_cpp/absl/strings/str_replace.h new file mode 100644 index 000000000000..273c7077353e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_replace.h @@ -0,0 +1,219 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// ----------------------------------------------------------------------------- +// File: str_replace.h +// ----------------------------------------------------------------------------- +// +// This file defines `absl::StrReplaceAll()`, a general-purpose string +// replacement function designed for large, arbitrary text substitutions, +// especially on strings which you are receiving from some other system for +// further processing (e.g. processing regular expressions, escaping HTML +// entities, etc.). `StrReplaceAll` is designed to be efficient even when only +// one substitution is being performed, or when substitution is rare. +// +// If the string being modified is known at compile-time, and the substitutions +// vary, `absl::Substitute()` may be a better choice. +// +// Example: +// +// std::string html_escaped = absl::StrReplaceAll(user_input, { +// {"&", "&amp;"}, +// {"<", "&lt;"}, +// {">", "&gt;"}, +// {"\"", "&quot;"}, +// {"'", "&#39;"}}); +#ifndef ABSL_STRINGS_STR_REPLACE_H_ +#define ABSL_STRINGS_STR_REPLACE_H_ + +#include <string> +#include <utility> +#include <vector> + +#include "absl/base/attributes.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// StrReplaceAll() +// +// Replaces character sequences within a given string with replacements provided +// within an initializer list of key/value pairs. Candidate replacements are +// considered in order as they occur within the string, with earlier matches +// taking precedence, and longer matches taking precedence for candidates +// starting at the same position in the string. Once a substitution is made, the +// replaced text is not considered for any further substitutions. +// +// Example: +// +// std::string s = absl::StrReplaceAll( +// "$who bought $count #Noun. Thanks $who!", +// {{"$count", absl::StrCat(5)}, +// {"$who", "Bob"}, +// {"#Noun", "Apples"}}); +// EXPECT_EQ("Bob bought 5 Apples. 
Thanks Bob!", s); +ABSL_MUST_USE_RESULT std::string StrReplaceAll( + absl::string_view s, + std::initializer_list<std::pair<absl::string_view, absl::string_view>> + replacements); + +// Overload of `StrReplaceAll()` to accept a container of key/value replacement +// pairs (typically either an associative map or a `std::vector` of `std::pair` +// elements). A vector of pairs is generally more efficient. +// +// Examples: +// +// std::map<absl::string_view, absl::string_view> replacements; +// replacements["$who"] = "Bob"; +// replacements["$count"] = "5"; +// replacements["#Noun"] = "Apples"; +// std::string s = absl::StrReplaceAll( +// "$who bought $count #Noun. Thanks $who!", +// replacements); +// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +// +// // A std::vector of std::pair elements can be more efficient. +// std::vector<std::pair<const absl::string_view, std::string>> replacements; +// replacements.push_back({"&", "&amp;"}); +// replacements.push_back({"<", "&lt;"}); +// replacements.push_back({">", "&gt;"}); +// std::string s = absl::StrReplaceAll("if (ptr < &foo)", +// replacements); +// EXPECT_EQ("if (ptr &lt; &amp;foo)", s); +template <typename StrToStrMapping> +std::string StrReplaceAll(absl::string_view s, + const StrToStrMapping& replacements); + +// Overload of `StrReplaceAll()` to replace character sequences within a given +// output string *in place* with replacements provided within an initializer +// list of key/value pairs, returning the number of substitutions that occurred. +// +// Example: +// +// std::string s = std::string("$who bought $count #Noun. Thanks $who!"); +// int count; +// count = absl::StrReplaceAll({{"$count", absl::StrCat(5)}, +// {"$who", "Bob"}, +// {"#Noun", "Apples"}}, &s); +// EXPECT_EQ(count, 4); +// EXPECT_EQ("Bob bought 5 Apples. 
Thanks Bob!", s); +int StrReplaceAll( + std::initializer_list<std::pair<absl::string_view, absl::string_view>> + replacements, + std::string* target); + +// Overload of `StrReplaceAll()` to replace patterns within a given output +// string *in place* with replacements provided within a container of key/value +// pairs. +// +// Example: +// +// std::string s = std::string("if (ptr < &foo)"); +// int count = absl::StrReplaceAll({{"&", "&amp;"}, +// {"<", "&lt;"}, +// {">", "&gt;"}}, &s); +// EXPECT_EQ(count, 2); +// EXPECT_EQ("if (ptr &lt; &amp;foo)", s); +template <typename StrToStrMapping> +int StrReplaceAll(const StrToStrMapping& replacements, std::string* target); + +// Implementation details only, past this point. +namespace strings_internal { + +struct ViableSubstitution { + absl::string_view old; + absl::string_view replacement; + size_t offset; + + ViableSubstitution(absl::string_view old_str, + absl::string_view replacement_str, size_t offset_val) + : old(old_str), replacement(replacement_str), offset(offset_val) {} + + // One substitution occurs "before" another (takes priority) if either + // it has the lowest offset, or it has the same offset but a larger size. + bool OccursBefore(const ViableSubstitution& y) const { + if (offset != y.offset) return offset < y.offset; + return old.size() > y.old.size(); + } +}; + +// Build a vector of ViableSubstitutions based on the given list of +// replacements. subs can be implemented as a priority_queue. However, it turns +// out that most callers have small enough a list of substitutions that the +// overhead of such a queue isn't worth it. 
+template <typename StrToStrMapping> +std::vector<ViableSubstitution> FindSubstitutions( + absl::string_view s, const StrToStrMapping& replacements) { + std::vector<ViableSubstitution> subs; + subs.reserve(replacements.size()); + + for (const auto& rep : replacements) { + using std::get; + absl::string_view old(get<0>(rep)); + + size_t pos = s.find(old); + if (pos == s.npos) continue; + + // Ignore attempts to replace "". This condition is almost never true, + // but above condition is frequently true. That's why we test for this + // now and not before. + if (old.empty()) continue; + + subs.emplace_back(old, get<1>(rep), pos); + + // Insertion sort to ensure the last ViableSubstitution comes before + // all the others. + size_t index = subs.size(); + while (--index && subs[index - 1].OccursBefore(subs[index])) { + std::swap(subs[index], subs[index - 1]); + } + } + return subs; +} + +int ApplySubstitutions(absl::string_view s, + std::vector<ViableSubstitution>* subs_ptr, + std::string* result_ptr); + +} // namespace strings_internal + +template <typename StrToStrMapping> +std::string StrReplaceAll(absl::string_view s, + const StrToStrMapping& replacements) { + auto subs = strings_internal::FindSubstitutions(s, replacements); + std::string result; + result.reserve(s.size()); + strings_internal::ApplySubstitutions(s, &subs, &result); + return result; +} + +template <typename StrToStrMapping> +int StrReplaceAll(const StrToStrMapping& replacements, std::string* target) { + auto subs = strings_internal::FindSubstitutions(*target, replacements); + if (subs.empty()) return 0; + + std::string result; + result.reserve(target->size()); + int substitutions = + strings_internal::ApplySubstitutions(*target, &subs, &result); + target->swap(result); + return substitutions; +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_STR_REPLACE_H_ diff --git a/third_party/abseil_cpp/absl/strings/str_replace_benchmark.cc 
b/third_party/abseil_cpp/absl/strings/str_replace_benchmark.cc new file mode 100644 index 000000000000..01331da29f6e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_replace_benchmark.cc @@ -0,0 +1,122 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_replace.h" + +#include <cstring> +#include <string> + +#include "benchmark/benchmark.h" +#include "absl/base/internal/raw_logging.h" + +namespace { + +std::string* big_string; +std::string* after_replacing_the; +std::string* after_replacing_many; + +struct Replacement { + const char* needle; + const char* replacement; +} replacements[] = { + {"the", "box"}, // + {"brown", "quick"}, // + {"jumped", "liquored"}, // + {"dozen", "brown"}, // + {"lazy", "pack"}, // + {"liquor", "shakes"}, // +}; + +// Here, we set up a string for use in global-replace benchmarks. +// We started with a million blanks, and then deterministically insert +// 10,000 copies each of two pangrams. The result is a string that is +// 40% blank space and 60% these words. 'the' occurs 18,247 times and +// all the substitutions together occur 49,004 times. +// +// We then create "after_replacing_the" to be a string that is a result of +// replacing "the" with "box" in big_string. +// +// And then we create "after_replacing_many" to be a string that is result +// of preferring several substitutions. 
+void SetUpStrings() { + if (big_string == nullptr) { + size_t r = 0; + big_string = new std::string(1000 * 1000, ' '); + for (std::string phrase : {"the quick brown fox jumped over the lazy dogs", + "pack my box with the five dozen liquor jugs"}) { + for (int i = 0; i < 10 * 1000; ++i) { + r = r * 237 + 41; // not very random. + memcpy(&(*big_string)[r % (big_string->size() - phrase.size())], + phrase.data(), phrase.size()); + } + } + // big_string->resize(50); + // OK, we've set up the string, now let's set up expectations - first by + // just replacing "the" with "box" + after_replacing_the = new std::string(*big_string); + for (size_t pos = 0; + (pos = after_replacing_the->find("the", pos)) != std::string::npos;) { + memcpy(&(*after_replacing_the)[pos], "box", 3); + } + // And then with all the replacements. + after_replacing_many = new std::string(*big_string); + for (size_t pos = 0;;) { + size_t next_pos = static_cast<size_t>(-1); + const char* needle_string = nullptr; + const char* replacement_string = nullptr; + for (const auto& r : replacements) { + auto needlepos = after_replacing_many->find(r.needle, pos); + if (needlepos != std::string::npos && needlepos < next_pos) { + next_pos = needlepos; + needle_string = r.needle; + replacement_string = r.replacement; + } + } + if (next_pos > after_replacing_many->size()) break; + after_replacing_many->replace(next_pos, strlen(needle_string), + replacement_string); + next_pos += strlen(replacement_string); + pos = next_pos; + } + } +} + +void BM_StrReplaceAllOneReplacement(benchmark::State& state) { + SetUpStrings(); + std::string src = *big_string; + for (auto _ : state) { + std::string dest = absl::StrReplaceAll(src, {{"the", "box"}}); + ABSL_RAW_CHECK(dest == *after_replacing_the, + "not benchmarking intended behavior"); + } +} +BENCHMARK(BM_StrReplaceAllOneReplacement); + +void BM_StrReplaceAll(benchmark::State& state) { + SetUpStrings(); + std::string src = *big_string; + for (auto _ : state) { + std::string 
dest = absl::StrReplaceAll(src, {{"the", "box"}, + {"brown", "quick"}, + {"jumped", "liquored"}, + {"dozen", "brown"}, + {"lazy", "pack"}, + {"liquor", "shakes"}}); + ABSL_RAW_CHECK(dest == *after_replacing_many, + "not benchmarking intended behavior"); + } +} +BENCHMARK(BM_StrReplaceAll); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/str_replace_test.cc b/third_party/abseil_cpp/absl/strings/str_replace_test.cc new file mode 100644 index 000000000000..9d8c7f75b57e --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_replace_test.cc @@ -0,0 +1,341 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_replace.h" + +#include <list> +#include <map> +#include <tuple> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" + +TEST(StrReplaceAll, OneReplacement) { + std::string s; + + // Empty string. + s = absl::StrReplaceAll(s, {{"", ""}}); + EXPECT_EQ(s, ""); + s = absl::StrReplaceAll(s, {{"x", ""}}); + EXPECT_EQ(s, ""); + s = absl::StrReplaceAll(s, {{"", "y"}}); + EXPECT_EQ(s, ""); + s = absl::StrReplaceAll(s, {{"x", "y"}}); + EXPECT_EQ(s, ""); + + // Empty substring. + s = absl::StrReplaceAll("abc", {{"", ""}}); + EXPECT_EQ(s, "abc"); + s = absl::StrReplaceAll("abc", {{"", "y"}}); + EXPECT_EQ(s, "abc"); + s = absl::StrReplaceAll("abc", {{"x", ""}}); + EXPECT_EQ(s, "abc"); + + // Substring not found. 
+ s = absl::StrReplaceAll("abc", {{"xyz", "123"}}); + EXPECT_EQ(s, "abc"); + + // Replace entire string. + s = absl::StrReplaceAll("abc", {{"abc", "xyz"}}); + EXPECT_EQ(s, "xyz"); + + // Replace once at the start. + s = absl::StrReplaceAll("abc", {{"a", "x"}}); + EXPECT_EQ(s, "xbc"); + + // Replace once in the middle. + s = absl::StrReplaceAll("abc", {{"b", "x"}}); + EXPECT_EQ(s, "axc"); + + // Replace once at the end. + s = absl::StrReplaceAll("abc", {{"c", "x"}}); + EXPECT_EQ(s, "abx"); + + // Replace multiple times with varying lengths of original/replacement. + s = absl::StrReplaceAll("ababa", {{"a", "xxx"}}); + EXPECT_EQ(s, "xxxbxxxbxxx"); + + s = absl::StrReplaceAll("ababa", {{"b", "xxx"}}); + EXPECT_EQ(s, "axxxaxxxa"); + + s = absl::StrReplaceAll("aaabaaabaaa", {{"aaa", "x"}}); + EXPECT_EQ(s, "xbxbx"); + + s = absl::StrReplaceAll("abbbabbba", {{"bbb", "x"}}); + EXPECT_EQ(s, "axaxa"); + + // Overlapping matches are replaced greedily. + s = absl::StrReplaceAll("aaa", {{"aa", "x"}}); + EXPECT_EQ(s, "xa"); + + // The replacements are not recursive. + s = absl::StrReplaceAll("aaa", {{"aa", "a"}}); + EXPECT_EQ(s, "aa"); +} + +TEST(StrReplaceAll, ManyReplacements) { + std::string s; + + // Empty string. + s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}); + EXPECT_EQ(s, ""); + + // Empty substring. + s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}}); + EXPECT_EQ(s, "abc"); + + // Replace entire string, one char at a time + s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}}); + EXPECT_EQ(s, "xyz"); + s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}}); + EXPECT_EQ(s, "xyz"); + + // Replace once at the start (longer matches take precedence) + s = absl::StrReplaceAll("abc", {{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}); + EXPECT_EQ(s, "xyz"); + + // Replace once in the middle. 
+ s = absl::StrReplaceAll( + "Abc!", {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}); + EXPECT_EQ(s, "Ayz!"); + + // Replace once at the end. + s = absl::StrReplaceAll( + "Abc!", + {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}); + EXPECT_EQ(s, "Ayz?"); + + // Replace multiple times with varying lengths of original/replacement. + s = absl::StrReplaceAll("ababa", {{"a", "xxx"}, {"b", "XXXX"}}); + EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx"); + + // Overlapping matches are replaced greedily. + s = absl::StrReplaceAll("aaa", {{"aa", "x"}, {"a", "X"}}); + EXPECT_EQ(s, "xX"); + s = absl::StrReplaceAll("aaa", {{"a", "X"}, {"aa", "x"}}); + EXPECT_EQ(s, "xX"); + + // Two well-known sentences + s = absl::StrReplaceAll("the quick brown fox jumped over the lazy dogs", + { + {"brown", "box"}, + {"dogs", "jugs"}, + {"fox", "with"}, + {"jumped", "five"}, + {"over", "dozen"}, + {"quick", "my"}, + {"the", "pack"}, + {"the lazy", "liquor"}, + }); + EXPECT_EQ(s, "pack my box with five dozen liquor jugs"); +} + +TEST(StrReplaceAll, ManyReplacementsInMap) { + std::map<const char *, const char *> replacements; + replacements["$who"] = "Bob"; + replacements["$count"] = "5"; + replacements["#Noun"] = "Apples"; + std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!", + replacements); + EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +} + +TEST(StrReplaceAll, ReplacementsInPlace) { + std::string s = std::string("$who bought $count #Noun. Thanks $who!"); + int count; + count = absl::StrReplaceAll({{"$count", absl::StrCat(5)}, + {"$who", "Bob"}, + {"#Noun", "Apples"}}, &s); + EXPECT_EQ(count, 4); + EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +} + +TEST(StrReplaceAll, ReplacementsInPlaceInMap) { + std::string s = std::string("$who bought $count #Noun. 
Thanks $who!"); + std::map<absl::string_view, absl::string_view> replacements; + replacements["$who"] = "Bob"; + replacements["$count"] = "5"; + replacements["#Noun"] = "Apples"; + int count; + count = absl::StrReplaceAll(replacements, &s); + EXPECT_EQ(count, 4); + EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s); +} + +struct Cont { + Cont() {} + explicit Cont(absl::string_view src) : data(src) {} + + absl::string_view data; +}; + +template <int index> +absl::string_view get(const Cont& c) { + auto splitter = absl::StrSplit(c.data, ':'); + auto it = splitter.begin(); + for (int i = 0; i < index; ++i) ++it; + + return *it; +} + +TEST(StrReplaceAll, VariableNumber) { + std::string s; + { + std::vector<std::pair<std::string, std::string>> replacements; + + s = "abc"; + EXPECT_EQ(0, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("abc", s); + + s = "abc"; + replacements.push_back({"a", "A"}); + EXPECT_EQ(1, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("Abc", s); + + s = "abc"; + replacements.push_back({"b", "B"}); + EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("ABc", s); + + s = "abc"; + replacements.push_back({"d", "D"}); + EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("ABc", s); + + EXPECT_EQ("ABcABc", absl::StrReplaceAll("abcabc", replacements)); + } + + { + std::map<const char*, const char*> replacements; + replacements["aa"] = "x"; + replacements["a"] = "X"; + s = "aaa"; + EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s)); + EXPECT_EQ("xX", s); + + EXPECT_EQ("xxX", absl::StrReplaceAll("aaaaa", replacements)); + } + + { + std::list<std::pair<absl::string_view, absl::string_view>> replacements = { + {"a", "x"}, {"b", "y"}, {"c", "z"}}; + + std::string s = absl::StrReplaceAll("abc", replacements); + EXPECT_EQ(s, "xyz"); + } + + { + using X = std::tuple<absl::string_view, std::string, int>; + std::vector<X> replacements(3); + replacements[0] = X{"a", "x", 1}; + replacements[1] = X{"b", "y", 0}; + replacements[2] = 
X{"c", "z", -1}; + + std::string s = absl::StrReplaceAll("abc", replacements); + EXPECT_EQ(s, "xyz"); + } + + { + std::vector<Cont> replacements(3); + replacements[0] = Cont{"a:x"}; + replacements[1] = Cont{"b:y"}; + replacements[2] = Cont{"c:z"}; + + std::string s = absl::StrReplaceAll("abc", replacements); + EXPECT_EQ(s, "xyz"); + } +} + +// Same as above, but using the in-place variant of absl::StrReplaceAll, +// that returns the # of replacements performed. +TEST(StrReplaceAll, Inplace) { + std::string s; + int reps; + + // Empty string. + s = ""; + reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s); + EXPECT_EQ(reps, 0); + EXPECT_EQ(s, ""); + + // Empty substring. + s = "abc"; + reps = absl::StrReplaceAll({{"", ""}, {"", "y"}, {"x", ""}}, &s); + EXPECT_EQ(reps, 0); + EXPECT_EQ(s, "abc"); + + // Replace entire string, one char at a time + s = "abc"; + reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s); + EXPECT_EQ(reps, 3); + EXPECT_EQ(s, "xyz"); + s = "zxy"; + reps = absl::StrReplaceAll({{"z", "x"}, {"x", "y"}, {"y", "z"}}, &s); + EXPECT_EQ(reps, 3); + EXPECT_EQ(s, "xyz"); + + // Replace once at the start (longer matches take precedence) + s = "abc"; + reps = absl::StrReplaceAll({{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}, &s); + EXPECT_EQ(reps, 1); + EXPECT_EQ(s, "xyz"); + + // Replace once in the middle. + s = "Abc!"; + reps = absl::StrReplaceAll( + {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}, &s); + EXPECT_EQ(reps, 1); + EXPECT_EQ(s, "Ayz!"); + + // Replace once at the end. + s = "Abc!"; + reps = absl::StrReplaceAll( + {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}, &s); + EXPECT_EQ(reps, 1); + EXPECT_EQ(s, "Ayz?"); + + // Replace multiple times with varying lengths of original/replacement. 
+ s = "ababa"; + reps = absl::StrReplaceAll({{"a", "xxx"}, {"b", "XXXX"}}, &s); + EXPECT_EQ(reps, 5); + EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx"); + + // Overlapping matches are replaced greedily. + s = "aaa"; + reps = absl::StrReplaceAll({{"aa", "x"}, {"a", "X"}}, &s); + EXPECT_EQ(reps, 2); + EXPECT_EQ(s, "xX"); + s = "aaa"; + reps = absl::StrReplaceAll({{"a", "X"}, {"aa", "x"}}, &s); + EXPECT_EQ(reps, 2); + EXPECT_EQ(s, "xX"); + + // Two well-known sentences + s = "the quick brown fox jumped over the lazy dogs"; + reps = absl::StrReplaceAll( + { + {"brown", "box"}, + {"dogs", "jugs"}, + {"fox", "with"}, + {"jumped", "five"}, + {"over", "dozen"}, + {"quick", "my"}, + {"the", "pack"}, + {"the lazy", "liquor"}, + }, + &s); + EXPECT_EQ(reps, 8); + EXPECT_EQ(s, "pack my box with five dozen liquor jugs"); +} diff --git a/third_party/abseil_cpp/absl/strings/str_split.cc b/third_party/abseil_cpp/absl/strings/str_split.cc new file mode 100644 index 000000000000..e08c26b6bbbe --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_split.cc @@ -0,0 +1,139 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/str_split.h" + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <iterator> +#include <limits> +#include <memory> + +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/ascii.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace { + +// This GenericFind() template function encapsulates the finding algorithm +// shared between the ByString and ByAnyChar delimiters. The FindPolicy +// template parameter allows each delimiter to customize the actual find +// function to use and the length of the found delimiter. For example, the +// Literal delimiter will ultimately use absl::string_view::find(), and the +// AnyOf delimiter will use absl::string_view::find_first_of(). +template <typename FindPolicy> +absl::string_view GenericFind(absl::string_view text, + absl::string_view delimiter, size_t pos, + FindPolicy find_policy) { + if (delimiter.empty() && text.length() > 0) { + // Special case for empty string delimiters: always return a zero-length + // absl::string_view referring to the item at position 1 past pos. + return absl::string_view(text.data() + pos + 1, 0); + } + size_t found_pos = absl::string_view::npos; + absl::string_view found(text.data() + text.size(), + 0); // By default, not found + found_pos = find_policy.Find(text, delimiter, pos); + if (found_pos != absl::string_view::npos) { + found = absl::string_view(text.data() + found_pos, + find_policy.Length(delimiter)); + } + return found; +} + +// Finds using absl::string_view::find(), therefore the length of the found +// delimiter is delimiter.length(). +struct LiteralPolicy { + size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + return text.find(delimiter, pos); + } + size_t Length(absl::string_view delimiter) { return delimiter.length(); } +}; + +// Finds using absl::string_view::find_first_of(), therefore the length of the +// found delimiter is 1. 
+struct AnyOfPolicy { + size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) { + return text.find_first_of(delimiter, pos); + } + size_t Length(absl::string_view /* delimiter */) { return 1; } +}; + +} // namespace + +// +// ByString +// + +ByString::ByString(absl::string_view sp) : delimiter_(sp) {} + +absl::string_view ByString::Find(absl::string_view text, size_t pos) const { + if (delimiter_.length() == 1) { + // Much faster to call find on a single character than on an + // absl::string_view. + size_t found_pos = text.find(delimiter_[0], pos); + if (found_pos == absl::string_view::npos) + return absl::string_view(text.data() + text.size(), 0); + return text.substr(found_pos, 1); + } + return GenericFind(text, delimiter_, pos, LiteralPolicy()); +} + +// +// ByChar +// + +absl::string_view ByChar::Find(absl::string_view text, size_t pos) const { + size_t found_pos = text.find(c_, pos); + if (found_pos == absl::string_view::npos) + return absl::string_view(text.data() + text.size(), 0); + return text.substr(found_pos, 1); +} + +// +// ByAnyChar +// + +ByAnyChar::ByAnyChar(absl::string_view sp) : delimiters_(sp) {} + +absl::string_view ByAnyChar::Find(absl::string_view text, size_t pos) const { + return GenericFind(text, delimiters_, pos, AnyOfPolicy()); +} + +// +// ByLength +// +ByLength::ByLength(ptrdiff_t length) : length_(length) { + ABSL_RAW_CHECK(length > 0, ""); +} + +absl::string_view ByLength::Find(absl::string_view text, + size_t pos) const { + pos = std::min(pos, text.size()); // truncate `pos` + absl::string_view substr = text.substr(pos); + // If the string is shorter than the chunk size we say we + // "can't find the delimiter" so this will be the last chunk. 
+ if (substr.length() <= static_cast<size_t>(length_)) + return absl::string_view(text.data() + text.size(), 0); + + return absl::string_view(substr.data() + length_, 0); +} + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/str_split.h b/third_party/abseil_cpp/absl/strings/str_split.h new file mode 100644 index 000000000000..bfbca422a8dc --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_split.h @@ -0,0 +1,548 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: str_split.h +// ----------------------------------------------------------------------------- +// +// This file contains functions for splitting strings. It defines the main +// `StrSplit()` function, several delimiters for determining the boundaries on +// which to split the string, and predicates for filtering delimited results. +// `StrSplit()` adapts the returned collection to the type specified by the +// caller. +// +// Example: +// +// // Splits the given string on commas. Returns the results in a +// // vector of strings. +// std::vector<std::string> v = absl::StrSplit("a,b,c", ','); +// // Can also use "," +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// See StrSplit() below for more information. 
+#ifndef ABSL_STRINGS_STR_SPLIT_H_ +#define ABSL_STRINGS_STR_SPLIT_H_ + +#include <algorithm> +#include <cstddef> +#include <map> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/strings/internal/str_split_internal.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +//------------------------------------------------------------------------------ +// Delimiters +//------------------------------------------------------------------------------ +// +// `StrSplit()` uses delimiters to define the boundaries between elements in the +// provided input. Several `Delimiter` types are defined below. If a string +// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of +// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it +// were passed a `ByString` delimiter. +// +// A `Delimiter` is an object with a `Find()` function that knows how to find +// the first occurrence of itself in a given `absl::string_view`. +// +// The following `Delimiter` types are available for use within `StrSplit()`: +// +// - `ByString` (default for string arguments) +// - `ByChar` (default for a char argument) +// - `ByAnyChar` +// - `ByLength` +// - `MaxSplits` +// +// A Delimiter's `Find()` member function will be passed an input `text` that is +// to be split and a position (`pos`) to begin searching for the next delimiter +// in `text`. The returned absl::string_view should refer to the next occurrence +// (after `pos`) of the represented delimiter; this returned absl::string_view +// represents the next location where the input `text` should be broken. +// +// The returned absl::string_view may be zero-length if the Delimiter does not +// represent a part of the string (e.g., a fixed-length delimiter). 
If no +// delimiter is found in the input `text`, a zero-length absl::string_view +// referring to `text.end()` should be returned (e.g., +// `text.substr(text.size())`). It is important that the returned +// absl::string_view always be within the bounds of the input `text` given as an +// argument--it must not refer to a string that is physically located outside of +// the given string. +// +// The following example is a simple Delimiter object that is created with a +// single char and will look for that char in the text passed to the `Find()` +// function: +// +// struct SimpleDelimiter { +// const char c_; +// explicit SimpleDelimiter(char c) : c_(c) {} +// absl::string_view Find(absl::string_view text, size_t pos) { +// auto found = text.find(c_, pos); +// if (found == absl::string_view::npos) +// return text.substr(text.size()); +// +// return text.substr(found, 1); +// } +// }; + +// ByString +// +// A sub-string delimiter. If `StrSplit()` is passed a string in place of a +// `Delimiter` object, the string will be implicitly converted into a +// `ByString` delimiter. +// +// Example: +// +// // Because a string literal is converted to an `absl::ByString`, +// // the following two splits are equivalent. +// +// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", "); +// +// using absl::ByString; +// std::vector<std::string> v2 = absl::StrSplit("a, b, c", +// ByString(", ")); +// // v[0] == "a", v[1] == "b", v[2] == "c" +class ByString { + public: + explicit ByString(absl::string_view sp); + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + const std::string delimiter_; +}; + +// ByChar +// +// A single character delimiter. `ByChar` is functionally equivalent to a +// 1-char string within a `ByString` delimiter, but slightly more efficient. +// +// Example: +// +// // Because a char literal is converted to a absl::ByChar, +// // the following two splits are equivalent. 
+// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); +// using absl::ByChar; +// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(',')); +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// `ByChar` is also the default delimiter if a single character is given +// as the delimiter to `StrSplit()`. For example, the following calls are +// equivalent: +// +// std::vector<std::string> v = absl::StrSplit("a-b", '-'); +// +// using absl::ByChar; +// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-')); +// +class ByChar { + public: + explicit ByChar(char c) : c_(c) {} + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + char c_; +}; + +// ByAnyChar +// +// A delimiter that will match any of the given byte-sized characters within +// its provided string. +// +// Note: this delimiter works with single-byte string data, but does not work +// with variable-width encodings, such as UTF-8. +// +// Example: +// +// using absl::ByAnyChar; +// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// If `ByAnyChar` is given the empty string, it behaves exactly like +// `ByString` and matches each individual character in the input string. +// +class ByAnyChar { + public: + explicit ByAnyChar(absl::string_view sp); + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + const std::string delimiters_; +}; + +// ByLength +// +// A delimiter for splitting into equal-length strings. The length argument to +// the constructor must be greater than 0. +// +// Note: this delimiter works with single-byte string data, but does not work +// with variable-width encodings, such as UTF-8. 
+// +// Example: +// +// using absl::ByLength; +// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3)); + +// // v[0] == "123", v[1] == "456", v[2] == "789" +// +// Note that the string does not have to be a multiple of the fixed split +// length. In such a case, the last substring will be shorter. +// +// using absl::ByLength; +// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2)); +// +// // v[0] == "12", v[1] == "34", v[2] == "5" +class ByLength { + public: + explicit ByLength(ptrdiff_t length); + absl::string_view Find(absl::string_view text, size_t pos) const; + + private: + const ptrdiff_t length_; +}; + +namespace strings_internal { + +// A traits-like metafunction for selecting the default Delimiter object type +// for a particular Delimiter type. The base case simply exposes type Delimiter +// itself as the delimiter's Type. However, there are specializations for +// string-like objects that map them to the ByString delimiter object. +// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept +// string-like objects (e.g., ',') as delimiter arguments but they will be +// treated as if a ByString delimiter was given. +template <typename Delimiter> +struct SelectDelimiter { + using type = Delimiter; +}; + +template <> +struct SelectDelimiter<char> { + using type = ByChar; +}; +template <> +struct SelectDelimiter<char*> { + using type = ByString; +}; +template <> +struct SelectDelimiter<const char*> { + using type = ByString; +}; +template <> +struct SelectDelimiter<absl::string_view> { + using type = ByString; +}; +template <> +struct SelectDelimiter<std::string> { + using type = ByString; +}; + +// Wraps another delimiter and sets a max number of matches for that delimiter. 
+template <typename Delimiter> +class MaxSplitsImpl { + public: + MaxSplitsImpl(Delimiter delimiter, int limit) + : delimiter_(delimiter), limit_(limit), count_(0) {} + absl::string_view Find(absl::string_view text, size_t pos) { + if (count_++ == limit_) { + return absl::string_view(text.data() + text.size(), + 0); // No more matches. + } + return delimiter_.Find(text, pos); + } + + private: + Delimiter delimiter_; + const int limit_; + int count_; +}; + +} // namespace strings_internal + +// MaxSplits() +// +// A delimiter that limits the number of matches which can occur to the passed +// `limit`. The last element in the returned collection will contain all +// remaining unsplit pieces, which may contain instances of the delimiter. +// The collection will contain at most `limit` + 1 elements. +// Example: +// +// using absl::MaxSplits; +// std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1)); +// +// // v[0] == "a", v[1] == "b,c" +template <typename Delimiter> +inline strings_internal::MaxSplitsImpl< + typename strings_internal::SelectDelimiter<Delimiter>::type> +MaxSplits(Delimiter delimiter, int limit) { + typedef + typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType; + return strings_internal::MaxSplitsImpl<DelimiterType>( + DelimiterType(delimiter), limit); +} + +//------------------------------------------------------------------------------ +// Predicates +//------------------------------------------------------------------------------ +// +// Predicates filter the results of a `StrSplit()` by determining whether or not +// a resultant element is included in the result set. A predicate may be passed +// as an optional third argument to the `StrSplit()` function. +// +// Predicates are unary functions (or functors) that take a single +// `absl::string_view` argument and return a bool indicating whether the +// argument should be included (`true`) or excluded (`false`). 
+// +// Predicates are useful when filtering out empty substrings. By default, empty +// substrings may be returned by `StrSplit()`, which is similar to the way split +// functions work in other programming languages. + +// AllowEmpty() +// +// Always returns `true`, indicating that all strings--including empty +// strings--should be included in the split output. This predicate is not +// strictly needed because this is the default behavior of `StrSplit()`; +// however, it might be useful at some call sites to make the intent explicit. +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty()); +// +// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == "" +struct AllowEmpty { + bool operator()(absl::string_view) const { return true; } +}; + +// SkipEmpty() +// +// Returns `false` if the given `absl::string_view` is empty, indicating that +// `StrSplit()` should omit the empty string. +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty()); +// +// // v[0] == "a", v[1] == "b" +// +// Note: `SkipEmpty()` does not consider a string containing only whitespace +// to be empty. To skip such whitespace as well, use the `SkipWhitespace()` +// predicate. +struct SkipEmpty { + bool operator()(absl::string_view sp) const { return !sp.empty(); } +}; + +// SkipWhitespace() +// +// Returns `false` if the given `absl::string_view` is empty *or* contains only +// whitespace, indicating that `StrSplit()` should omit the string. 
+// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", +// ',', SkipWhitespace()); +// // v[0] == " a ", v[1] == "b" +// +// // SkipEmpty() would return whitespace elements +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty()); +// // v[0] == " a ", v[1] == " ", v[2] == "b" +struct SkipWhitespace { + bool operator()(absl::string_view sp) const { + sp = absl::StripAsciiWhitespace(sp); + return !sp.empty(); + } +}; + +template <typename T> +using EnableSplitIfString = + typename std::enable_if<std::is_same<T, std::string>::value || + std::is_same<T, const std::string>::value, + int>::type; + +//------------------------------------------------------------------------------ +// StrSplit() +//------------------------------------------------------------------------------ + +// StrSplit() +// +// Splits a given string based on the provided `Delimiter` object, returning the +// elements within the type specified by the caller. Optionally, you may pass a +// `Predicate` to `StrSplit()` indicating whether to include or exclude the +// resulting element within the final result set. (See the overviews for +// Delimiters and Predicates above.) +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ','); +// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" +// +// You can also provide an explicit `Delimiter` object: +// +// Example: +// +// using absl::ByAnyChar; +// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); +// // v[0] == "a", v[1] == "b", v[2] == "c" +// +// See above for more information on delimiters. +// +// By default, empty strings are included in the result set. 
You can optionally +// include a third `Predicate` argument to apply a test for whether the +// resultant element should be included in the result set: +// +// Example: +// +// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", +// ',', SkipWhitespace()); +// // v[0] == " a ", v[1] == "b" +// +// See above for more information on predicates. +// +//------------------------------------------------------------------------------ +// StrSplit() Return Types +//------------------------------------------------------------------------------ +// +// The `StrSplit()` function adapts the returned collection to the collection +// specified by the caller (e.g. `std::vector` above). The returned collections +// may contain `std::string`, `absl::string_view` (in which case the original +// string being split must ensure that it outlives the collection), or any +// object that can be explicitly created from an `absl::string_view`. This +// behavior works for: +// +// 1) All standard STL containers including `std::vector`, `std::list`, +// `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap` +// 2) `std::pair` (which is not actually a container). See below. +// +// Example: +// +// // The results are returned as `absl::string_view` objects. Note that we +// // have to ensure that the input string outlives any results. +// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); +// +// // Stores results in a std::set<std::string>, which also performs +// // de-duplication and orders the elements in ascending order. +// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ','); +// // a contains "a", "b", "c" (in that order) +// +// // `StrSplit()` can be used within a range-based for loop, in which case +// // each element will be of type `absl::string_view`. 
+// std::vector<std::string> v; +// for (const auto sv : absl::StrSplit("a,b,c", ',')) { +// if (sv != "b") v.emplace_back(sv); +// } +// // v[0] == "a", v[1] == "c" +// +// // Stores results in a map. The map implementation assumes that the input +// // is provided as a series of key/value pairs. For example, the 0th element +// // resulting from the split will be stored as a key to the 1st element. If +// // an odd number of elements are resolved, the last element is paired with +// // a default-constructed value (e.g., empty string). +// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ','); +// // m["a"] == "b", m["c"] == "" // last component value equals "" +// +// Splitting to `std::pair` is an interesting case because it can hold only two +// elements and is not a collection type. When splitting to a `std::pair` the +// first two split strings become the `std::pair` `.first` and `.second` +// members, respectively. The remaining split substrings are discarded. If there +// are fewer than two split substrings, the empty string is used for the +// corresponding `std::pair` member. +// +// Example: +// +// // Stores first two split strings as the members in a std::pair. +// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); +// // p.first == "a", p.second == "b" // "c" is omitted. +// +// The `StrSplit()` function can be used multiple times to perform more +// complicated splitting logic, such as intelligently parsing key-value pairs. 
+// +// Example: +// +// // The input string "a=b=c,d=e,f=,g" becomes +// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" } +// std::map<std::string, std::string> m; +// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { +// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); +// } +// EXPECT_EQ("b=c", m.find("a")->second); +// EXPECT_EQ("e", m.find("d")->second); +// EXPECT_EQ("", m.find("f")->second); +// EXPECT_EQ("", m.find("g")->second); +// +// WARNING: Due to a legacy bug that is maintained for backward compatibility, +// splitting the following empty string_views produces different results: +// +// absl::StrSplit(absl::string_view(""), '-'); // {""} +// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""} +// +// Try not to depend on this distinction because the bug may one day be fixed. +template <typename Delimiter> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + absl::string_view> +StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty, + absl::string_view>( + text.value(), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + std::string> +StrSplit(StringType&& text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>( + std::move(text), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename Predicate> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + absl::string_view> 
+StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, + Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate, + absl::string_view>( + text.value(), DelimiterType(d), std::move(p)); +} + +template <typename Delimiter, typename Predicate, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + std::string> +StrSplit(StringType&& text, Delimiter d, Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate, std::string>( + std::move(text), DelimiterType(d), std::move(p)); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_STR_SPLIT_H_ diff --git a/third_party/abseil_cpp/absl/strings/str_split_benchmark.cc b/third_party/abseil_cpp/absl/strings/str_split_benchmark.cc new file mode 100644 index 000000000000..f38dfcfe5af7 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_split_benchmark.cc @@ -0,0 +1,180 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/str_split.h" + +#include <iterator> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "benchmark/benchmark.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/string_view.h" + +namespace { + +// Returns a string of `desired_length * kAverageValueLen` 'x' characters in +// which index 1 and every kAverageValueLen-th index after it holds ';'. +std::string MakeTestString(int desired_length) { + static const int kAverageValueLen = 25; + std::string test(desired_length * kAverageValueLen, 'x'); + // size_t index: avoids a signed/unsigned comparison against test.size(). + for (size_t i = 1; i < test.size(); i += kAverageValueLen) { + test[i] = ';'; + } + return test; +} + +// Measures StrSplit() on ';' converted to a std::vector<absl::string_view>. +void BM_Split2StringView(benchmark::State& state) { + std::string test = MakeTestString(state.range(0)); + for (auto _ : state) { + std::vector<absl::string_view> result = absl::StrSplit(test, ';'); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_Split2StringView, 0, 1 << 20); + +static const absl::string_view kDelimiters = ";:,."; + +// Like MakeTestString(), but writes a delimiter at every multiple of +// kAverageValueLen (starting at index 0), cycling through kDelimiters. +std::string MakeMultiDelimiterTestString(int desired_length) { + static const int kAverageValueLen = 25; + std::string test(desired_length * kAverageValueLen, 'x'); + // size_t index: avoids a signed/unsigned comparison against test.size(). + for (size_t i = 0; i * kAverageValueLen < test.size(); ++i) { + // Cycle through a variety of delimiters. + test[i * kAverageValueLen] = kDelimiters[i % kDelimiters.size()]; + } + return test; +} + +// Measure StrSplit with ByAnyChar with four delimiters to choose from. 
+void BM_Split2StringViewByAnyChar(benchmark::State& state) { + std::string test = MakeMultiDelimiterTestString(state.range(0)); + for (auto _ : state) { + std::vector<absl::string_view> result = + absl::StrSplit(test, absl::ByAnyChar(kDelimiters)); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_Split2StringViewByAnyChar, 0, 1 << 20); + +void BM_Split2StringViewLifted(benchmark::State& state) { + std::string test = MakeTestString(state.range(0)); + std::vector<absl::string_view> result; + for (auto _ : state) { + result = absl::StrSplit(test, ';'); + } + benchmark::DoNotOptimize(result); +} +BENCHMARK_RANGE(BM_Split2StringViewLifted, 0, 1 << 20); + +void BM_Split2String(benchmark::State& state) { + std::string test = MakeTestString(state.range(0)); + for (auto _ : state) { + std::vector<std::string> result = absl::StrSplit(test, ';'); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_Split2String, 0, 1 << 20); + +// This benchmark is for comparing Split2 to Split1 (SplitStringUsing). In +// particular, this benchmark uses SkipEmpty() to match SplitStringUsing's +// behavior. 
+void BM_Split2SplitStringUsing(benchmark::State& state) { + std::string test = MakeTestString(state.range(0)); + for (auto _ : state) { + std::vector<std::string> result = + absl::StrSplit(test, ';', absl::SkipEmpty()); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_Split2SplitStringUsing, 0, 1 << 20); + +// Measures splitting an "x;x;x;..." string into a std::unordered_set. The +// delimiter given to StrSplit() must be the ';' written into `test` below; +// with any other character the input is never split. +void BM_SplitStringToUnorderedSet(benchmark::State& state) { + const int len = state.range(0); + std::string test(len, 'x'); + for (int i = 1; i < len; i += 2) { + test[i] = ';'; + } + for (auto _ : state) { + std::unordered_set<std::string> result = + absl::StrSplit(test, ';', absl::SkipEmpty()); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_SplitStringToUnorderedSet, 0, 1 << 20); + +// Same input as BM_SplitStringToUnorderedSet, converted to a +// std::unordered_map; the delimiter again has to match the ';' in `test`. +void BM_SplitStringToUnorderedMap(benchmark::State& state) { + const int len = state.range(0); + std::string test(len, 'x'); + for (int i = 1; i < len; i += 2) { + test[i] = ';'; + } + for (auto _ : state) { + std::unordered_map<std::string, std::string> result = + absl::StrSplit(test, ';', absl::SkipEmpty()); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_SplitStringToUnorderedMap, 0, 1 << 20); + +// Same input again, split on ';' with no predicate into a +// std::vector<std::string>. 
+void BM_SplitStringAllowEmpty(benchmark::State& state) { + const int len = state.range(0); + std::string test(len, 'x'); + for (int i = 1; i < len; i += 2) { + test[i] = ';'; + } + for (auto _ : state) { + std::vector<std::string> result = absl::StrSplit(test, ';'); + benchmark::DoNotOptimize(result); + } +} +BENCHMARK_RANGE(BM_SplitStringAllowEmpty, 0, 1 << 20); + +// Delimiter factory returning the char 'X'. +struct OneCharLiteral { + char operator()() const { return 'X'; } +}; + +// Delimiter factory returning the one-character C string "X". +struct OneCharStringLiteral { + const char* operator()() const { return "X"; } +}; + +template <typename DelimiterFactory> +void BM_SplitStringWithOneChar(benchmark::State& state) { + const auto delimiter = DelimiterFactory()(); + std::vector<absl::string_view> pieces; + size_t v = 0; + for (auto _ : state) { + pieces = absl::StrSplit("The quick brown fox jumps over the lazy dog", + delimiter); + v += 
pieces.size(); + } + ABSL_RAW_CHECK(v == state.iterations(), ""); +} +BENCHMARK_TEMPLATE(BM_SplitStringWithOneChar, OneCharLiteral); +BENCHMARK_TEMPLATE(BM_SplitStringWithOneChar, OneCharStringLiteral); + +template <typename DelimiterFactory> +void BM_SplitStringWithOneCharNoVector(benchmark::State& state) { + const auto delimiter = DelimiterFactory()(); + size_t v = 0; + for (auto _ : state) { + auto splitter = absl::StrSplit( + "The quick brown fox jumps over the lazy dog", delimiter); + v += std::distance(splitter.begin(), splitter.end()); + } + ABSL_RAW_CHECK(v == state.iterations(), ""); +} +BENCHMARK_TEMPLATE(BM_SplitStringWithOneCharNoVector, OneCharLiteral); +BENCHMARK_TEMPLATE(BM_SplitStringWithOneCharNoVector, OneCharStringLiteral); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/str_split_test.cc b/third_party/abseil_cpp/absl/strings/str_split_test.cc new file mode 100644 index 000000000000..7f7c097faee2 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/str_split_test.cc @@ -0,0 +1,953 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/str_split.h" + +#include <deque> +#include <initializer_list> +#include <list> +#include <map> +#include <memory> +#include <string> +#include <type_traits> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/macros.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/node_hash_map.h" +#include "absl/strings/numbers.h" + +namespace { + +using ::testing::ElementsAre; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +TEST(Split, TraitsTest) { + static_assert(!absl::strings_internal::SplitterIsConvertibleTo<int>::value, + ""); + static_assert( + !absl::strings_internal::SplitterIsConvertibleTo<std::string>::value, ""); + static_assert(absl::strings_internal::SplitterIsConvertibleTo< + std::vector<std::string>>::value, + ""); + static_assert( + !absl::strings_internal::SplitterIsConvertibleTo<std::vector<int>>::value, + ""); + static_assert(absl::strings_internal::SplitterIsConvertibleTo< + std::vector<absl::string_view>>::value, + ""); + static_assert(absl::strings_internal::SplitterIsConvertibleTo< + std::map<std::string, std::string>>::value, + ""); + static_assert(absl::strings_internal::SplitterIsConvertibleTo< + std::map<absl::string_view, absl::string_view>>::value, + ""); + static_assert(!absl::strings_internal::SplitterIsConvertibleTo< + std::map<int, std::string>>::value, + ""); + static_assert(!absl::strings_internal::SplitterIsConvertibleTo< + std::map<std::string, int>>::value, + ""); +} + +// This tests the overall split API, which is made up of the absl::StrSplit() +// function and the Delimiter objects in the absl:: namespace. +// This TEST macro is outside of any namespace to require full specification of +// namespaces just like callers will need to use. +TEST(Split, APIExamples) { + { + // Passes string delimiter. Assumes the default of ByString. 
+ std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByString; + v = absl::StrSplit("a,b,c", ByString(",")); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")), + ElementsAre("a", "b", "c")); + } + + { + // Same as above, but using a single character as the delimiter. + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByChar; + v = absl::StrSplit("a,b,c", ByChar(',')); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses the Literal string "=>" as the delimiter. + const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // The substrings are returned as string_views, eliminating copying. + std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Leading and trailing empty substrings. + std::vector<std::string> v = absl::StrSplit(",a,b,c,", ','); + EXPECT_THAT(v, ElementsAre("", "a", "b", "c", "")); + } + + { + // Splits on a delimiter that is not found. + std::vector<std::string> v = absl::StrSplit("abc", ','); + EXPECT_THAT(v, ElementsAre("abc")); + } + + { + // Splits the input string into individual characters by using an empty + // string as the delimiter. + std::vector<std::string> v = absl::StrSplit("abc", ""); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Splits string data with embedded NUL characters, using NUL as the + // delimiter. A simple delimiter of "\0" doesn't work because strlen() will + // say that's the empty string when constructing the absl::string_view + // delimiter. Instead, a non-empty string containing NUL can be used as the + // delimiter. 
+ std::string embedded_nulls("a\0b\0c", 5); + std::string null_delim("\0", 1); + std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Stores first two split strings as the members in a std::pair. + std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + // "c" is omitted because std::pair can hold only two elements. + } + + { + // Results stored in std::set<std::string> + std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses a non-const char* delimiter. + char a[] = ","; + char* d = a + 0; + std::vector<std::string> v = absl::StrSplit("a,b,c", d); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Results split using either of , or ; + using absl::ByAnyChar; + std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;")); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses the SkipWhitespace predicate. + using absl::SkipWhitespace; + std::vector<std::string> v = + absl::StrSplit(" a , ,,b,", ',', SkipWhitespace()); + EXPECT_THAT(v, ElementsAre(" a ", "b")); + } + + { + // Uses the ByLength delimiter. + using absl::ByLength; + std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3)); + EXPECT_THAT(v, ElementsAre("abc", "def", "g")); + } + + { + // Different forms of initialization / conversion. + std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v1, ElementsAre("a", "b", "c")); + std::vector<std::string> v2(absl::StrSplit("a,b,c", ',')); + EXPECT_THAT(v2, ElementsAre("a", "b", "c")); + auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ',')); + EXPECT_THAT(v3, ElementsAre("a", "b", "c")); + v3 = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v3, ElementsAre("a", "b", "c")); + } + + { + // Results stored in a std::map. 
+ std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); + EXPECT_EQ(2, m.size()); + EXPECT_EQ("3", m["a"]); + EXPECT_EQ("2", m["b"]); + } + + { + // Results stored in a std::multimap. + std::multimap<std::string, std::string> m = + absl::StrSplit("a,1,b,2,a,3", ','); + EXPECT_EQ(3, m.size()); + auto it = m.find("a"); + EXPECT_EQ("1", it->second); + ++it; + EXPECT_EQ("3", it->second); + it = m.find("b"); + EXPECT_EQ("2", it->second); + } + + { + // Demonstrates use in a range-based for loop in C++11. + std::string s = "x,x,x,x,x,x,x"; + for (absl::string_view sp : absl::StrSplit(s, ',')) { + EXPECT_EQ("x", sp); + } + } + + { + // Demonstrates use with a Predicate in a range-based for loop. + using absl::SkipWhitespace; + std::string s = " ,x,,x,,x,x,x,,"; + for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) { + EXPECT_EQ("x", sp); + } + } + + { + // Demonstrates a "smart" split to std::map using two separate calls to + // absl::StrSplit. One call to split the records, and another call to split + // the keys and values. This also uses the MaxSplits delimiter so that the + // std::string "a=b=c" will split to "a" -> "b=c". + std::map<std::string, std::string> m; + for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { + m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); + } + EXPECT_EQ("b=c", m.find("a")->second); + EXPECT_EQ("e", m.find("d")->second); + EXPECT_EQ("", m.find("f")->second); + EXPECT_EQ("", m.find("g")->second); + } +} + +// +// Tests for SplitIterator +// + +TEST(SplitIterator, Basics) { + auto splitter = absl::StrSplit("a,b", ','); + auto it = splitter.begin(); + auto end = splitter.end(); + + EXPECT_NE(it, end); + EXPECT_EQ("a", *it); // tests dereference + ++it; // tests preincrement + EXPECT_NE(it, end); + EXPECT_EQ("b", + std::string(it->data(), it->size())); // tests dereference as ptr + it++; // tests postincrement + EXPECT_EQ(it, end); +} + +// Simple Predicate to skip a particular string. 
+class Skip { + public: + explicit Skip(const std::string& s) : s_(s) {} + bool operator()(absl::string_view sp) { return sp != s_; } + + private: + std::string s_; +}; + +TEST(SplitIterator, Predicate) { + auto splitter = absl::StrSplit("a,b,c", ',', Skip("b")); + auto it = splitter.begin(); + auto end = splitter.end(); + + EXPECT_NE(it, end); + EXPECT_EQ("a", *it); // tests dereference + ++it; // tests preincrement -- "b" should be skipped here. + EXPECT_NE(it, end); + EXPECT_EQ("c", + std::string(it->data(), it->size())); // tests dereference as ptr + it++; // tests postincrement + EXPECT_EQ(it, end); +} + +TEST(SplitIterator, EdgeCases) { + // Expected input and output, assuming a delimiter of ',' + struct { + std::string in; + std::vector<std::string> expect; + } specs[] = { + {"", {""}}, + {"foo", {"foo"}}, + {",", {"", ""}}, + {",foo", {"", "foo"}}, + {"foo,", {"foo", ""}}, + {",foo,", {"", "foo", ""}}, + {"foo,bar", {"foo", "bar"}}, + }; + + for (const auto& spec : specs) { + SCOPED_TRACE(spec.in); + auto splitter = absl::StrSplit(spec.in, ','); + auto it = splitter.begin(); + auto end = splitter.end(); + for (const auto& expected : spec.expect) { + EXPECT_NE(it, end); + EXPECT_EQ(expected, *it++); + } + EXPECT_EQ(it, end); + } +} + +TEST(Splitter, Const) { + const auto splitter = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(splitter, ElementsAre("a", "b", "c")); +} + +TEST(Split, EmptyAndNull) { + // Attention: Splitting a null absl::string_view is different than splitting + // an empty absl::string_view even though both string_views are considered + // equal. This behavior is likely surprising and undesirable. However, to + // maintain backward compatibility, there is a small "hack" in + // str_split_internal.h that preserves this behavior. If that behavior is ever + // changed/fixed, this test will need to be updated. 
+ EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre("")); + EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre()); +} + +TEST(SplitIterator, EqualityAsEndCondition) { + auto splitter = absl::StrSplit("a,b,c", ','); + auto it = splitter.begin(); + auto it2 = it; + + // Increments it2 twice to point to "c" in the input text. + ++it2; + ++it2; + EXPECT_EQ("c", *it2); + + // This test uses a non-end SplitIterator as the terminating condition in a + // for loop. This relies on SplitIterator equality for non-end SplitIterators + // working correctly. At this point it2 points to "c", and we use that as the + // "end" condition in this test. + std::vector<absl::string_view> v; + for (; it != it2; ++it) { + v.push_back(*it); + } + EXPECT_THAT(v, ElementsAre("a", "b")); +} + +// +// Tests for Splitter +// + +TEST(Splitter, RangeIterators) { + auto splitter = absl::StrSplit("a,b,c", ','); + std::vector<absl::string_view> output; + for (const absl::string_view& p : splitter) { + output.push_back(p); + } + EXPECT_THAT(output, ElementsAre("a", "b", "c")); +} + +// Some template functions for use in testing conversion operators +template <typename ContainerType, typename Splitter> +void TestConversionOperator(const Splitter& splitter) { + ContainerType output = splitter; + EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d")); +} + +template <typename MapType, typename Splitter> +void TestMapConversionOperator(const Splitter& splitter) { + MapType m = splitter; + EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d"))); +} + +template <typename FirstType, typename SecondType, typename Splitter> +void TestPairConversionOperator(const Splitter& splitter) { + std::pair<FirstType, SecondType> p = splitter; + EXPECT_EQ(p, (std::pair<FirstType, SecondType>("a", "b"))); +} + +TEST(Splitter, ConversionOperator) { + auto splitter = absl::StrSplit("a,b,c,d", ','); + + TestConversionOperator<std::vector<absl::string_view>>(splitter); + 
TestConversionOperator<std::vector<std::string>>(splitter); + TestConversionOperator<std::list<absl::string_view>>(splitter); + TestConversionOperator<std::list<std::string>>(splitter); + TestConversionOperator<std::deque<absl::string_view>>(splitter); + TestConversionOperator<std::deque<std::string>>(splitter); + TestConversionOperator<std::set<absl::string_view>>(splitter); + TestConversionOperator<std::set<std::string>>(splitter); + TestConversionOperator<std::multiset<absl::string_view>>(splitter); + TestConversionOperator<std::multiset<std::string>>(splitter); + TestConversionOperator<std::unordered_set<std::string>>(splitter); + + // Tests conversion to map-like objects. + + TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>( + splitter); + TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<std::map<std::string, std::string>>(splitter); + TestMapConversionOperator< + std::multimap<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator<std::multimap<absl::string_view, std::string>>( + splitter); + TestMapConversionOperator<std::multimap<std::string, absl::string_view>>( + splitter); + TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); + TestMapConversionOperator<std::unordered_map<std::string, std::string>>( + splitter); + TestMapConversionOperator< + absl::node_hash_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::node_hash_map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::node_hash_map<std::string, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + 
absl::flat_hash_map<std::string, absl::string_view>>(splitter); + + // Tests conversion to std::pair + + TestPairConversionOperator<absl::string_view, absl::string_view>(splitter); + TestPairConversionOperator<absl::string_view, std::string>(splitter); + TestPairConversionOperator<std::string, absl::string_view>(splitter); + TestPairConversionOperator<std::string, std::string>(splitter); +} + +// A few additional tests for conversion to std::pair. This conversion is +// different from others because a std::pair always has exactly two elements: +// .first and .second. The split has to work even when the split has +// less-than, equal-to, and more-than 2 strings. +TEST(Splitter, ToPair) { + { + // Empty string + std::pair<std::string, std::string> p = absl::StrSplit("", ','); + EXPECT_EQ("", p.first); + EXPECT_EQ("", p.second); + } + + { + // Only first + std::pair<std::string, std::string> p = absl::StrSplit("a", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("", p.second); + } + + { + // Only second + std::pair<std::string, std::string> p = absl::StrSplit(",b", ','); + EXPECT_EQ("", p.first); + EXPECT_EQ("b", p.second); + } + + { + // First and second. + std::pair<std::string, std::string> p = absl::StrSplit("a,b", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + } + + { + // First and second and then more stuff that will be ignored. + std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + // "c" is omitted. + } +} + +TEST(Splitter, Predicates) { + static const char kTestChars[] = ",a, ,b,"; + using absl::AllowEmpty; + using absl::SkipEmpty; + using absl::SkipWhitespace; + + { + // No predicate. Does not skip empties. + auto splitter = absl::StrSplit(kTestChars, ','); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("", "a", " ", "b", "")); + } + + { + // Allows empty strings. Same behavior as no predicate at all. 
+ auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty()); + std::vector<std::string> v_allowempty = splitter; + EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", "")); + + // Ensures AllowEmpty equals the behavior with no predicate. + auto splitter_nopredicate = absl::StrSplit(kTestChars, ','); + std::vector<std::string> v_nopredicate = splitter_nopredicate; + EXPECT_EQ(v_allowempty, v_nopredicate); + } + + { + // Skips empty strings. + auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty()); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("a", " ", "b")); + } + + { + // Skips empty and all-whitespace strings. + auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace()); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("a", "b")); + } +} + +// +// Tests for StrSplit() +// + +TEST(Split, Basics) { + { + // Doesn't really do anything useful because the return value is ignored, + // but it should work. + absl::StrSplit("a,b,c", ','); + } + + { + std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Ensures that assignment works. This requires a little extra work with + // C++11 because of overloads with initializer_list. 
+ std::vector<std::string> v; + v = absl::StrSplit("a,b,c", ','); + + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + std::map<std::string, std::string> m; + m = absl::StrSplit("a,b,c", ','); + EXPECT_EQ(2, m.size()); + std::unordered_map<std::string, std::string> hm; + hm = absl::StrSplit("a,b,c", ','); + EXPECT_EQ(2, hm.size()); + } +} + +absl::string_view ReturnStringView() { return "Hello World"; } +const char* ReturnConstCharP() { return "Hello World"; } +char* ReturnCharP() { return const_cast<char*>("Hello World"); } + +TEST(Split, AcceptsCertainTemporaries) { + std::vector<std::string> v; + v = absl::StrSplit(ReturnStringView(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); + v = absl::StrSplit(ReturnConstCharP(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); + v = absl::StrSplit(ReturnCharP(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); +} + +TEST(Split, Temporary) { + // Use a std::string longer than the SSO length, so that when the temporary is + // destroyed, if the splitter keeps a reference to the string's contents, + // it'll reference freed memory instead of just dead on-stack memory. + const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; + EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) + << "Input should be larger than fits on the stack."; + + // This happens more often in C++11 as part of a range-based for loop. + auto splitter = absl::StrSplit(std::string(input), ','); + std::string expected = "a"; + for (absl::string_view letter : splitter) { + EXPECT_EQ(expected, letter); + ++expected[0]; + } + EXPECT_EQ("v", expected); + + // This happens more often in C++11 as part of a range-based for loop. 
+ auto std_splitter = absl::StrSplit(std::string(input), ','); + expected = "a"; + for (absl::string_view letter : std_splitter) { + EXPECT_EQ(expected, letter); + ++expected[0]; + } + EXPECT_EQ("v", expected); +} + +template <typename T> +static std::unique_ptr<T> CopyToHeap(const T& value) { + return std::unique_ptr<T>(new T(value)); +} + +TEST(Split, LvalueCaptureIsCopyable) { + std::string input = "a,b"; + auto heap_splitter = CopyToHeap(absl::StrSplit(input, ',')); + auto stack_splitter = *heap_splitter; + heap_splitter.reset(); + std::vector<std::string> result = stack_splitter; + EXPECT_THAT(result, testing::ElementsAre("a", "b")); +} + +TEST(Split, TemporaryCaptureIsCopyable) { + auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ',')); + auto stack_splitter = *heap_splitter; + heap_splitter.reset(); + std::vector<std::string> result = stack_splitter; + EXPECT_THAT(result, testing::ElementsAre("a", "b")); +} + +TEST(Split, SplitterIsCopyableAndMoveable) { + auto a = absl::StrSplit("foo", '-'); + + // Ensures that the following expressions compile. + auto b = a; // Copy construct + auto c = std::move(a); // Move construct + b = c; // Copy assign + c = std::move(b); // Move assign + + EXPECT_THAT(c, ElementsAre("foo")); +} + +TEST(Split, StringDelimiter) { + { + std::vector<absl::string_view> v = absl::StrSplit("a,b", ','); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(",")); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<absl::string_view> v = + absl::StrSplit("a,b", absl::string_view(",")); + EXPECT_THAT(v, ElementsAre("a", "b")); + } +} + +#if !defined(__cpp_char8_t) +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc++2a-compat" +#endif +TEST(Split, UTF8) { + // Tests splitting utf8 strings and utf8 delimiters. 
+ std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5"; + { + // A utf8 input string with an ascii delimiter. + std::string to_split = "a," + utf8_string; + std::vector<absl::string_view> v = absl::StrSplit(to_split, ','); + EXPECT_THAT(v, ElementsAre("a", utf8_string)); + } + + { + // A utf8 input string and a utf8 delimiter. + std::string to_split = "a," + utf8_string + ",b"; + std::string unicode_delimiter = "," + utf8_string + ","; + std::vector<absl::string_view> v = + absl::StrSplit(to_split, unicode_delimiter); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + // A utf8 input string and ByAnyChar with ascii chars. + std::vector<absl::string_view> v = + absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t")); + EXPECT_THAT(v, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere")); + } +} +#if defined(__clang__) +#pragma clang diagnostic pop +#endif +#endif // !defined(__cpp_char8_t) + +TEST(Split, EmptyStringDelimiter) { + { + std::vector<std::string> v = absl::StrSplit("", ""); + EXPECT_THAT(v, ElementsAre("")); + } + + { + std::vector<std::string> v = absl::StrSplit("a", ""); + EXPECT_THAT(v, ElementsAre("a")); + } + + { + std::vector<std::string> v = absl::StrSplit("ab", ""); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<std::string> v = absl::StrSplit("a b", ""); + EXPECT_THAT(v, ElementsAre("a", " ", "b")); + } +} + +TEST(Split, SubstrDelimiter) { + std::vector<absl::string_view> results; + absl::string_view delim("//"); + + results = absl::StrSplit("", delim); + EXPECT_THAT(results, ElementsAre("")); + + results = absl::StrSplit("//", delim); + EXPECT_THAT(results, ElementsAre("", "")); + + results = absl::StrSplit("ab", delim); + EXPECT_THAT(results, ElementsAre("ab")); + + results = absl::StrSplit("ab//", delim); + EXPECT_THAT(results, ElementsAre("ab", "")); + + results = absl::StrSplit("ab/", delim); + EXPECT_THAT(results, ElementsAre("ab/")); + + results = absl::StrSplit("a/b", delim); + EXPECT_THAT(results, 
ElementsAre("a/b")); + + results = absl::StrSplit("a//b", delim); + EXPECT_THAT(results, ElementsAre("a", "b")); + + results = absl::StrSplit("a///b", delim); + EXPECT_THAT(results, ElementsAre("a", "/b")); + + results = absl::StrSplit("a////b", delim); + EXPECT_THAT(results, ElementsAre("a", "", "b")); +} + +TEST(Split, EmptyResults) { + std::vector<absl::string_view> results; + + results = absl::StrSplit("", '#'); + EXPECT_THAT(results, ElementsAre("")); + + results = absl::StrSplit("#", '#'); + EXPECT_THAT(results, ElementsAre("", "")); + + results = absl::StrSplit("#cd", '#'); + EXPECT_THAT(results, ElementsAre("", "cd")); + + results = absl::StrSplit("ab#cd#", '#'); + EXPECT_THAT(results, ElementsAre("ab", "cd", "")); + + results = absl::StrSplit("ab##cd", '#'); + EXPECT_THAT(results, ElementsAre("ab", "", "cd")); + + results = absl::StrSplit("ab##", '#'); + EXPECT_THAT(results, ElementsAre("ab", "", "")); + + results = absl::StrSplit("ab#ab#", '#'); + EXPECT_THAT(results, ElementsAre("ab", "ab", "")); + + results = absl::StrSplit("aaaa", 'a'); + EXPECT_THAT(results, ElementsAre("", "", "", "", "")); + + results = absl::StrSplit("", '#', absl::SkipEmpty()); + EXPECT_THAT(results, ElementsAre()); +} + +template <typename Delimiter> +static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d, + size_t starting_pos, int expected_pos) { + absl::string_view found = d.Find(text, starting_pos); + return found.data() != text.data() + text.size() && + expected_pos == found.data() - text.data(); +} + +// Helper function for testing Delimiter objects. Returns true if the given +// Delimiter is found in the given string at the given position. This function +// tests two cases: +// 1. The actual text given, starting at position 0 +// 2. 
The text given with leading padding that should be ignored +template <typename Delimiter> +static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) { + const std::string leading_text = ",x,y,z,"; + return IsFoundAtStartingPos(text, d, 0, expected_pos) && + IsFoundAtStartingPos(leading_text + std::string(text), d, + leading_text.length(), + expected_pos + leading_text.length()); +} + +// +// Tests for ByString +// + +// Tests using any delimiter that represents a single comma. +template <typename Delimiter> +void TestComma(Delimiter d) { + EXPECT_TRUE(IsFoundAt(",", d, 0)); + EXPECT_TRUE(IsFoundAt("a,", d, 1)); + EXPECT_TRUE(IsFoundAt(",b", d, 0)); + EXPECT_TRUE(IsFoundAt("a,b", d, 1)); + EXPECT_TRUE(IsFoundAt("a,b,", d, 1)); + EXPECT_TRUE(IsFoundAt("a,b,c", d, 1)); + EXPECT_FALSE(IsFoundAt("", d, -1)); + EXPECT_FALSE(IsFoundAt(" ", d, -1)); + EXPECT_FALSE(IsFoundAt("a", d, -1)); + EXPECT_FALSE(IsFoundAt("a b c", d, -1)); + EXPECT_FALSE(IsFoundAt("a;b;c", d, -1)); + EXPECT_FALSE(IsFoundAt(";", d, -1)); +} + +TEST(Delimiter, ByString) { + using absl::ByString; + TestComma(ByString(",")); + + // Works as named variable. + ByString comma_string(","); + TestComma(comma_string); + + // The first occurrence of empty string ("") in a string is at position 0. + // There is a test below that demonstrates this for absl::string_view::find(). + // If the ByString delimiter returned position 0 for this, there would + // be an infinite loop in the SplitIterator code. To avoid this, empty string + // is a special case in that it always returns the item at position 1. + absl::string_view abc("abc"); + EXPECT_EQ(0, abc.find("")); // "" is found at position 0 + ByString empty(""); + EXPECT_FALSE(IsFoundAt("", empty, 0)); + EXPECT_FALSE(IsFoundAt("a", empty, 0)); + EXPECT_TRUE(IsFoundAt("ab", empty, 1)); + EXPECT_TRUE(IsFoundAt("abc", empty, 1)); +} + +TEST(Split, ByChar) { + using absl::ByChar; + TestComma(ByChar(',')); + + // Works as named variable. 
+ ByChar comma_char(','); + TestComma(comma_char); +} + +// +// Tests for ByAnyChar +// + +TEST(Delimiter, ByAnyChar) { + using absl::ByAnyChar; + ByAnyChar one_delim(","); + // Found + EXPECT_TRUE(IsFoundAt(",", one_delim, 0)); + EXPECT_TRUE(IsFoundAt("a,", one_delim, 1)); + EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1)); + EXPECT_TRUE(IsFoundAt(",b", one_delim, 0)); + // Not found + EXPECT_FALSE(IsFoundAt("", one_delim, -1)); + EXPECT_FALSE(IsFoundAt(" ", one_delim, -1)); + EXPECT_FALSE(IsFoundAt("a", one_delim, -1)); + EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1)); + EXPECT_FALSE(IsFoundAt(";", one_delim, -1)); + + ByAnyChar two_delims(",;"); + // Found + EXPECT_TRUE(IsFoundAt(",", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(",;", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";,", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0)); + EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1)); + // Not found + EXPECT_FALSE(IsFoundAt("", two_delims, -1)); + EXPECT_FALSE(IsFoundAt(" ", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("a", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); + + // ByAnyChar behaves just like ByString when given a delimiter of empty + // string. That is, it always returns a zero-length absl::string_view + // referring to the item at position 1, not position 0. 
+ ByAnyChar empty(""); + EXPECT_FALSE(IsFoundAt("", empty, 0)); + EXPECT_FALSE(IsFoundAt("a", empty, 0)); + EXPECT_TRUE(IsFoundAt("ab", empty, 1)); + EXPECT_TRUE(IsFoundAt("abc", empty, 1)); +} + +// +// Tests for ByLength +// + +TEST(Delimiter, ByLength) { + using absl::ByLength; + + ByLength four_char_delim(4); + + // Found + EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4)); + EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4)); + EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4)); + // Not found + EXPECT_FALSE(IsFoundAt("", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0)); +} + +TEST(Split, WorksWithLargeStrings) { + if (sizeof(size_t) > 4) { + std::string s((uint32_t{1} << 31) + 1, 'x'); // 2G + 1 byte + s.back() = '-'; + std::vector<absl::string_view> v = absl::StrSplit(s, '-'); + EXPECT_EQ(2, v.size()); + // The first element will contain 2G of 'x's. + // testing::StartsWith is too slow with a 2G string. 
+ EXPECT_EQ('x', v[0][0]); + EXPECT_EQ('x', v[0][1]); + EXPECT_EQ('x', v[0][3]); + EXPECT_EQ("", v[1]); + } +} + +TEST(SplitInternalTest, TypeTraits) { + EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasMappedType<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasValueType<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasConstIterator<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value); + EXPECT_TRUE((absl::strings_internal::IsInitializerList< + std::initializer_list<int>>::value)); +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/string_view.cc b/third_party/abseil_cpp/absl/strings/string_view.cc new file mode 100644 index 000000000000..c5f5de936deb --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/string_view.cc @@ -0,0 +1,235 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "absl/strings/string_view.h" + +#ifndef ABSL_USES_STD_STRING_VIEW + +#include <algorithm> +#include <climits> +#include <cstring> +#include <ostream> + +#include "absl/strings/internal/memutil.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +namespace { +void WritePadding(std::ostream& o, size_t pad) { + char fill_buf[32]; + memset(fill_buf, o.fill(), sizeof(fill_buf)); + while (pad) { + size_t n = std::min(pad, sizeof(fill_buf)); + o.write(fill_buf, n); + pad -= n; + } +} + +class LookupTable { + public: + // For each character in wanted, sets the index corresponding + // to the ASCII code of that character. This is used by + // the find_.*_of methods below to tell whether or not a character is in + // the lookup table in constant time. + explicit LookupTable(string_view wanted) { + for (char c : wanted) { + table_[Index(c)] = true; + } + } + bool operator[](char c) const { return table_[Index(c)]; } + + private: + static unsigned char Index(char c) { return static_cast<unsigned char>(c); } + bool table_[UCHAR_MAX + 1] = {}; +}; + +} // namespace + +std::ostream& operator<<(std::ostream& o, string_view piece) { + std::ostream::sentry sentry(o); + if (sentry) { + size_t lpad = 0; + size_t rpad = 0; + if (static_cast<size_t>(o.width()) > piece.size()) { + size_t pad = o.width() - piece.size(); + if ((o.flags() & o.adjustfield) == o.left) { + rpad = pad; + } else { + lpad = pad; + } + } + if (lpad) WritePadding(o, lpad); + o.write(piece.data(), piece.size()); + if (rpad) WritePadding(o, rpad); + o.width(0); + } + return o; +} + +string_view::size_type string_view::find(string_view s, size_type pos) const + noexcept { + if (empty() || pos > length_) { + if (empty() && pos == 0 && s.empty()) return 0; + return npos; + } + const char* result = + strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); + return result ? 
result - ptr_ : npos; +} + +string_view::size_type string_view::find(char c, size_type pos) const noexcept { + if (empty() || pos >= length_) { + return npos; + } + const char* result = + static_cast<const char*>(memchr(ptr_ + pos, c, length_ - pos)); + return result != nullptr ? result - ptr_ : npos; +} + +string_view::size_type string_view::rfind(string_view s, size_type pos) const + noexcept { + if (length_ < s.length_) return npos; + if (s.empty()) return std::min(length_, pos); + const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_; + const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); + return result != last ? result - ptr_ : npos; +} + +// Search range is [0..pos] inclusive. If pos == npos, search everything. +string_view::size_type string_view::rfind(char c, size_type pos) const + noexcept { + // Note: memrchr() is not available on Windows. + if (empty()) return npos; + for (size_type i = std::min(pos, length_ - 1);; --i) { + if (ptr_[i] == c) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_first_of(string_view s, + size_type pos) const + noexcept { + if (empty() || s.empty()) { + return npos; + } + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = pos; i < length_; ++i) { + if (tbl[ptr_[i]]) { + return i; + } + } + return npos; +} + +string_view::size_type string_view::find_first_not_of(string_view s, + size_type pos) const + noexcept { + if (empty()) return npos; + // Avoid the cost of LookupTable() for a single-character search. 
+ if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = pos; i < length_; ++i) { + if (!tbl[ptr_[i]]) { + return i; + } + } + return npos; +} + +string_view::size_type string_view::find_first_not_of(char c, + size_type pos) const + noexcept { + if (empty()) return npos; + for (; pos < length_; ++pos) { + if (ptr_[pos] != c) { + return pos; + } + } + return npos; +} + +string_view::size_type string_view::find_last_of(string_view s, + size_type pos) const noexcept { + if (empty() || s.empty()) return npos; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (size_type i = std::min(pos, length_ - 1);; --i) { + if (tbl[ptr_[i]]) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_last_not_of(string_view s, + size_type pos) const + noexcept { + if (empty()) return npos; + size_type i = std::min(pos, length_ - 1); + if (s.empty()) return i; + // Avoid the cost of LookupTable() for a single-character search. + if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); + LookupTable tbl(s); + for (;; --i) { + if (!tbl[ptr_[i]]) { + return i; + } + if (i == 0) break; + } + return npos; +} + +string_view::size_type string_view::find_last_not_of(char c, + size_type pos) const + noexcept { + if (empty()) return npos; + size_type i = std::min(pos, length_ - 1); + for (;; --i) { + if (ptr_[i] != c) { + return i; + } + if (i == 0) break; + } + return npos; +} + +// MSVC has non-standard behavior that implicitly creates definitions for static +// const members. These implicit definitions conflict with explicit out-of-class +// member definitions that are required by the C++ standard, resulting in +// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks +// MSVC to choose only one definition for the symbol it decorates. 
See details +// at https://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx +#ifdef _MSC_VER +#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany) +#else +#define ABSL_STRING_VIEW_SELECTANY +#endif + +ABSL_STRING_VIEW_SELECTANY +constexpr string_view::size_type string_view::npos; +ABSL_STRING_VIEW_SELECTANY +constexpr string_view::size_type string_view::kMaxSize; + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_USES_STD_STRING_VIEW diff --git a/third_party/abseil_cpp/absl/strings/string_view.h b/third_party/abseil_cpp/absl/strings/string_view.h new file mode 100644 index 000000000000..5260b5b73f47 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/string_view.h @@ -0,0 +1,629 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: string_view.h +// ----------------------------------------------------------------------------- +// +// This file contains the definition of the `absl::string_view` class. A +// `string_view` points to a contiguous span of characters, often part or all of +// another `std::string`, double-quoted string literal, character array, or even +// another `string_view`. +// +// This `absl::string_view` abstraction is designed to be a drop-in +// replacement for the C++17 `std::string_view` abstraction. 
+#ifndef ABSL_STRINGS_STRING_VIEW_H_ +#define ABSL_STRINGS_STRING_VIEW_H_ + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstring> +#include <iosfwd> +#include <iterator> +#include <limits> +#include <string> + +#include "absl/base/config.h" +#include "absl/base/internal/throw_delegate.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/base/port.h" + +#ifdef ABSL_USES_STD_STRING_VIEW + +#include <string_view> // IWYU pragma: export + +namespace absl { +ABSL_NAMESPACE_BEGIN +using string_view = std::string_view; +ABSL_NAMESPACE_END +} // namespace absl + +#else // ABSL_USES_STD_STRING_VIEW + +#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp +#else // ABSL_HAVE_BUILTIN(__builtin_memcmp) +#define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp +#endif // ABSL_HAVE_BUILTIN(__builtin_memcmp) + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// absl::string_view +// +// A `string_view` provides a lightweight view into the string data provided by +// a `std::string`, double-quoted string literal, character array, or even +// another `string_view`. A `string_view` does *not* own the string to which it +// points, and that data cannot be modified through the view. +// +// You can use `string_view` as a function or method parameter anywhere a +// parameter can receive a double-quoted string literal, `const char*`, +// `std::string`, or another `absl::string_view` argument with no need to copy +// the string data. Systematic use of `string_view` within function arguments +// reduces data copies and `strlen()` calls. 
+// +// Because of its small size, prefer passing `string_view` by value: +// +// void MyFunction(absl::string_view arg); +// +// If circumstances require, you may also pass one by const reference: +// +// void MyFunction(const absl::string_view& arg); // not preferred +// +// Passing by value generates slightly smaller code for many architectures. +// +// In either case, the source data of the `string_view` must outlive the +// `string_view` itself. +// +// A `string_view` is also suitable for local variables if you know that the +// lifetime of the underlying object is longer than the lifetime of your +// `string_view` variable. However, beware of binding a `string_view` to a +// temporary value: +// +// // BAD use of string_view: lifetime problem +// absl::string_view sv = obj.ReturnAString(); +// +// // GOOD use of string_view: str outlives sv +// std::string str = obj.ReturnAString(); +// absl::string_view sv = str; +// +// Due to lifetime issues, a `string_view` is sometimes a poor choice for a +// return value and usually a poor choice for a data member. If you do use a +// `string_view` this way, it is your responsibility to ensure that the object +// pointed to by the `string_view` outlives the `string_view`. +// +// A `string_view` may represent a whole string or just part of a string. For +// example, when splitting a string, `std::vector<absl::string_view>` is a +// natural data type for the output. +// +// For another example, a Cord is a non-contiguous, potentially very +// long string-like object. The Cord class has an interface that iteratively +// provides string_view objects that point to the successive pieces of a Cord +// object. +// +// When constructed from a source which is NUL-terminated, the `string_view` +// itself will not include the NUL-terminator unless a specific size (including +// the NUL) is passed to the constructor. As a result, common idioms that work +// on NUL-terminated strings do not work on `string_view` objects. 
If you write +// code that scans a `string_view`, you must check its length rather than test +// for nul, for example. Note, however, that nuls may still be embedded within +// a `string_view` explicitly. +// +// You may create a null `string_view` in two ways: +// +// absl::string_view sv; +// absl::string_view sv(nullptr, 0); +// +// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and +// `sv.empty() == true`. Also, if you create a `string_view` with a non-null +// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to +// signal an undefined value that is different from other `string_view` values +// in a similar fashion to how `const char* p1 = nullptr;` is different from +// `const char* p2 = "";`. However, in practice, it is not recommended to rely +// on this behavior. +// +// Be careful not to confuse a null `string_view` with an empty one. A null +// `string_view` is an empty `string_view`, but some empty `string_view`s are +// not null. Prefer checking for emptiness over checking for null. +// +// There are many ways to create an empty string_view: +// +// const char* nullcp = nullptr; +// // string_view.size() will return 0 in all cases. 
+// absl::string_view(); +// absl::string_view(nullcp, 0); +// absl::string_view(""); +// absl::string_view("", 0); +// absl::string_view("abcdef", 0); +// absl::string_view("abcdef" + 6, 0); +// +// All empty `string_view` objects, whether null or not, are equal: +// +// absl::string_view() == absl::string_view("", 0) +// absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0) +class string_view { + public: + using traits_type = std::char_traits<char>; + using value_type = char; + using pointer = char*; + using const_pointer = const char*; + using reference = char&; + using const_reference = const char&; + using const_iterator = const char*; + using iterator = const_iterator; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + using reverse_iterator = const_reverse_iterator; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + + static constexpr size_type npos = static_cast<size_type>(-1); + + // Null `string_view` constructor + constexpr string_view() noexcept : ptr_(nullptr), length_(0) {} + + // Implicit constructors + + template <typename Allocator> + string_view( // NOLINT(runtime/explicit) + const std::basic_string<char, std::char_traits<char>, Allocator>& + str) noexcept + // This is implemented in terms of `string_view(p, n)` so `str.size()` + // doesn't need to be reevaluated after `ptr_` is set. + : string_view(str.data(), str.size()) {} + + // Implicit constructor of a `string_view` from NUL-terminated `str`. When + // accepting possibly null strings, use `absl::NullSafeStringView(str)` + // instead (see below). + constexpr string_view(const char* str) // NOLINT(runtime/explicit) + : ptr_(str), + length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {} + + // Implicit constructor of a `string_view` from a `const char*` and length. + constexpr string_view(const char* data, size_type len) + : ptr_(data), length_(CheckLengthInternal(len)) {} + + // NOTE: Harmlessly omitted to work around gdb bug. 
+ // constexpr string_view(const string_view&) noexcept = default; + // string_view& operator=(const string_view&) noexcept = default; + + // Iterators + + // string_view::begin() + // + // Returns an iterator pointing to the first character at the beginning of the + // `string_view`, or `end()` if the `string_view` is empty. + constexpr const_iterator begin() const noexcept { return ptr_; } + + // string_view::end() + // + // Returns an iterator pointing just beyond the last character at the end of + // the `string_view`. This iterator acts as a placeholder; attempting to + // access it results in undefined behavior. + constexpr const_iterator end() const noexcept { return ptr_ + length_; } + + // string_view::cbegin() + // + // Returns a const iterator pointing to the first character at the beginning + // of the `string_view`, or `cend()` if the `string_view` is empty. Equivalent + // to `begin()`. + constexpr const_iterator cbegin() const noexcept { return begin(); } + + // string_view::cend() + // + // Returns a const iterator pointing just beyond the last character at the end + // of the `string_view`. This iterator acts as a placeholder; attempting to + // access its element results in undefined behavior. Equivalent to `end()`. + constexpr const_iterator cend() const noexcept { return end(); } + + // string_view::rbegin() + // + // Returns a reverse iterator pointing to the last character at the end of the + // `string_view`, or `rend()` if the `string_view` is empty. + // + // NOTE(review): unlike `begin()`/`end()`, the reverse-iterator accessors are + // not declared `constexpr` -- presumably because `std::reverse_iterator` is + // not usable in constant expressions before C++17; confirm before changing. + const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + + // string_view::rend() + // + // Returns a reverse iterator pointing just before the first character at the + // beginning of the `string_view`. This iterator acts as a placeholder; + // attempting to access its element results in undefined behavior. 
+ const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } + + // string_view::crbegin() + // + // Returns a const reverse iterator pointing to the last character at the end + // of the `string_view`, or `crend()` if the `string_view` is empty. + const_reverse_iterator crbegin() const noexcept { return rbegin(); } + + // string_view::crend() + // + // Returns a const reverse iterator pointing just before the first character + // at the beginning of the `string_view`. This iterator acts as a placeholder; + // attempting to access its element results in undefined behavior. + const_reverse_iterator crend() const noexcept { return rend(); } + + // Capacity Utilities + + // string_view::size() + // + // Returns the number of characters in the `string_view`. + constexpr size_type size() const noexcept { + return length_; + } + + // string_view::length() + // + // Returns the number of characters in the `string_view`. Alias for `size()`. + constexpr size_type length() const noexcept { return size(); } + + // string_view::max_size() + // + // Returns the maximum number of characters the `string_view` can hold. + constexpr size_type max_size() const noexcept { return kMaxSize; } + + // string_view::empty() + // + // Checks if the `string_view` is empty (refers to no characters). + constexpr bool empty() const noexcept { return length_ == 0; } + + // string_view::operator[] + // + // Returns the ith element of the `string_view` using the array operator. + // Unlike `at()`, this operator has no `ThrowStdOutOfRange` path: the only + // bounds check is the `ABSL_HARDENING_ASSERT` below. NOTE(review): whether + // that assert is active presumably depends on the build configuration -- + // confirm. + constexpr const_reference operator[](size_type i) const { + return ABSL_HARDENING_ASSERT(i < size()), ptr_[i]; + } + + // string_view::at() + // + // Returns the ith element of the `string_view`. Bounds checking is performed, + // and an exception of type `std::out_of_range` will be thrown on invalid + // access. + constexpr const_reference at(size_type i) const { + return ABSL_PREDICT_TRUE(i < size()) + ? 
ptr_[i] + : ((void)base_internal::ThrowStdOutOfRange( + "absl::string_view::at"), + ptr_[i]); + } + + // string_view::front() + // + // Returns the first element of a `string_view`. The `string_view` must not + // be empty (checked with `ABSL_HARDENING_ASSERT`). + constexpr const_reference front() const { + return ABSL_HARDENING_ASSERT(!empty()), ptr_[0]; + } + + // string_view::back() + // + // Returns the last element of a `string_view`. The `string_view` must not + // be empty (checked with `ABSL_HARDENING_ASSERT`). + constexpr const_reference back() const { + return ABSL_HARDENING_ASSERT(!empty()), ptr_[size() - 1]; + } + + // string_view::data() + // + // Returns a pointer to the underlying character array (which is of course + // stored elsewhere). Note that the viewed characters may include embedded nul + // characters, and the returned buffer may or may not be NUL-terminated; + // therefore, do not pass `data()` to a routine that expects a NUL-terminated + // string. + constexpr const_pointer data() const noexcept { return ptr_; } + + // Modifiers + + // string_view::remove_prefix() + // + // Removes the first `n` characters from the `string_view`. Note that the + // underlying string is not changed, only the view. `n` must not exceed + // `size()` (checked with `ABSL_HARDENING_ASSERT`). + void remove_prefix(size_type n) { + ABSL_HARDENING_ASSERT(n <= length_); + ptr_ += n; + length_ -= n; + } + + // string_view::remove_suffix() + // + // Removes the last `n` characters from the `string_view`. Note that the + // underlying string is not changed, only the view. `n` must not exceed + // `size()` (checked with `ABSL_HARDENING_ASSERT`). + void remove_suffix(size_type n) { + ABSL_HARDENING_ASSERT(n <= length_); + length_ -= n; + } + + // string_view::swap() + // + // Swaps this `string_view` with another `string_view`. Only the two views + // are exchanged; the underlying character data is not touched. + void swap(string_view& s) noexcept { + auto t = *this; + *this = s; + s = t; + } + + // Explicit conversion operators + + // Converts to `std::basic_string`. 
+ template <typename A> + explicit operator std::basic_string<char, traits_type, A>() const { + if (!data()) return {}; + return std::basic_string<char, traits_type, A>(data(), size()); + } + + // string_view::copy() + // + // Copies at most `n` characters of the `string_view`, starting at offset + // `pos`, into `buf`, and returns the number of characters copied (the + // smaller of `n` and `size() - pos`). No NUL terminator is appended. Calls + // `ThrowStdOutOfRange` if `pos > size()`. + size_type copy(char* buf, size_type n, size_type pos = 0) const { + if (ABSL_PREDICT_FALSE(pos > length_)) { + base_internal::ThrowStdOutOfRange("absl::string_view::copy"); + } + size_type rlen = (std::min)(length_ - pos, n); + if (rlen > 0) { + const char* start = ptr_ + pos; + traits_type::copy(buf, start, rlen); + } + return rlen; + } + + // string_view::substr() + // + // Returns a "substring" of the `string_view` (at offset `pos` and length + // `n`) as another string_view; `n` is clipped to the number of characters + // remaining after `pos`. This function throws `std::out_of_range` if + // `pos > size()`. + // Use absl::ClippedSubstr if you need a truncating substr operation. + constexpr string_view substr(size_type pos, size_type n = npos) const { + return ABSL_PREDICT_FALSE(pos > length_) + ? (base_internal::ThrowStdOutOfRange( + "absl::string_view::substr"), + string_view()) + : string_view(ptr_ + pos, Min(n, length_ - pos)); + } + + // string_view::compare() + // + // Performs a lexicographical comparison between the `string_view` and + // another `absl::string_view`, returning -1 if `this` is less than, 0 if + // `this` is equal to, and 1 if `this` is greater than the passed string + // view. Note that in the case of data equality, a further comparison is made + // on the respective sizes of the two `string_view`s to determine which is + // smaller, equal, or greater. The memory comparison is skipped entirely when + // either view is empty. + constexpr int compare(string_view x) const noexcept { + return CompareImpl(length_, x.length_, + Min(length_, x.length_) == 0 + ? 0 + : ABSL_INTERNAL_STRING_VIEW_MEMCMP( + ptr_, x.ptr_, Min(length_, x.length_))); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and another `absl::string_view`. 
+ int compare(size_type pos1, size_type count1, string_view v) const { + return substr(pos1, count1).compare(v); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a substring of another `absl::string_view`. + int compare(size_type pos1, size_type count1, string_view v, size_type pos2, + size_type count2) const { + return substr(pos1, count1).compare(v.substr(pos2, count2)); + } + + // Overload of `string_view::compare()` for comparing a `string_view` and a + // different C-style string `s`. + int compare(const char* s) const { return compare(string_view(s)); } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and a different C-style string `s`. + int compare(size_type pos1, size_type count1, const char* s) const { + return substr(pos1, count1).compare(string_view(s)); + } + + // Overload of `string_view::compare()` for comparing a substring of the + // `string_view` and the first `count2` characters of a different C-style + // string `s`. + int compare(size_type pos1, size_type count1, const char* s, + size_type count2) const { + return substr(pos1, count1).compare(string_view(s, count2)); + } + + // Find Utilities + + // string_view::find() + // + // Finds the first occurrence of the substring `s` within the `string_view`, + // returning the position of the first character's match, or `npos` if no + // match was found. + size_type find(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find()` for finding the given character `c` + // within the `string_view`. + size_type find(char c, size_type pos = 0) const noexcept; + + // string_view::rfind() + // + // Finds the last occurrence of a substring `s` within the `string_view`, + // returning the position of the first character's match, or `npos` if no + // match was found. 
+ size_type rfind(string_view s, size_type pos = npos) const + noexcept; + + // Overload of `string_view::rfind()` for finding the last occurrence of the + // given character `c` within the `string_view`. + size_type rfind(char c, size_type pos = npos) const noexcept; + + // string_view::find_first_of() + // + // Finds the first occurrence of any of the characters in `s` within the + // `string_view`, returning the start position of the match, or `npos` if no + // match was found. + size_type find_first_of(string_view s, size_type pos = 0) const + noexcept; + + // Overload of `string_view::find_first_of()` for finding a character `c` + // within the `string_view`. Equivalent to `find(c, pos)`. + size_type find_first_of(char c, size_type pos = 0) const + noexcept { + return find(c, pos); + } + + // string_view::find_last_of() + // + // Finds the last occurrence of any of the characters in `s` within the + // `string_view`, returning the start position of the match, or `npos` if no + // match was found. + size_type find_last_of(string_view s, size_type pos = npos) const + noexcept; + + // Overload of `string_view::find_last_of()` for finding a character `c` + // within the `string_view`. Equivalent to `rfind(c, pos)`. + size_type find_last_of(char c, size_type pos = npos) const + noexcept { + return rfind(c, pos); + } + + // string_view::find_first_not_of() + // + // Finds the first occurrence of any of the characters not in `s` within the + // `string_view`, returning the start position of the first non-match, or + // `npos` if no non-match was found. + size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept; + + // Overload of `string_view::find_first_not_of()` for finding a character + // that is not `c` within the `string_view`. 
+ size_type find_first_not_of(char c, size_type pos = 0) const noexcept; + + // string_view::find_last_not_of() + // + // Finds the last occurrence of any of the characters not in `s` within the + // `string_view`, returning the start position of the last non-match, or + // `npos` if no non-match was found. + size_type find_last_not_of(string_view s, + size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_not_of()` for finding a character + // that is not `c` within the `string_view`. + size_type find_last_not_of(char c, size_type pos = npos) const + noexcept; + + private: + static constexpr size_type kMaxSize = + (std::numeric_limits<difference_type>::max)(); + + static constexpr size_type CheckLengthInternal(size_type len) { + return ABSL_HARDENING_ASSERT(len <= kMaxSize), len; + } + + static constexpr size_type StrlenInternal(const char* str) { +#if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__) + // MSVC 2017+ can evaluate this at compile-time. + const char* begin = str; + while (*str != '\0') ++str; + return str - begin; +#elif ABSL_HAVE_BUILTIN(__builtin_strlen) || \ + (defined(__GNUC__) && !defined(__clang__)) + // GCC has __builtin_strlen according to + // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but + // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above. + // __builtin_strlen is constexpr. + return __builtin_strlen(str); +#else + return str ? strlen(str) : 0; +#endif + } + + static constexpr size_t Min(size_type length_a, size_type length_b) { + return length_a < length_b ? length_a : length_b; + } + + static constexpr int CompareImpl(size_type length_a, size_type length_b, + int compare_result) { + return compare_result == 0 ? static_cast<int>(length_a > length_b) - + static_cast<int>(length_a < length_b) + : (compare_result < 0 ? 
-1 : 1); + } + + const char* ptr_; + size_type length_; +}; + +// This large function is defined inline so that in a fairly common case where +// one of the arguments is a literal, the compiler can elide a lot of the +// following comparisons. +constexpr bool operator==(string_view x, string_view y) noexcept { + return x.size() == y.size() && + (x.empty() || + ABSL_INTERNAL_STRING_VIEW_MEMCMP(x.data(), y.data(), x.size()) == 0); +} + +constexpr bool operator!=(string_view x, string_view y) noexcept { + return !(x == y); +} + +constexpr bool operator<(string_view x, string_view y) noexcept { + return x.compare(y) < 0; +} + +constexpr bool operator>(string_view x, string_view y) noexcept { + return y < x; +} + +constexpr bool operator<=(string_view x, string_view y) noexcept { + return !(y < x); +} + +constexpr bool operator>=(string_view x, string_view y) noexcept { + return !(x < y); +} + +// IO Insertion Operator +std::ostream& operator<<(std::ostream& o, string_view piece); + +ABSL_NAMESPACE_END +} // namespace absl + +#undef ABSL_INTERNAL_STRING_VIEW_MEMCMP + +#endif // ABSL_USES_STD_STRING_VIEW + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// ClippedSubstr() +// +// Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`. +// Provided because std::string_view::substr throws if `pos > size()` +inline string_view ClippedSubstr(string_view s, size_t pos, + size_t n = string_view::npos) { + pos = (std::min)(pos, static_cast<size_t>(s.size())); + return s.substr(pos, n); +} + +// NullSafeStringView() +// +// Creates an `absl::string_view` from a pointer `p` even if it's null-valued. +// This function should be used where an `absl::string_view` can be created from +// a possibly-null pointer. +constexpr string_view NullSafeStringView(const char* p) { + return p ? 
string_view(p) : string_view(); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_STRING_VIEW_H_ diff --git a/third_party/abseil_cpp/absl/strings/string_view_benchmark.cc b/third_party/abseil_cpp/absl/strings/string_view_benchmark.cc new file mode 100644 index 000000000000..0d74e23e2fc9 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/string_view_benchmark.cc @@ -0,0 +1,381 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/string_view.h" + +#include <algorithm> +#include <cstdint> +#include <map> +#include <random> +#include <string> +#include <unordered_set> +#include <vector> + +#include "benchmark/benchmark.h" +#include "absl/base/attributes.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/strings/str_cat.h" + +namespace { + +void BM_StringViewFromString(benchmark::State& state) { + std::string s(state.range(0), 'x'); + std::string* ps = &s; + struct SV { + SV() = default; + explicit SV(const std::string& s) : sv(s) {} + absl::string_view sv; + } sv; + SV* psv = &sv; + benchmark::DoNotOptimize(ps); + benchmark::DoNotOptimize(psv); + for (auto _ : state) { + new (psv) SV(*ps); + benchmark::DoNotOptimize(sv); + } +} +BENCHMARK(BM_StringViewFromString)->Arg(12)->Arg(128); + +// Provide a forcibly out-of-line wrapper for operator== that can be used in +// benchmarks to measure the impact of inlining. 
+ABSL_ATTRIBUTE_NOINLINE +bool NonInlinedEq(absl::string_view a, absl::string_view b) { return a == b; } + +// We use functions that cannot be inlined to perform the comparison loops so +// that inlining of the operator== can't optimize away *everything*. +ABSL_ATTRIBUTE_NOINLINE +void DoEqualityComparisons(benchmark::State& state, absl::string_view a, + absl::string_view b) { + for (auto _ : state) { + benchmark::DoNotOptimize(a == b); + } +} + +// Measures operator== when both views are built from the same std::string, +// i.e. they share one data pointer and length. +void BM_EqualIdentical(benchmark::State& state) { + std::string x(state.range(0), 'a'); + DoEqualityComparisons(state, x, x); +} +BENCHMARK(BM_EqualIdentical)->DenseRange(0, 3)->Range(4, 1 << 10); + +// Measures operator== on equal contents held in two separate std::string +// objects (`y` is a copy of `x`). +void BM_EqualSame(benchmark::State& state) { + std::string x(state.range(0), 'a'); + std::string y = x; + DoEqualityComparisons(state, x, y); +} +BENCHMARK(BM_EqualSame) + ->DenseRange(0, 10) + ->Arg(20) + ->Arg(40) + ->Arg(70) + ->Arg(110) + ->Range(160, 4096); + +// Measures operator== on equal-length strings that differ only in their last +// character; for length 0 both strings are empty and therefore equal. +void BM_EqualDifferent(benchmark::State& state) { + const int len = state.range(0); + std::string x(len, 'a'); + std::string y = x; + if (len > 0) { + y[len - 1] = 'b'; + } + DoEqualityComparisons(state, x, y); +} +BENCHMARK(BM_EqualDifferent)->DenseRange(0, 3)->Range(4, 1 << 10); + +// This benchmark is intended to check that important simplifications can be +// made with absl::string_view comparisons against constant strings. The idea is +// that if constant strings cause redundant components of the comparison, the +// compiler should detect and eliminate them. Here we use 8 different strings, +// each with the same size. Provided our comparison makes the implementation +// inline-able by the compiler, it should fold all of these away into a single +// size check once per loop iteration. 
+ABSL_ATTRIBUTE_NOINLINE +void DoConstantSizeInlinedEqualityComparisons(benchmark::State& state, + absl::string_view a) { + for (auto _ : state) { + benchmark::DoNotOptimize(a == "aaa"); + benchmark::DoNotOptimize(a == "bbb"); + benchmark::DoNotOptimize(a == "ccc"); + benchmark::DoNotOptimize(a == "ddd"); + benchmark::DoNotOptimize(a == "eee"); + benchmark::DoNotOptimize(a == "fff"); + benchmark::DoNotOptimize(a == "ggg"); + benchmark::DoNotOptimize(a == "hhh"); + } +} +void BM_EqualConstantSizeInlined(benchmark::State& state) { + std::string x(state.range(0), 'a'); + DoConstantSizeInlinedEqualityComparisons(state, x); +} +// We only need to check a size of 3 and sizes other than 3 (2 and 4), as this +// benchmark only has to do with size differences. +BENCHMARK(BM_EqualConstantSizeInlined)->DenseRange(2, 4); + +// This benchmark exists purely to give context to the above timings: this is +// what they would look like if the compiler is completely unable to simplify +// between two comparisons when they are comparing against constant strings. +ABSL_ATTRIBUTE_NOINLINE +void DoConstantSizeNonInlinedEqualityComparisons(benchmark::State& state, + absl::string_view a) { + for (auto _ : state) { + // Force these out-of-line to compare with the above function. + benchmark::DoNotOptimize(NonInlinedEq(a, "aaa")); + benchmark::DoNotOptimize(NonInlinedEq(a, "bbb")); + benchmark::DoNotOptimize(NonInlinedEq(a, "ccc")); + benchmark::DoNotOptimize(NonInlinedEq(a, "ddd")); + benchmark::DoNotOptimize(NonInlinedEq(a, "eee")); + benchmark::DoNotOptimize(NonInlinedEq(a, "fff")); + benchmark::DoNotOptimize(NonInlinedEq(a, "ggg")); + benchmark::DoNotOptimize(NonInlinedEq(a, "hhh")); + } +} + +void BM_EqualConstantSizeNonInlined(benchmark::State& state) { + std::string x(state.range(0), 'a'); + DoConstantSizeNonInlinedEqualityComparisons(state, x); +} +// We only need to check a size of 3 and sizes other than 3 (2 and 4), as this +// benchmark only has to do with size differences. 
+BENCHMARK(BM_EqualConstantSizeNonInlined)->DenseRange(2, 4); + +void BM_CompareSame(benchmark::State& state) { + const int len = state.range(0); + std::string x; + for (int i = 0; i < len; i++) { + x += 'a'; + } + std::string y = x; + absl::string_view a = x; + absl::string_view b = y; + + for (auto _ : state) { + benchmark::DoNotOptimize(a); + benchmark::DoNotOptimize(b); + benchmark::DoNotOptimize(a.compare(b)); + } +} +BENCHMARK(BM_CompareSame)->DenseRange(0, 3)->Range(4, 1 << 10); + +void BM_CompareFirstOneLess(benchmark::State& state) { + const int len = state.range(0); + std::string x(len, 'a'); + std::string y = x; + y.back() = 'b'; + absl::string_view a = x; + absl::string_view b = y; + + for (auto _ : state) { + benchmark::DoNotOptimize(a); + benchmark::DoNotOptimize(b); + benchmark::DoNotOptimize(a.compare(b)); + } +} +BENCHMARK(BM_CompareFirstOneLess)->DenseRange(1, 3)->Range(4, 1 << 10); + +void BM_CompareSecondOneLess(benchmark::State& state) { + const int len = state.range(0); + std::string x(len, 'a'); + std::string y = x; + x.back() = 'b'; + absl::string_view a = x; + absl::string_view b = y; + + for (auto _ : state) { + benchmark::DoNotOptimize(a); + benchmark::DoNotOptimize(b); + benchmark::DoNotOptimize(a.compare(b)); + } +} +BENCHMARK(BM_CompareSecondOneLess)->DenseRange(1, 3)->Range(4, 1 << 10); + +void BM_find_string_view_len_one(benchmark::State& state) { + std::string haystack(state.range(0), '0'); + absl::string_view s(haystack); + for (auto _ : state) { + benchmark::DoNotOptimize(s.find("x")); // not present; length 1 + } +} +BENCHMARK(BM_find_string_view_len_one)->Range(1, 1 << 20); + +void BM_find_string_view_len_two(benchmark::State& state) { + std::string haystack(state.range(0), '0'); + absl::string_view s(haystack); + for (auto _ : state) { + benchmark::DoNotOptimize(s.find("xx")); // not present; length 2 + } +} +BENCHMARK(BM_find_string_view_len_two)->Range(1, 1 << 20); + +void BM_find_one_char(benchmark::State& state) { + 
std::string haystack(state.range(0), '0'); + absl::string_view s(haystack); + for (auto _ : state) { + benchmark::DoNotOptimize(s.find('x')); // not present + } +} +BENCHMARK(BM_find_one_char)->Range(1, 1 << 20); + +void BM_rfind_one_char(benchmark::State& state) { + std::string haystack(state.range(0), '0'); + absl::string_view s(haystack); + for (auto _ : state) { + benchmark::DoNotOptimize(s.rfind('x')); // not present + } +} +BENCHMARK(BM_rfind_one_char)->Range(1, 1 << 20); + +// Shared body of the find_first_of benchmarks below. Searches a haystack of +// `haystack_len` '0' characters for a needle made of `state.range(0)` +// consecutive characters starting at 'a'. With the needle lengths registered +// below (at most 32) no needle character occurs in the haystack, so every +// search has to scan the whole haystack without finding a match. +void BM_worst_case_find_first_of(benchmark::State& state, int haystack_len) { + const int needle_len = state.range(0); + std::string needle; + for (int i = 0; i < needle_len; ++i) { + needle += 'a' + i; // 'a', 'b', 'c', ... + } + std::string haystack(haystack_len, '0'); // `haystack_len` '0' characters. + + absl::string_view s(haystack); + for (auto _ : state) { + benchmark::DoNotOptimize(s.find_first_of(needle)); + } +} + +// Haystack of 10 characters. +void BM_find_first_of_short(benchmark::State& state) { + BM_worst_case_find_first_of(state, 10); +} + +// Haystack of 100 characters. +void BM_find_first_of_medium(benchmark::State& state) { + BM_worst_case_find_first_of(state, 100); +} + +// Haystack of 1000 characters. +void BM_find_first_of_long(benchmark::State& state) { + BM_worst_case_find_first_of(state, 1000); +} + +BENCHMARK(BM_find_first_of_short)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32); +BENCHMARK(BM_find_first_of_medium)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32); +BENCHMARK(BM_find_first_of_long)->DenseRange(0, 4)->Arg(8)->Arg(16)->Arg(32); + +// A std::map keyed by absl::string_view whose constructor accepts, and +// ignores, a size argument. +struct EasyMap : public std::map<absl::string_view, uint64_t> { + explicit EasyMap(size_t) {} +}; + +// This templated benchmark helper function is intended to stress operator== or +// operator< in a realistic test. It surely isn't entirely realistic, but it's +// a start. The test creates a map of type Map, a template arg, and populates +// it with table_size key/value pairs. Each key has WordsPerKey words. After +// creating the map, a number of lookups are done in random order. Some keys +// are used much more frequently than others in this phase of the test. 
+template <typename Map, int WordsPerKey> +void StringViewMapBenchmark(benchmark::State& state) { + const int table_size = state.range(0); + const double kFractionOfKeysThatAreHot = 0.2; + const int kNumLookupsOfHotKeys = 20; + const int kNumLookupsOfColdKeys = 1; + const char* words[] = {"the", "quick", "brown", "fox", "jumped", + "over", "the", "lazy", "dog", "and", + "found", "a", "large", "mushroom", "and", + "a", "couple", "crickets", "eating", "pie"}; + // Create some keys that consist of words in random order. + std::random_device r; + std::seed_seq seed({r(), r(), r(), r(), r(), r(), r(), r()}); + std::mt19937 rng(seed); + std::vector<std::string> keys(table_size); + std::vector<int> all_indices; + const int kBlockSize = 1 << 12; + std::unordered_set<std::string> t(kBlockSize); + std::uniform_int_distribution<int> uniform(0, ABSL_ARRAYSIZE(words) - 1); + for (int i = 0; i < table_size; i++) { + all_indices.push_back(i); + do { + keys[i].clear(); + for (int j = 0; j < WordsPerKey; j++) { + absl::StrAppend(&keys[i], j > 0 ? " " : "", words[uniform(rng)]); + } + } while (!t.insert(keys[i]).second); + } + + // Create a list of strings to lookup: a permutation of the array of + // keys we just created, with repeats. "Hot" keys get repeated more. 
+ std::shuffle(all_indices.begin(), all_indices.end(), rng); + const int num_hot = table_size * kFractionOfKeysThatAreHot; + const int num_cold = table_size - num_hot; + std::vector<int> hot_indices(all_indices.begin(), + all_indices.begin() + num_hot); + std::vector<int> indices; + for (int i = 0; i < kNumLookupsOfColdKeys; i++) { + indices.insert(indices.end(), all_indices.begin(), all_indices.end()); + } + for (int i = 0; i < kNumLookupsOfHotKeys - kNumLookupsOfColdKeys; i++) { + indices.insert(indices.end(), hot_indices.begin(), hot_indices.end()); + } + std::shuffle(indices.begin(), indices.end(), rng); + ABSL_RAW_CHECK( + num_cold * kNumLookupsOfColdKeys + num_hot * kNumLookupsOfHotKeys == + indices.size(), + ""); + // After constructing the array we probe it with absl::string_views built from + // test_strings. This means operator== won't see equal pointers, so + // it'll have to check for equal lengths and equal characters. + std::vector<std::string> test_strings(indices.size()); + for (int i = 0; i < indices.size(); i++) { + test_strings[i] = keys[indices[i]]; + } + + // Run the benchmark. It includes map construction but is mostly + // map lookups. 
+ for (auto _ : state) { + Map h(table_size); + for (int i = 0; i < table_size; i++) { + h[keys[i]] = i * 2; + } + ABSL_RAW_CHECK(h.size() == table_size, ""); + uint64_t sum = 0; + for (int i = 0; i < indices.size(); i++) { + sum += h[test_strings[i]]; + } + benchmark::DoNotOptimize(sum); + } +} + +void BM_StdMap_4(benchmark::State& state) { + StringViewMapBenchmark<EasyMap, 4>(state); +} +BENCHMARK(BM_StdMap_4)->Range(1 << 10, 1 << 16); + +void BM_StdMap_8(benchmark::State& state) { + StringViewMapBenchmark<EasyMap, 8>(state); +} +BENCHMARK(BM_StdMap_8)->Range(1 << 10, 1 << 16); + +void BM_CopyToStringNative(benchmark::State& state) { + std::string src(state.range(0), 'x'); + absl::string_view sv(src); + std::string dst; + for (auto _ : state) { + dst.assign(sv.begin(), sv.end()); + } +} +BENCHMARK(BM_CopyToStringNative)->Range(1 << 3, 1 << 12); + +void BM_AppendToStringNative(benchmark::State& state) { + std::string src(state.range(0), 'x'); + absl::string_view sv(src); + std::string dst; + for (auto _ : state) { + dst.clear(); + dst.insert(dst.end(), sv.begin(), sv.end()); + } +} +BENCHMARK(BM_AppendToStringNative)->Range(1 << 3, 1 << 12); + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/string_view_test.cc b/third_party/abseil_cpp/absl/strings/string_view_test.cc new file mode 100644 index 000000000000..dcebb1500100 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/string_view_test.cc @@ -0,0 +1,1264 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/string_view.h" + +#include <stdlib.h> +#include <iomanip> +#include <iterator> +#include <limits> +#include <map> +#include <sstream> +#include <stdexcept> +#include <string> +#include <type_traits> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/options.h" + +#if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) +// We don't control the death messaging when using std::string_view. +// Android assert messages only go to system log, so death tests cannot inspect +// the message for matching. +#define ABSL_EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + EXPECT_DEATH_IF_SUPPORTED(statement, ".*") +#else +#define ABSL_EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ + EXPECT_DEATH_IF_SUPPORTED(statement, regex) +#endif + +namespace { + +// A minimal allocator that uses malloc(). +template <typename T> +struct Mallocator { + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + size_type max_size() const { + return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type); + } + template <typename U> + struct rebind { + typedef Mallocator<U> other; + }; + Mallocator() = default; + template <class U> + Mallocator(const Mallocator<U>&) {} // NOLINT(runtime/explicit) + + T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); } + void deallocate(T* p, size_t) { std::free(p); } +}; +template <typename T, typename U> +bool operator==(const Mallocator<T>&, const Mallocator<U>&) { + return true; +} +template <typename T, typename U> +bool operator!=(const Mallocator<T>&, const Mallocator<U>&) { + return false; +} + +TEST(StringViewTest, Ctor) { + { + // Null. 
+ absl::string_view s10; + EXPECT_TRUE(s10.data() == nullptr); + EXPECT_EQ(0, s10.length()); + } + + { + // const char* without length. + const char* hello = "hello"; + absl::string_view s20(hello); + EXPECT_TRUE(s20.data() == hello); + EXPECT_EQ(5, s20.length()); + + // const char* with length. + absl::string_view s21(hello, 4); + EXPECT_TRUE(s21.data() == hello); + EXPECT_EQ(4, s21.length()); + + // Not recommended, but valid C++ + absl::string_view s22(hello, 6); + EXPECT_TRUE(s22.data() == hello); + EXPECT_EQ(6, s22.length()); + } + + { + // std::string. + std::string hola = "hola"; + absl::string_view s30(hola); + EXPECT_TRUE(s30.data() == hola.data()); + EXPECT_EQ(4, s30.length()); + + // std::string with embedded '\0'. + hola.push_back('\0'); + hola.append("h2"); + hola.push_back('\0'); + absl::string_view s31(hola); + EXPECT_TRUE(s31.data() == hola.data()); + EXPECT_EQ(8, s31.length()); + } + + { + using mstring = + std::basic_string<char, std::char_traits<char>, Mallocator<char>>; + mstring str1("BUNGIE-JUMPING!"); + const mstring str2("SLEEPING!"); + + absl::string_view s1(str1); + s1.remove_prefix(strlen("BUNGIE-JUM")); + + absl::string_view s2(str2); + s2.remove_prefix(strlen("SLEE")); + + EXPECT_EQ(s1, s2); + EXPECT_EQ(s1, "PING!"); + } + + // TODO(mec): absl::string_view(const absl::string_view&); +} + +TEST(StringViewTest, Swap) { + absl::string_view a("a"); + absl::string_view b("bbb"); + EXPECT_TRUE(noexcept(a.swap(b))); + a.swap(b); + EXPECT_EQ(a, "bbb"); + EXPECT_EQ(b, "a"); + a.swap(b); + EXPECT_EQ(a, "a"); + EXPECT_EQ(b, "bbb"); +} + +TEST(StringViewTest, STLComparator) { + std::string s1("foo"); + std::string s2("bar"); + std::string s3("baz"); + + absl::string_view p1(s1); + absl::string_view p2(s2); + absl::string_view p3(s3); + + typedef std::map<absl::string_view, int> TestMap; + TestMap map; + + map.insert(std::make_pair(p1, 0)); + map.insert(std::make_pair(p2, 1)); + map.insert(std::make_pair(p3, 2)); + EXPECT_EQ(map.size(), 3); + + 
TestMap::const_iterator iter = map.begin(); + EXPECT_EQ(iter->second, 1); + ++iter; + EXPECT_EQ(iter->second, 2); + ++iter; + EXPECT_EQ(iter->second, 0); + ++iter; + EXPECT_TRUE(iter == map.end()); + + TestMap::iterator new_iter = map.find("zot"); + EXPECT_TRUE(new_iter == map.end()); + + new_iter = map.find("bar"); + EXPECT_TRUE(new_iter != map.end()); + + map.erase(new_iter); + EXPECT_EQ(map.size(), 2); + + iter = map.begin(); + EXPECT_EQ(iter->second, 2); + ++iter; + EXPECT_EQ(iter->second, 0); + ++iter; + EXPECT_TRUE(iter == map.end()); +} + +#define COMPARE(result, op, x, y) \ + EXPECT_EQ(result, absl::string_view((x)) op absl::string_view((y))); \ + EXPECT_EQ(result, absl::string_view((x)).compare(absl::string_view((y))) op 0) + +TEST(StringViewTest, ComparisonOperators) { + COMPARE(true, ==, "", ""); + COMPARE(true, ==, "", absl::string_view()); + COMPARE(true, ==, absl::string_view(), ""); + COMPARE(true, ==, "a", "a"); + COMPARE(true, ==, "aa", "aa"); + COMPARE(false, ==, "a", ""); + COMPARE(false, ==, "", "a"); + COMPARE(false, ==, "a", "b"); + COMPARE(false, ==, "a", "aa"); + COMPARE(false, ==, "aa", "a"); + + COMPARE(false, !=, "", ""); + COMPARE(false, !=, "a", "a"); + COMPARE(false, !=, "aa", "aa"); + COMPARE(true, !=, "a", ""); + COMPARE(true, !=, "", "a"); + COMPARE(true, !=, "a", "b"); + COMPARE(true, !=, "a", "aa"); + COMPARE(true, !=, "aa", "a"); + + COMPARE(true, <, "a", "b"); + COMPARE(true, <, "a", "aa"); + COMPARE(true, <, "aa", "b"); + COMPARE(true, <, "aa", "bb"); + COMPARE(false, <, "a", "a"); + COMPARE(false, <, "b", "a"); + COMPARE(false, <, "aa", "a"); + COMPARE(false, <, "b", "aa"); + COMPARE(false, <, "bb", "aa"); + + COMPARE(true, <=, "a", "a"); + COMPARE(true, <=, "a", "b"); + COMPARE(true, <=, "a", "aa"); + COMPARE(true, <=, "aa", "b"); + COMPARE(true, <=, "aa", "bb"); + COMPARE(false, <=, "b", "a"); + COMPARE(false, <=, "aa", "a"); + COMPARE(false, <=, "b", "aa"); + COMPARE(false, <=, "bb", "aa"); + + COMPARE(false, >=, "a", "b"); 
+ COMPARE(false, >=, "a", "aa"); + COMPARE(false, >=, "aa", "b"); + COMPARE(false, >=, "aa", "bb"); + COMPARE(true, >=, "a", "a"); + COMPARE(true, >=, "b", "a"); + COMPARE(true, >=, "aa", "a"); + COMPARE(true, >=, "b", "aa"); + COMPARE(true, >=, "bb", "aa"); + + COMPARE(false, >, "a", "a"); + COMPARE(false, >, "a", "b"); + COMPARE(false, >, "a", "aa"); + COMPARE(false, >, "aa", "b"); + COMPARE(false, >, "aa", "bb"); + COMPARE(true, >, "b", "a"); + COMPARE(true, >, "aa", "a"); + COMPARE(true, >, "b", "aa"); + COMPARE(true, >, "bb", "aa"); +} + +TEST(StringViewTest, ComparisonOperatorsByCharacterPosition) { + std::string x; + for (int i = 0; i < 256; i++) { + x += 'a'; + std::string y = x; + COMPARE(true, ==, x, y); + for (int j = 0; j < i; j++) { + std::string z = x; + z[j] = 'b'; // Differs in position 'j' + COMPARE(false, ==, x, z); + COMPARE(true, <, x, z); + COMPARE(true, >, z, x); + if (j + 1 < i) { + z[j + 1] = 'A'; // Differs in position 'j+1' as well + COMPARE(false, ==, x, z); + COMPARE(true, <, x, z); + COMPARE(true, >, z, x); + z[j + 1] = 'z'; // Differs in position 'j+1' as well + COMPARE(false, ==, x, z); + COMPARE(true, <, x, z); + COMPARE(true, >, z, x); + } + } + } +} +#undef COMPARE + +// Sadly, our users often confuse std::string::npos with +// absl::string_view::npos; So much so that we test here that they are the same. +// They need to both be unsigned, and both be the maximum-valued integer of +// their type. + +template <typename T> +struct is_type { + template <typename U> + static bool same(U) { + return false; + } + static bool same(T) { return true; } +}; + +TEST(StringViewTest, NposMatchesStdStringView) { + EXPECT_EQ(absl::string_view::npos, std::string::npos); + + EXPECT_TRUE(is_type<size_t>::same(absl::string_view::npos)); + EXPECT_FALSE(is_type<size_t>::same("")); + + // Make sure absl::string_view::npos continues to be a header constant. 
+ char test[absl::string_view::npos & 1] = {0}; + EXPECT_EQ(0, test[0]); +} + +TEST(StringViewTest, STL1) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + const absl::string_view d("foobar"); + const absl::string_view e; + std::string temp("123"); + temp += '\0'; + temp += "456"; + const absl::string_view f(temp); + + EXPECT_EQ(a[6], 'g'); + EXPECT_EQ(b[0], 'a'); + EXPECT_EQ(c[2], 'z'); + EXPECT_EQ(f[3], '\0'); + EXPECT_EQ(f[5], '5'); + + EXPECT_EQ(*d.data(), 'f'); + EXPECT_EQ(d.data()[5], 'r'); + EXPECT_TRUE(e.data() == nullptr); + + EXPECT_EQ(*a.begin(), 'a'); + EXPECT_EQ(*(b.begin() + 2), 'c'); + EXPECT_EQ(*(c.end() - 1), 'z'); + + EXPECT_EQ(*a.rbegin(), 'z'); + EXPECT_EQ(*(b.rbegin() + 2), 'a'); + EXPECT_EQ(*(c.rend() - 1), 'x'); + EXPECT_TRUE(a.rbegin() + 26 == a.rend()); + + EXPECT_EQ(a.size(), 26); + EXPECT_EQ(b.size(), 3); + EXPECT_EQ(c.size(), 3); + EXPECT_EQ(d.size(), 6); + EXPECT_EQ(e.size(), 0); + EXPECT_EQ(f.size(), 7); + + EXPECT_TRUE(!d.empty()); + EXPECT_TRUE(d.begin() != d.end()); + EXPECT_TRUE(d.begin() + 6 == d.end()); + + EXPECT_TRUE(e.empty()); + EXPECT_TRUE(e.begin() == e.end()); + + char buf[4] = { '%', '%', '%', '%' }; + EXPECT_EQ(a.copy(buf, 4), 4); + EXPECT_EQ(buf[0], a[0]); + EXPECT_EQ(buf[1], a[1]); + EXPECT_EQ(buf[2], a[2]); + EXPECT_EQ(buf[3], a[3]); + EXPECT_EQ(a.copy(buf, 3, 7), 3); + EXPECT_EQ(buf[0], a[7]); + EXPECT_EQ(buf[1], a[8]); + EXPECT_EQ(buf[2], a[9]); + EXPECT_EQ(buf[3], a[3]); + EXPECT_EQ(c.copy(buf, 99), 3); + EXPECT_EQ(buf[0], c[0]); + EXPECT_EQ(buf[1], c[1]); + EXPECT_EQ(buf[2], c[2]); + EXPECT_EQ(buf[3], a[3]); +#ifdef ABSL_HAVE_EXCEPTIONS + EXPECT_THROW(a.copy(buf, 1, 27), std::out_of_range); +#else + ABSL_EXPECT_DEATH_IF_SUPPORTED(a.copy(buf, 1, 27), "absl::string_view::copy"); +#endif +} + +// Separated from STL1() because some compilers produce an overly +// large stack frame for the combined function. 
+TEST(StringViewTest, STL2) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + const absl::string_view f( + "123" + "\0" + "456", + 7); + + d = absl::string_view(); + EXPECT_EQ(d.size(), 0); + EXPECT_TRUE(d.empty()); + EXPECT_TRUE(d.data() == nullptr); + EXPECT_TRUE(d.begin() == d.end()); + + EXPECT_EQ(a.find(b), 0); + EXPECT_EQ(a.find(b, 1), absl::string_view::npos); + EXPECT_EQ(a.find(c), 23); + EXPECT_EQ(a.find(c, 9), 23); + EXPECT_EQ(a.find(c, absl::string_view::npos), absl::string_view::npos); + EXPECT_EQ(b.find(c), absl::string_view::npos); + EXPECT_EQ(b.find(c, absl::string_view::npos), absl::string_view::npos); + EXPECT_EQ(a.find(d), 0); + EXPECT_EQ(a.find(e), 0); + EXPECT_EQ(a.find(d, 12), 12); + EXPECT_EQ(a.find(e, 17), 17); + absl::string_view g("xx not found bb"); + EXPECT_EQ(a.find(g), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.find(b), absl::string_view::npos); + EXPECT_EQ(e.find(b), absl::string_view::npos); + EXPECT_EQ(d.find(b, 4), absl::string_view::npos); + EXPECT_EQ(e.find(b, 7), absl::string_view::npos); + + size_t empty_search_pos = std::string().find(std::string()); + EXPECT_EQ(d.find(d), empty_search_pos); + EXPECT_EQ(d.find(e), empty_search_pos); + EXPECT_EQ(e.find(d), empty_search_pos); + EXPECT_EQ(e.find(e), empty_search_pos); + EXPECT_EQ(d.find(d, 4), std::string().find(std::string(), 4)); + EXPECT_EQ(d.find(e, 4), std::string().find(std::string(), 4)); + EXPECT_EQ(e.find(d, 4), std::string().find(std::string(), 4)); + EXPECT_EQ(e.find(e, 4), std::string().find(std::string(), 4)); + + EXPECT_EQ(a.find('a'), 0); + EXPECT_EQ(a.find('c'), 2); + EXPECT_EQ(a.find('z'), 25); + EXPECT_EQ(a.find('$'), absl::string_view::npos); + EXPECT_EQ(a.find('\0'), absl::string_view::npos); + EXPECT_EQ(f.find('\0'), 3); + EXPECT_EQ(f.find('3'), 2); + EXPECT_EQ(f.find('5'), 5); + 
EXPECT_EQ(g.find('o'), 4); + EXPECT_EQ(g.find('o', 4), 4); + EXPECT_EQ(g.find('o', 5), 8); + EXPECT_EQ(a.find('b', 5), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.find('\0'), absl::string_view::npos); + EXPECT_EQ(e.find('\0'), absl::string_view::npos); + EXPECT_EQ(d.find('\0', 4), absl::string_view::npos); + EXPECT_EQ(e.find('\0', 7), absl::string_view::npos); + EXPECT_EQ(d.find('x'), absl::string_view::npos); + EXPECT_EQ(e.find('x'), absl::string_view::npos); + EXPECT_EQ(d.find('x', 4), absl::string_view::npos); + EXPECT_EQ(e.find('x', 7), absl::string_view::npos); + + EXPECT_EQ(a.rfind(b), 0); + EXPECT_EQ(a.rfind(b, 1), 0); + EXPECT_EQ(a.rfind(c), 23); + EXPECT_EQ(a.rfind(c, 22), absl::string_view::npos); + EXPECT_EQ(a.rfind(c, 1), absl::string_view::npos); + EXPECT_EQ(a.rfind(c, 0), absl::string_view::npos); + EXPECT_EQ(b.rfind(c), absl::string_view::npos); + EXPECT_EQ(b.rfind(c, 0), absl::string_view::npos); + EXPECT_EQ(a.rfind(d), std::string(a).rfind(std::string())); + EXPECT_EQ(a.rfind(e), std::string(a).rfind(std::string())); + EXPECT_EQ(a.rfind(d, 12), 12); + EXPECT_EQ(a.rfind(e, 17), 17); + EXPECT_EQ(a.rfind(g), absl::string_view::npos); + EXPECT_EQ(d.rfind(b), absl::string_view::npos); + EXPECT_EQ(e.rfind(b), absl::string_view::npos); + EXPECT_EQ(d.rfind(b, 4), absl::string_view::npos); + EXPECT_EQ(e.rfind(b, 7), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.rfind(d, 4), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(d, 7), std::string().rfind(std::string())); + EXPECT_EQ(d.rfind(e, 4), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(e, 7), std::string().rfind(std::string())); + EXPECT_EQ(d.rfind(d), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(d), std::string().rfind(std::string())); + EXPECT_EQ(d.rfind(e), std::string().rfind(std::string())); + EXPECT_EQ(e.rfind(e), std::string().rfind(std::string())); + + EXPECT_EQ(g.rfind('o'), 8); + EXPECT_EQ(g.rfind('q'), 
absl::string_view::npos); + EXPECT_EQ(g.rfind('o', 8), 8); + EXPECT_EQ(g.rfind('o', 7), 4); + EXPECT_EQ(g.rfind('o', 3), absl::string_view::npos); + EXPECT_EQ(f.rfind('\0'), 3); + EXPECT_EQ(f.rfind('\0', 12), 3); + EXPECT_EQ(f.rfind('3'), 2); + EXPECT_EQ(f.rfind('5'), 5); + // empty string nonsense + EXPECT_EQ(d.rfind('o'), absl::string_view::npos); + EXPECT_EQ(e.rfind('o'), absl::string_view::npos); + EXPECT_EQ(d.rfind('o', 4), absl::string_view::npos); + EXPECT_EQ(e.rfind('o', 7), absl::string_view::npos); +} + +// Continued from STL2 +TEST(StringViewTest, STL2FindFirst) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + const absl::string_view f( + "123" + "\0" + "456", + 7); + absl::string_view g("xx not found bb"); + + d = absl::string_view(); + EXPECT_EQ(a.find_first_of(b), 0); + EXPECT_EQ(a.find_first_of(b, 0), 0); + EXPECT_EQ(a.find_first_of(b, 1), 1); + EXPECT_EQ(a.find_first_of(b, 2), 2); + EXPECT_EQ(a.find_first_of(b, 3), absl::string_view::npos); + EXPECT_EQ(a.find_first_of(c), 23); + EXPECT_EQ(a.find_first_of(c, 23), 23); + EXPECT_EQ(a.find_first_of(c, 24), 24); + EXPECT_EQ(a.find_first_of(c, 25), 25); + EXPECT_EQ(a.find_first_of(c, 26), absl::string_view::npos); + EXPECT_EQ(g.find_first_of(b), 13); + EXPECT_EQ(g.find_first_of(c), 0); + EXPECT_EQ(a.find_first_of(f), absl::string_view::npos); + EXPECT_EQ(f.find_first_of(a), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(a.find_first_of(d), absl::string_view::npos); + EXPECT_EQ(a.find_first_of(e), absl::string_view::npos); + EXPECT_EQ(d.find_first_of(b), absl::string_view::npos); + EXPECT_EQ(e.find_first_of(b), absl::string_view::npos); + EXPECT_EQ(d.find_first_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_first_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_first_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_first_of(e), 
absl::string_view::npos); + + EXPECT_EQ(a.find_first_not_of(b), 3); + EXPECT_EQ(a.find_first_not_of(c), 0); + EXPECT_EQ(b.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(c.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(f.find_first_not_of(a), 0); + EXPECT_EQ(a.find_first_not_of(f), 0); + EXPECT_EQ(a.find_first_not_of(d), 0); + EXPECT_EQ(a.find_first_not_of(e), 0); + // empty string nonsense + EXPECT_EQ(a.find_first_not_of(d), 0); + EXPECT_EQ(a.find_first_not_of(e), 0); + EXPECT_EQ(a.find_first_not_of(d, 1), 1); + EXPECT_EQ(a.find_first_not_of(e, 1), 1); + EXPECT_EQ(a.find_first_not_of(d, a.size() - 1), a.size() - 1); + EXPECT_EQ(a.find_first_not_of(e, a.size() - 1), a.size() - 1); + EXPECT_EQ(a.find_first_not_of(d, a.size()), absl::string_view::npos); + EXPECT_EQ(a.find_first_not_of(e, a.size()), absl::string_view::npos); + EXPECT_EQ(a.find_first_not_of(d, absl::string_view::npos), + absl::string_view::npos); + EXPECT_EQ(a.find_first_not_of(e, absl::string_view::npos), + absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of(a), absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of(e), absl::string_view::npos); + + absl::string_view h("===="); + EXPECT_EQ(h.find_first_not_of('='), absl::string_view::npos); + EXPECT_EQ(h.find_first_not_of('=', 3), absl::string_view::npos); + EXPECT_EQ(h.find_first_not_of('\0'), 0); + EXPECT_EQ(g.find_first_not_of('x'), 2); + EXPECT_EQ(f.find_first_not_of('\0'), 0); + EXPECT_EQ(f.find_first_not_of('\0', 3), 4); + EXPECT_EQ(f.find_first_not_of('\0', 2), 2); + // empty string nonsense + EXPECT_EQ(d.find_first_not_of('x'), absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of('x'), absl::string_view::npos); + EXPECT_EQ(d.find_first_not_of('\0'), 
absl::string_view::npos); + EXPECT_EQ(e.find_first_not_of('\0'), absl::string_view::npos); +} + +// Continued from STL2 +TEST(StringViewTest, STL2FindLast) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + const absl::string_view f( + "123" + "\0" + "456", + 7); + absl::string_view g("xx not found bb"); + absl::string_view h("===="); + absl::string_view i("56"); + + d = absl::string_view(); + EXPECT_EQ(h.find_last_of(a), absl::string_view::npos); + EXPECT_EQ(g.find_last_of(a), g.size()-1); + EXPECT_EQ(a.find_last_of(b), 2); + EXPECT_EQ(a.find_last_of(c), a.size()-1); + EXPECT_EQ(f.find_last_of(i), 6); + EXPECT_EQ(a.find_last_of('a'), 0); + EXPECT_EQ(a.find_last_of('b'), 1); + EXPECT_EQ(a.find_last_of('z'), 25); + EXPECT_EQ(a.find_last_of('a', 5), 0); + EXPECT_EQ(a.find_last_of('b', 5), 1); + EXPECT_EQ(a.find_last_of('b', 0), absl::string_view::npos); + EXPECT_EQ(a.find_last_of('z', 25), 25); + EXPECT_EQ(a.find_last_of('z', 24), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(i, 5), 5); + EXPECT_EQ(f.find_last_of(i, 6), 6); + EXPECT_EQ(f.find_last_of(a, 4), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(f.find_last_of(d), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(e), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(d, 4), absl::string_view::npos); + EXPECT_EQ(f.find_last_of(e, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(e), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(f), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(f), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(d, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(e, 4), absl::string_view::npos); + 
EXPECT_EQ(e.find_last_of(d, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(e, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_of(f, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_of(f, 4), absl::string_view::npos); + + EXPECT_EQ(a.find_last_not_of(b), a.size()-1); + EXPECT_EQ(a.find_last_not_of(c), 22); + EXPECT_EQ(b.find_last_not_of(a), absl::string_view::npos); + EXPECT_EQ(b.find_last_not_of(b), absl::string_view::npos); + EXPECT_EQ(f.find_last_not_of(i), 4); + EXPECT_EQ(a.find_last_not_of(c, 24), 22); + EXPECT_EQ(a.find_last_not_of(b, 3), 3); + EXPECT_EQ(a.find_last_not_of(b, 2), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(f.find_last_not_of(d), f.size()-1); + EXPECT_EQ(f.find_last_not_of(e), f.size()-1); + EXPECT_EQ(f.find_last_not_of(d, 4), 4); + EXPECT_EQ(f.find_last_not_of(e, 4), 4); + EXPECT_EQ(d.find_last_not_of(d), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(e), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(d), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(e), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(f), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(f), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(d, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(e, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(d, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(e, 4), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of(f, 4), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of(f, 4), absl::string_view::npos); + + EXPECT_EQ(h.find_last_not_of('x'), h.size() - 1); + EXPECT_EQ(h.find_last_not_of('='), absl::string_view::npos); + EXPECT_EQ(b.find_last_not_of('c'), 1); + EXPECT_EQ(h.find_last_not_of('x', 2), 2); + EXPECT_EQ(h.find_last_not_of('=', 2), absl::string_view::npos); + EXPECT_EQ(b.find_last_not_of('b', 1), 0); + // empty string nonsense + EXPECT_EQ(d.find_last_not_of('x'), absl::string_view::npos); + 
EXPECT_EQ(e.find_last_not_of('x'), absl::string_view::npos); + EXPECT_EQ(d.find_last_not_of('\0'), absl::string_view::npos); + EXPECT_EQ(e.find_last_not_of('\0'), absl::string_view::npos); +} + +// Continued from STL2 +TEST(StringViewTest, STL2Substr) { + const absl::string_view a("abcdefghijklmnopqrstuvwxyz"); + const absl::string_view b("abc"); + const absl::string_view c("xyz"); + absl::string_view d("foobar"); + const absl::string_view e; + + d = absl::string_view(); + EXPECT_EQ(a.substr(0, 3), b); + EXPECT_EQ(a.substr(23), c); + EXPECT_EQ(a.substr(23, 3), c); + EXPECT_EQ(a.substr(23, 99), c); + EXPECT_EQ(a.substr(0), a); + EXPECT_EQ(a.substr(3, 2), "de"); + // empty string nonsense + EXPECT_EQ(d.substr(0, 99), e); + // use of npos + EXPECT_EQ(a.substr(0, absl::string_view::npos), a); + EXPECT_EQ(a.substr(23, absl::string_view::npos), c); + // throw exception +#ifdef ABSL_HAVE_EXCEPTIONS + EXPECT_THROW((void)a.substr(99, 2), std::out_of_range); +#else + ABSL_EXPECT_DEATH_IF_SUPPORTED((void)a.substr(99, 2), + "absl::string_view::substr"); +#endif +} + +TEST(StringViewTest, TruncSubstr) { + const absl::string_view hi("hi"); + EXPECT_EQ("", absl::ClippedSubstr(hi, 0, 0)); + EXPECT_EQ("h", absl::ClippedSubstr(hi, 0, 1)); + EXPECT_EQ("hi", absl::ClippedSubstr(hi, 0)); + EXPECT_EQ("i", absl::ClippedSubstr(hi, 1)); + EXPECT_EQ("", absl::ClippedSubstr(hi, 2)); + EXPECT_EQ("", absl::ClippedSubstr(hi, 3)); // truncation + EXPECT_EQ("", absl::ClippedSubstr(hi, 3, 2)); // truncation +} + +TEST(StringViewTest, UTF8) { + std::string utf8 = "\u00E1"; + std::string utf8_twice = utf8 + " " + utf8; + int utf8_len = strlen(utf8.data()); + EXPECT_EQ(utf8_len, absl::string_view(utf8_twice).find_first_of(" ")); + EXPECT_EQ(utf8_len, absl::string_view(utf8_twice).find_first_of(" \t")); +} + +TEST(StringViewTest, FindConformance) { + struct { + std::string haystack; + std::string needle; + } specs[] = { + {"", ""}, + {"", "a"}, + {"a", ""}, + {"a", "a"}, + {"a", "b"}, + {"aa", ""}, + 
{"aa", "a"}, + {"aa", "b"}, + {"ab", "a"}, + {"ab", "b"}, + {"abcd", ""}, + {"abcd", "a"}, + {"abcd", "d"}, + {"abcd", "ab"}, + {"abcd", "bc"}, + {"abcd", "cd"}, + {"abcd", "abcd"}, + }; + for (const auto& s : specs) { + SCOPED_TRACE(s.haystack); + SCOPED_TRACE(s.needle); + std::string st = s.haystack; + absl::string_view sp = s.haystack; + for (size_t i = 0; i <= sp.size(); ++i) { + size_t pos = (i == sp.size()) ? absl::string_view::npos : i; + SCOPED_TRACE(pos); + EXPECT_EQ(sp.find(s.needle, pos), + st.find(s.needle, pos)); + EXPECT_EQ(sp.rfind(s.needle, pos), + st.rfind(s.needle, pos)); + EXPECT_EQ(sp.find_first_of(s.needle, pos), + st.find_first_of(s.needle, pos)); + EXPECT_EQ(sp.find_first_not_of(s.needle, pos), + st.find_first_not_of(s.needle, pos)); + EXPECT_EQ(sp.find_last_of(s.needle, pos), + st.find_last_of(s.needle, pos)); + EXPECT_EQ(sp.find_last_not_of(s.needle, pos), + st.find_last_not_of(s.needle, pos)); + } + } +} + +TEST(StringViewTest, Remove) { + absl::string_view a("foobar"); + std::string s1("123"); + s1 += '\0'; + s1 += "456"; + absl::string_view e; + std::string s2; + + // remove_prefix + absl::string_view c(a); + c.remove_prefix(3); + EXPECT_EQ(c, "bar"); + c = a; + c.remove_prefix(0); + EXPECT_EQ(c, a); + c.remove_prefix(c.size()); + EXPECT_EQ(c, e); + + // remove_suffix + c = a; + c.remove_suffix(3); + EXPECT_EQ(c, "foo"); + c = a; + c.remove_suffix(0); + EXPECT_EQ(c, a); + c.remove_suffix(c.size()); + EXPECT_EQ(c, e); +} + +TEST(StringViewTest, Set) { + absl::string_view a("foobar"); + absl::string_view empty; + absl::string_view b; + + // set + b = absl::string_view("foobar", 6); + EXPECT_EQ(b, a); + b = absl::string_view("foobar", 0); + EXPECT_EQ(b, empty); + b = absl::string_view("foobar", 7); + EXPECT_NE(b, a); + + b = absl::string_view("foobar"); + EXPECT_EQ(b, a); +} + +TEST(StringViewTest, FrontBack) { + static const char arr[] = "abcd"; + const absl::string_view csp(arr, 4); + EXPECT_EQ(&arr[0], &csp.front()); + EXPECT_EQ(&arr[3], 
&csp.back()); +} + +TEST(StringViewTest, FrontBackSingleChar) { + static const char c = 'a'; + const absl::string_view csp(&c, 1); + EXPECT_EQ(&c, &csp.front()); + EXPECT_EQ(&c, &csp.back()); +} + +TEST(StringViewTest, FrontBackEmpty) { +#ifndef ABSL_USES_STD_STRING_VIEW +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED + // Abseil's string_view implementation has debug assertions that check that + // front() and back() are not called on an empty string_view. + absl::string_view sv; + ABSL_EXPECT_DEATH_IF_SUPPORTED(sv.front(), ""); + ABSL_EXPECT_DEATH_IF_SUPPORTED(sv.back(), ""); +#endif +#endif +} + +// `std::string_view::string_view(const char*)` calls +// `std::char_traits<char>::length(const char*)` to get the string length. In +// libc++, it doesn't allow `nullptr` in the constexpr context, with the error +// "read of dereferenced null pointer is not allowed in a constant expression". +// At run time, the behavior of `std::char_traits::length()` on `nullptr` is +// undefined by the standard and usually results in crash with libc++. +// GCC also started rejected this in libstdc++ starting in GCC9. +// In MSVC, creating a constexpr string_view from nullptr also triggers an +// "unevaluable pointer value" error. This compiler implementation conforms +// to the standard, but `absl::string_view` implements a different +// behavior for historical reasons. We work around tests that construct +// `string_view` from `nullptr` when using libc++. 
+#if !defined(ABSL_USES_STD_STRING_VIEW) || \ + (!(defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE >= 9) && \ + !defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +#define ABSL_HAVE_STRING_VIEW_FROM_NULLPTR 1 +#endif + +TEST(StringViewTest, NULLInput) { + absl::string_view s; + EXPECT_EQ(s.data(), nullptr); + EXPECT_EQ(s.size(), 0); + +#ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR + s = absl::string_view(nullptr); + EXPECT_EQ(s.data(), nullptr); + EXPECT_EQ(s.size(), 0); + + // .ToString() on a absl::string_view with nullptr should produce the empty + // string. + EXPECT_EQ("", std::string(s)); +#endif // ABSL_HAVE_STRING_VIEW_FROM_NULLPTR +} + +TEST(StringViewTest, Comparisons2) { + // The `compare` member has 6 overloads (v: string_view, s: const char*): + // (1) compare(v) + // (2) compare(pos1, count1, v) + // (3) compare(pos1, count1, v, pos2, count2) + // (4) compare(s) + // (5) compare(pos1, count1, s) + // (6) compare(pos1, count1, s, count2) + + absl::string_view abc("abcdefghijklmnopqrstuvwxyz"); + + // check comparison operations on strings longer than 4 bytes. + EXPECT_EQ(abc, absl::string_view("abcdefghijklmnopqrstuvwxyz")); + EXPECT_EQ(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxyz")), 0); + + EXPECT_LT(abc, absl::string_view("abcdefghijklmnopqrstuvwxzz")); + EXPECT_LT(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxzz")), 0); + + EXPECT_GT(abc, absl::string_view("abcdefghijklmnopqrstuvwxyy")); + EXPECT_GT(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxyy")), 0); + + // The "substr" variants of `compare`. 
+ absl::string_view digits("0123456789"); + auto npos = absl::string_view::npos; + + // Taking string_view + EXPECT_EQ(digits.compare(3, npos, absl::string_view("3456789")), 0); // 2 + EXPECT_EQ(digits.compare(3, 4, absl::string_view("3456")), 0); // 2 + EXPECT_EQ(digits.compare(10, 0, absl::string_view()), 0); // 2 + EXPECT_EQ(digits.compare(3, 4, absl::string_view("0123456789"), 3, 4), + 0); // 3 + EXPECT_LT(digits.compare(3, 4, absl::string_view("0123456789"), 3, 5), + 0); // 3 + EXPECT_LT(digits.compare(0, npos, absl::string_view("0123456789"), 3, 5), + 0); // 3 + // Taking const char* + EXPECT_EQ(digits.compare(3, 4, "3456"), 0); // 5 + EXPECT_EQ(digits.compare(3, npos, "3456789"), 0); // 5 + EXPECT_EQ(digits.compare(10, 0, ""), 0); // 5 + EXPECT_EQ(digits.compare(3, 4, "0123456789", 3, 4), 0); // 6 + EXPECT_LT(digits.compare(3, 4, "0123456789", 3, 5), 0); // 6 + EXPECT_LT(digits.compare(0, npos, "0123456789", 3, 5), 0); // 6 +} + +TEST(StringViewTest, At) { + absl::string_view abc = "abc"; + EXPECT_EQ(abc.at(0), 'a'); + EXPECT_EQ(abc.at(1), 'b'); + EXPECT_EQ(abc.at(2), 'c'); +#ifdef ABSL_HAVE_EXCEPTIONS + EXPECT_THROW(abc.at(3), std::out_of_range); +#else + ABSL_EXPECT_DEATH_IF_SUPPORTED(abc.at(3), "absl::string_view::at"); +#endif +} + +struct MyCharAlloc : std::allocator<char> {}; + +TEST(StringViewTest, ExplicitConversionOperator) { + absl::string_view sp = "hi"; + EXPECT_EQ(sp, std::string(sp)); +} + +TEST(StringViewTest, NullSafeStringView) { + { + absl::string_view s = absl::NullSafeStringView(nullptr); + EXPECT_EQ(nullptr, s.data()); + EXPECT_EQ(0, s.size()); + EXPECT_EQ(absl::string_view(), s); + } + { + static const char kHi[] = "hi"; + absl::string_view s = absl::NullSafeStringView(kHi); + EXPECT_EQ(kHi, s.data()); + EXPECT_EQ(strlen(kHi), s.size()); + EXPECT_EQ(absl::string_view("hi"), s); + } +} + +TEST(StringViewTest, ConstexprNullSafeStringView) { + { + constexpr absl::string_view s = absl::NullSafeStringView(nullptr); + EXPECT_EQ(nullptr, 
s.data()); + EXPECT_EQ(0, s.size()); + EXPECT_EQ(absl::string_view(), s); + } +#if !defined(_MSC_VER) || _MSC_VER >= 1910 + // MSVC 2017+ is required for good constexpr string_view support. + // See the implementation of `absl::string_view::StrlenInternal()`. + { + static constexpr char kHi[] = "hi"; + absl::string_view s = absl::NullSafeStringView(kHi); + EXPECT_EQ(kHi, s.data()); + EXPECT_EQ(strlen(kHi), s.size()); + EXPECT_EQ(absl::string_view("hi"), s); + } + { + constexpr absl::string_view s = absl::NullSafeStringView("hello"); + EXPECT_EQ(s.size(), 5); + EXPECT_EQ("hello", s); + } +#endif +} + +TEST(StringViewTest, ConstexprCompiles) { + constexpr absl::string_view sp; +#ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR + constexpr absl::string_view cstr(nullptr); +#endif + constexpr absl::string_view cstr_len("cstr", 4); + +#if defined(ABSL_USES_STD_STRING_VIEW) + // In libstdc++ (as of 7.2), `std::string_view::string_view(const char*)` + // calls `std::char_traits<char>::length(const char*)` to get the string + // length, but it is not marked constexpr yet. See GCC bug: + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78156 + // Also, there is a LWG issue that adds constexpr to length() which was just + // resolved 2017-06-02. See + // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2232 + // TODO(zhangxy): Update the condition when libstdc++ adopts the constexpr + // length(). +#if !defined(__GLIBCXX__) +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 +#endif // !__GLIBCXX__ + +#else // ABSL_USES_STD_STRING_VIEW + +// This duplicates the check for __builtin_strlen in the header. +#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 +#elif defined(__GNUC__) // GCC or clang +#error GCC/clang should have constexpr string_view. +#endif + +// MSVC 2017+ should be able to construct a constexpr string_view from a cstr. 
+#if defined(_MSC_VER) && _MSC_VER >= 1910 +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 +#endif + +#endif // ABSL_USES_STD_STRING_VIEW + +#ifdef ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR + constexpr absl::string_view cstr_strlen("foo"); + EXPECT_EQ(cstr_strlen.length(), 3); + constexpr absl::string_view cstr_strlen2 = "bar"; + EXPECT_EQ(cstr_strlen2, "bar"); + +#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_COMPARISON 1 +#endif +#ifdef ABSL_HAVE_CONSTEXPR_STRING_VIEW_COMPARISON + constexpr absl::string_view foo = "foo"; + constexpr absl::string_view bar = "bar"; + constexpr bool foo_eq_bar = foo == bar; + constexpr bool foo_ne_bar = foo != bar; + constexpr bool foo_lt_bar = foo < bar; + constexpr bool foo_le_bar = foo <= bar; + constexpr bool foo_gt_bar = foo > bar; + constexpr bool foo_ge_bar = foo >= bar; + constexpr int foo_compare_bar = foo.compare(bar); + EXPECT_FALSE(foo_eq_bar); + EXPECT_TRUE(foo_ne_bar); + EXPECT_FALSE(foo_lt_bar); + EXPECT_FALSE(foo_le_bar); + EXPECT_TRUE(foo_gt_bar); + EXPECT_TRUE(foo_ge_bar); + EXPECT_GT(foo_compare_bar, 0); +#endif +#endif + +#if !defined(__clang__) || 3 < __clang_major__ || \ + (3 == __clang_major__ && 4 < __clang_minor__) + // older clang versions (< 3.5) complain that: + // "cannot perform pointer arithmetic on null pointer" + constexpr absl::string_view::iterator const_begin_empty = sp.begin(); + constexpr absl::string_view::iterator const_end_empty = sp.end(); + EXPECT_EQ(const_begin_empty, const_end_empty); + +#ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR + constexpr absl::string_view::iterator const_begin_nullptr = cstr.begin(); + constexpr absl::string_view::iterator const_end_nullptr = cstr.end(); + EXPECT_EQ(const_begin_nullptr, const_end_nullptr); +#endif // ABSL_HAVE_STRING_VIEW_FROM_NULLPTR +#endif // !defined(__clang__) || ... 
+ + constexpr absl::string_view::iterator const_begin = cstr_len.begin(); + constexpr absl::string_view::iterator const_end = cstr_len.end(); + constexpr absl::string_view::size_type const_size = cstr_len.size(); + constexpr absl::string_view::size_type const_length = cstr_len.length(); + static_assert(const_begin + const_size == const_end, + "pointer arithmetic check"); + static_assert(const_begin + const_length == const_end, + "pointer arithmetic check"); +#ifndef _MSC_VER + // MSVC has bugs doing constexpr pointer arithmetic. + // https://developercommunity.visualstudio.com/content/problem/482192/bad-pointer-arithmetic-in-constepxr-2019-rc1-svc1.html + EXPECT_EQ(const_begin + const_size, const_end); + EXPECT_EQ(const_begin + const_length, const_end); +#endif + + constexpr bool isempty = sp.empty(); + EXPECT_TRUE(isempty); + + constexpr const char c = cstr_len[2]; + EXPECT_EQ(c, 't'); + + constexpr const char cfront = cstr_len.front(); + constexpr const char cback = cstr_len.back(); + EXPECT_EQ(cfront, 'c'); + EXPECT_EQ(cback, 'r'); + + constexpr const char* np = sp.data(); + constexpr const char* cstr_ptr = cstr_len.data(); + EXPECT_EQ(np, nullptr); + EXPECT_NE(cstr_ptr, nullptr); + + constexpr size_t sp_npos = sp.npos; + EXPECT_EQ(sp_npos, -1); +} + +TEST(StringViewTest, ConstexprSubstr) { + constexpr absl::string_view foobar("foobar", 6); + constexpr absl::string_view foo = foobar.substr(0, 3); + constexpr absl::string_view bar = foobar.substr(3); + EXPECT_EQ(foo, "foo"); + EXPECT_EQ(bar, "bar"); +} + +TEST(StringViewTest, Noexcept) { + EXPECT_TRUE((std::is_nothrow_constructible<absl::string_view, + const std::string&>::value)); + EXPECT_TRUE((std::is_nothrow_constructible<absl::string_view, + const std::string&>::value)); + EXPECT_TRUE(std::is_nothrow_constructible<absl::string_view>::value); + constexpr absl::string_view sp; + EXPECT_TRUE(noexcept(sp.begin())); + EXPECT_TRUE(noexcept(sp.end())); + EXPECT_TRUE(noexcept(sp.cbegin())); + 
EXPECT_TRUE(noexcept(sp.cend())); + EXPECT_TRUE(noexcept(sp.rbegin())); + EXPECT_TRUE(noexcept(sp.rend())); + EXPECT_TRUE(noexcept(sp.crbegin())); + EXPECT_TRUE(noexcept(sp.crend())); + EXPECT_TRUE(noexcept(sp.size())); + EXPECT_TRUE(noexcept(sp.length())); + EXPECT_TRUE(noexcept(sp.empty())); + EXPECT_TRUE(noexcept(sp.data())); + EXPECT_TRUE(noexcept(sp.compare(sp))); + EXPECT_TRUE(noexcept(sp.find(sp))); + EXPECT_TRUE(noexcept(sp.find('f'))); + EXPECT_TRUE(noexcept(sp.rfind(sp))); + EXPECT_TRUE(noexcept(sp.rfind('f'))); + EXPECT_TRUE(noexcept(sp.find_first_of(sp))); + EXPECT_TRUE(noexcept(sp.find_first_of('f'))); + EXPECT_TRUE(noexcept(sp.find_last_of(sp))); + EXPECT_TRUE(noexcept(sp.find_last_of('f'))); + EXPECT_TRUE(noexcept(sp.find_first_not_of(sp))); + EXPECT_TRUE(noexcept(sp.find_first_not_of('f'))); + EXPECT_TRUE(noexcept(sp.find_last_not_of(sp))); + EXPECT_TRUE(noexcept(sp.find_last_not_of('f'))); +} + +TEST(StringViewTest, BoundsCheck) { +#ifndef ABSL_USES_STD_STRING_VIEW +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED + // Abseil's string_view implementation has bounds-checking in debug mode. + absl::string_view h = "hello"; + ABSL_EXPECT_DEATH_IF_SUPPORTED(h[5], ""); + ABSL_EXPECT_DEATH_IF_SUPPORTED(h[-1], ""); +#endif +#endif +} + +TEST(ComparisonOpsTest, StringCompareNotAmbiguous) { + EXPECT_EQ("hello", std::string("hello")); + EXPECT_LT("hello", std::string("world")); +} + +TEST(ComparisonOpsTest, HeterogenousStringViewEquals) { + EXPECT_EQ(absl::string_view("hello"), std::string("hello")); + EXPECT_EQ("hello", absl::string_view("hello")); +} + +TEST(FindOneCharTest, EdgeCases) { + absl::string_view a("xxyyyxx"); + + // Set a = "xyyyx". 
+ a.remove_prefix(1); + a.remove_suffix(1); + + EXPECT_EQ(0, a.find('x')); + EXPECT_EQ(0, a.find('x', 0)); + EXPECT_EQ(4, a.find('x', 1)); + EXPECT_EQ(4, a.find('x', 4)); + EXPECT_EQ(absl::string_view::npos, a.find('x', 5)); + + EXPECT_EQ(4, a.rfind('x')); + EXPECT_EQ(4, a.rfind('x', 5)); + EXPECT_EQ(4, a.rfind('x', 4)); + EXPECT_EQ(0, a.rfind('x', 3)); + EXPECT_EQ(0, a.rfind('x', 0)); + + // Set a = "yyy". + a.remove_prefix(1); + a.remove_suffix(1); + + EXPECT_EQ(absl::string_view::npos, a.find('x')); + EXPECT_EQ(absl::string_view::npos, a.rfind('x')); +} + +#ifndef ABSL_HAVE_THREAD_SANITIZER // Allocates too much memory for tsan. +TEST(HugeStringView, TwoPointTwoGB) { + if (sizeof(size_t) <= 4) + return; + // Try a huge string piece. + const size_t size = size_t{2200} * 1000 * 1000; + std::string s(size, 'a'); + absl::string_view sp(s); + EXPECT_EQ(size, sp.length()); + sp.remove_prefix(1); + EXPECT_EQ(size - 1, sp.length()); + sp.remove_suffix(2); + EXPECT_EQ(size - 1 - 2, sp.length()); +} +#endif // ABSL_HAVE_THREAD_SANITIZER + +#if !defined(NDEBUG) && !defined(ABSL_USES_STD_STRING_VIEW) +TEST(NonNegativeLenTest, NonNegativeLen) { + ABSL_EXPECT_DEATH_IF_SUPPORTED(absl::string_view("xyz", -1), + "len <= kMaxSize"); +} + +TEST(LenExceedsMaxSizeTest, LenExceedsMaxSize) { + auto max_size = absl::string_view().max_size(); + + // This should construct ok (although the view itself is obviously invalid). + absl::string_view ok_view("", max_size); + + // Adding one to the max should trigger an assertion. + ABSL_EXPECT_DEATH_IF_SUPPORTED(absl::string_view("", max_size + 1), + "len <= kMaxSize"); +} +#endif // !defined(NDEBUG) && !defined(ABSL_USES_STD_STRING_VIEW) + +class StringViewStreamTest : public ::testing::Test { + public: + // Set negative 'width' for right justification. 
+ template <typename T> + std::string Pad(const T& s, int width, char fill = 0) { + std::ostringstream oss; + if (fill != 0) { + oss << std::setfill(fill); + } + if (width < 0) { + width = -width; + oss << std::right; + } + oss << std::setw(width) << s; + return oss.str(); + } +}; + +TEST_F(StringViewStreamTest, Padding) { + std::string s("hello"); + absl::string_view sp(s); + for (int w = -64; w < 64; ++w) { + SCOPED_TRACE(w); + EXPECT_EQ(Pad(s, w), Pad(sp, w)); + } + for (int w = -64; w < 64; ++w) { + SCOPED_TRACE(w); + EXPECT_EQ(Pad(s, w, '#'), Pad(sp, w, '#')); + } +} + +TEST_F(StringViewStreamTest, ResetsWidth) { + // Width should reset after one formatted write. + // If we weren't resetting width after formatting the string_view, + // we'd have width=5 carrying over to the printing of the "]", + // creating "[###hi####]". + std::string s = "hi"; + absl::string_view sp = s; + { + std::ostringstream oss; + oss << "[" << std::setfill('#') << std::setw(5) << s << "]"; + ASSERT_EQ("[###hi]", oss.str()); + } + { + std::ostringstream oss; + oss << "[" << std::setfill('#') << std::setw(5) << sp << "]"; + EXPECT_EQ("[###hi]", oss.str()); + } +} + +} // namespace diff --git a/third_party/abseil_cpp/absl/strings/strip.h b/third_party/abseil_cpp/absl/strings/strip.h new file mode 100644 index 000000000000..111872ca54ba --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/strip.h @@ -0,0 +1,91 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: strip.h +// ----------------------------------------------------------------------------- +// +// This file contains various functions for stripping substrings from a string. +#ifndef ABSL_STRINGS_STRIP_H_ +#define ABSL_STRINGS_STRIP_H_ + +#include <cstddef> +#include <string> + +#include "absl/base/macros.h" +#include "absl/strings/ascii.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// ConsumePrefix() +// +// Strips the `expected` prefix from the start of the given string, returning +// `true` if the strip operation succeeded or false otherwise. +// +// Example: +// +// absl::string_view input("abc"); +// EXPECT_TRUE(absl::ConsumePrefix(&input, "a")); +// EXPECT_EQ(input, "bc"); +inline bool ConsumePrefix(absl::string_view* str, absl::string_view expected) { + if (!absl::StartsWith(*str, expected)) return false; + str->remove_prefix(expected.size()); + return true; +} +// ConsumeSuffix() +// +// Strips the `expected` suffix from the end of the given string, returning +// `true` if the strip operation succeeded or false otherwise. +// +// Example: +// +// absl::string_view input("abcdef"); +// EXPECT_TRUE(absl::ConsumeSuffix(&input, "def")); +// EXPECT_EQ(input, "abc"); +inline bool ConsumeSuffix(absl::string_view* str, absl::string_view expected) { + if (!absl::EndsWith(*str, expected)) return false; + str->remove_suffix(expected.size()); + return true; +} + +// StripPrefix() +// +// Returns a view into the input string 'str' with the given 'prefix' removed, +// but leaving the original string intact. If the prefix does not match at the +// start of the string, returns the original string instead. 
+ABSL_MUST_USE_RESULT inline absl::string_view StripPrefix( + absl::string_view str, absl::string_view prefix) { + if (absl::StartsWith(str, prefix)) str.remove_prefix(prefix.size()); + return str; +} + +// StripSuffix() +// +// Returns a view into the input string 'str' with the given 'suffix' removed, +// but leaving the original string intact. If the suffix does not match at the +// end of the string, returns the original string instead. +ABSL_MUST_USE_RESULT inline absl::string_view StripSuffix( + absl::string_view str, absl::string_view suffix) { + if (absl::EndsWith(str, suffix)) str.remove_suffix(suffix.size()); + return str; +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_STRIP_H_ diff --git a/third_party/abseil_cpp/absl/strings/strip_test.cc b/third_party/abseil_cpp/absl/strings/strip_test.cc new file mode 100644 index 000000000000..e4e00cb66ebc --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/strip_test.cc @@ -0,0 +1,198 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains functions that remove a defined part from the string, +// i.e., strip the string. 
+ +#include "absl/strings/strip.h" + +#include <cassert> +#include <cstdio> +#include <cstring> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/string_view.h" + +namespace { + +TEST(Strip, ConsumePrefixOneChar) { + absl::string_view input("abc"); + EXPECT_TRUE(absl::ConsumePrefix(&input, "a")); + EXPECT_EQ(input, "bc"); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "x")); + EXPECT_EQ(input, "bc"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "b")); + EXPECT_EQ(input, "c"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "c")); + EXPECT_EQ(input, ""); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "a")); + EXPECT_EQ(input, ""); +} + +TEST(Strip, ConsumePrefix) { + absl::string_view input("abcdef"); + EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdefg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "abce")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdeg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumePrefix(&input, "abcdef")); + EXPECT_EQ(input, ""); + + input = "abcdef"; + EXPECT_TRUE(absl::ConsumePrefix(&input, "abcde")); + EXPECT_EQ(input, "f"); +} + +TEST(Strip, ConsumeSuffix) { + absl::string_view input("abcdef"); + EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdefg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "def")); + EXPECT_EQ(input, "abc"); + + input = "abcdef"; + EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdeg")); + EXPECT_EQ(input, "abcdef"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "f")); + EXPECT_EQ(input, "abcde"); + + EXPECT_TRUE(absl::ConsumeSuffix(&input, "abcde")); + EXPECT_EQ(input, ""); +} + +TEST(Strip, StripPrefix) { + const absl::string_view null_str; + + EXPECT_EQ(absl::StripPrefix("foobar", "foo"), "bar"); + EXPECT_EQ(absl::StripPrefix("foobar", 
""), "foobar"); + EXPECT_EQ(absl::StripPrefix("foobar", null_str), "foobar"); + EXPECT_EQ(absl::StripPrefix("foobar", "foobar"), ""); + EXPECT_EQ(absl::StripPrefix("foobar", "bar"), "foobar"); + EXPECT_EQ(absl::StripPrefix("foobar", "foobarr"), "foobar"); + EXPECT_EQ(absl::StripPrefix("", ""), ""); +} + +TEST(Strip, StripSuffix) { + const absl::string_view null_str; + + EXPECT_EQ(absl::StripSuffix("foobar", "bar"), "foo"); + EXPECT_EQ(absl::StripSuffix("foobar", ""), "foobar"); + EXPECT_EQ(absl::StripSuffix("foobar", null_str), "foobar"); + EXPECT_EQ(absl::StripSuffix("foobar", "foobar"), ""); + EXPECT_EQ(absl::StripSuffix("foobar", "foo"), "foobar"); + EXPECT_EQ(absl::StripSuffix("foobar", "ffoobar"), "foobar"); + EXPECT_EQ(absl::StripSuffix("", ""), ""); +} + +TEST(Strip, RemoveExtraAsciiWhitespace) { + const char* inputs[] = { + "No extra space", + " Leading whitespace", + "Trailing whitespace ", + " Leading and trailing ", + " Whitespace \t in\v middle ", + "'Eeeeep! \n Newlines!\n", + "nospaces", + }; + const char* outputs[] = { + "No extra space", + "Leading whitespace", + "Trailing whitespace", + "Leading and trailing", + "Whitespace in middle", + "'Eeeeep! 
Newlines!", + "nospaces", + }; + int NUM_TESTS = 7; + + for (int i = 0; i < NUM_TESTS; i++) { + std::string s(inputs[i]); + absl::RemoveExtraAsciiWhitespace(&s); + EXPECT_STREQ(outputs[i], s.c_str()); + } + + // Test that absl::RemoveExtraAsciiWhitespace returns immediately for empty + // strings (It was adding the \0 character to the C++ std::string, which broke + // tests involving empty()) + std::string zero_string = ""; + assert(zero_string.empty()); + absl::RemoveExtraAsciiWhitespace(&zero_string); + EXPECT_EQ(zero_string.size(), 0); + EXPECT_TRUE(zero_string.empty()); +} + +TEST(Strip, StripTrailingAsciiWhitespace) { + std::string test = "foo "; + absl::StripTrailingAsciiWhitespace(&test); + EXPECT_EQ(test, "foo"); + + test = " "; + absl::StripTrailingAsciiWhitespace(&test); + EXPECT_EQ(test, ""); + + test = ""; + absl::StripTrailingAsciiWhitespace(&test); + EXPECT_EQ(test, ""); + + test = " abc\t"; + absl::StripTrailingAsciiWhitespace(&test); + EXPECT_EQ(test, " abc"); +} + +TEST(String, StripLeadingAsciiWhitespace) { + absl::string_view orig = "\t \n\f\r\n\vfoo"; + EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace(orig)); + orig = "\t \n\f\r\v\n\t \n\f\r\v\n"; + EXPECT_EQ(absl::string_view(), absl::StripLeadingAsciiWhitespace(orig)); +} + +TEST(Strip, StripAsciiWhitespace) { + std::string test2 = "\t \f\r\n\vfoo \t\f\r\v\n"; + absl::StripAsciiWhitespace(&test2); + EXPECT_EQ(test2, "foo"); + std::string test3 = "bar"; + absl::StripAsciiWhitespace(&test3); + EXPECT_EQ(test3, "bar"); + std::string test4 = "\t \f\r\n\vfoo"; + absl::StripAsciiWhitespace(&test4); + EXPECT_EQ(test4, "foo"); + std::string test5 = "foo \t\f\r\v\n"; + absl::StripAsciiWhitespace(&test5); + EXPECT_EQ(test5, "foo"); + absl::string_view test6("\t \f\r\n\vfoo \t\f\r\v\n"); + test6 = absl::StripAsciiWhitespace(test6); + EXPECT_EQ(test6, "foo"); + test6 = absl::StripAsciiWhitespace(test6); + EXPECT_EQ(test6, "foo"); // already stripped +} + +} // namespace diff --git 
a/third_party/abseil_cpp/absl/strings/substitute.cc b/third_party/abseil_cpp/absl/strings/substitute.cc new file mode 100644 index 000000000000..1f3c7409abb6 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/substitute.cc @@ -0,0 +1,171 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/substitute.h" + +#include <algorithm> + +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/ascii.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace substitute_internal { + +void SubstituteAndAppendArray(std::string* output, absl::string_view format, + const absl::string_view* args_array, + size_t num_args) { + // Determine total size needed. + size_t size = 0; + for (size_t i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (i + 1 >= format.size()) { +#ifndef NDEBUG + ABSL_RAW_LOG(FATAL, + "Invalid absl::Substitute() format string: \"%s\".", + absl::CEscape(format).c_str()); +#endif + return; + } else if (absl::ascii_isdigit(format[i + 1])) { + int index = format[i + 1] - '0'; + if (static_cast<size_t>(index) >= num_args) { +#ifndef NDEBUG + ABSL_RAW_LOG( + FATAL, + "Invalid absl::Substitute() format string: asked for \"$" + "%d\", but only %d args were given. 
Full format string was: " + "\"%s\".", + index, static_cast<int>(num_args), absl::CEscape(format).c_str()); +#endif + return; + } + size += args_array[index].size(); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + ++size; + ++i; // Skip next char. + } else { +#ifndef NDEBUG + ABSL_RAW_LOG(FATAL, + "Invalid absl::Substitute() format string: \"%s\".", + absl::CEscape(format).c_str()); +#endif + return; + } + } else { + ++size; + } + } + + if (size == 0) return; + + // Build the string. + size_t original_size = output->size(); + strings_internal::STLStringResizeUninitialized(output, original_size + size); + char* target = &(*output)[original_size]; + for (size_t i = 0; i < format.size(); i++) { + if (format[i] == '$') { + if (absl::ascii_isdigit(format[i + 1])) { + const absl::string_view src = args_array[format[i + 1] - '0']; + target = std::copy(src.begin(), src.end(), target); + ++i; // Skip next char. + } else if (format[i + 1] == '$') { + *target++ = '$'; + ++i; // Skip next char. 
+ } + } else { + *target++ = format[i]; + } + } + + assert(target == output->data() + output->size()); +} + +Arg::Arg(const void* value) { + static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2, + "fix sizeof(scratch_)"); + if (value == nullptr) { + piece_ = "NULL"; + } else { + char* ptr = scratch_ + sizeof(scratch_); + uintptr_t num = reinterpret_cast<uintptr_t>(value); + do { + *--ptr = absl::numbers_internal::kHexChar[num & 0xf]; + num >>= 4; + } while (num != 0); + *--ptr = 'x'; + *--ptr = '0'; + piece_ = absl::string_view(ptr, scratch_ + sizeof(scratch_) - ptr); + } +} + +// TODO(jorg): Don't duplicate so much code between here and str_cat.cc +Arg::Arg(Hex hex) { + char* const end = &scratch_[numbers_internal::kFastToBufferSize]; + char* writer = end; + uint64_t value = hex.value; + do { + *--writer = absl::numbers_internal::kHexChar[value & 0xF]; + value >>= 4; + } while (value != 0); + + char* beg; + if (end - writer < hex.width) { + beg = end - hex.width; + std::fill_n(beg, writer - beg, hex.fill); + } else { + beg = writer; + } + + piece_ = absl::string_view(beg, end - beg); +} + +// TODO(jorg): Don't duplicate so much code between here and str_cat.cc +Arg::Arg(Dec dec) { + assert(dec.width <= numbers_internal::kFastToBufferSize); + char* const end = &scratch_[numbers_internal::kFastToBufferSize]; + char* const minfill = end - dec.width; + char* writer = end; + uint64_t value = dec.value; + bool neg = dec.neg; + while (value > 9) { + *--writer = '0' + (value % 10); + value /= 10; + } + *--writer = '0' + value; + if (neg) *--writer = '-'; + + ptrdiff_t fillers = writer - minfill; + if (fillers > 0) { + // Tricky: if the fill character is ' ', then it's <fill><+/-><digits> + // But...: if the fill character is '0', then it's <+/-><fill><digits> + bool add_sign_again = false; + if (neg && dec.fill == '0') { // If filling with '0', + ++writer; // ignore the sign we just added + add_sign_again = true; // and re-add the sign later. 
+ } + writer -= fillers; + std::fill_n(writer, fillers, dec.fill); + if (add_sign_again) *--writer = '-'; + } + + piece_ = absl::string_view(writer, end - writer); +} + +} // namespace substitute_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/strings/substitute.h b/third_party/abseil_cpp/absl/strings/substitute.h new file mode 100644 index 000000000000..c6da4dc6e722 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/substitute.h @@ -0,0 +1,696 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: substitute.h +// ----------------------------------------------------------------------------- +// +// This package contains functions for efficiently performing string +// substitutions using a format string with positional notation: +// `Substitute()` and `SubstituteAndAppend()`. +// +// Unlike printf-style format specifiers, `Substitute()` functions do not need +// to specify the type of the substitution arguments. Supported arguments +// following the format string, such as strings, string_views, ints, +// floats, and bools, are automatically converted to strings during the +// substitution process. (See below for a full list of supported types.) +// +// `Substitute()` does not allow you to specify *how* to format a value, beyond +// the default conversion to string. 
For example, you cannot format an integer +// in hex. +// +// The format string uses positional identifiers indicated by a dollar sign ($) +// and single digit positional ids to indicate which substitution arguments to +// use at that location within the format string. +// +// A '$$' sequence in the format string causes a literal '$' character to be +// output. +// +// Example 1: +// std::string s = Substitute("$1 purchased $0 $2 for $$10. Thanks $1!", +// 5, "Bob", "Apples"); +// EXPECT_EQ("Bob purchased 5 Apples for $10. Thanks Bob!", s); +// +// Example 2: +// std::string s = "Hi. "; +// SubstituteAndAppend(&s, "My name is $0 and I am $1 years old.", "Bob", 5); +// EXPECT_EQ("Hi. My name is Bob and I am 5 years old.", s); +// +// Supported types: +// * absl::string_view, std::string, const char* (null is equivalent to "") +// * int32_t, int64_t, uint32_t, uint64_t +// * float, double +// * bool (Printed as "true" or "false") +// * pointer types other than char* (Printed as "0x<lower case hex string>", +// except that null is printed as "NULL") +// +// If an invalid format string is provided, Substitute returns an empty string +// and SubstituteAndAppend does not change the provided output string. +// A format string is invalid if it: +// * ends in an unescaped $ character, +// e.g. "Hello $", or +// * calls for a position argument which is not provided, +// e.g. Substitute("Hello $2", "world"), or +// * specifies a non-digit, non-$ character after an unescaped $ character, +// e.g. "Hello $f". +// In debug mode, i.e. #ifndef NDEBUG, such errors terminate the program. 
+ +#ifndef ABSL_STRINGS_SUBSTITUTE_H_ +#define ABSL_STRINGS_SUBSTITUTE_H_ + +#include <cstring> +#include <string> +#include <type_traits> +#include <vector> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/ascii.h" +#include "absl/strings/escaping.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace substitute_internal { + +// Arg +// +// This class provides an argument type for `absl::Substitute()` and +// `absl::SubstituteAndAppend()`. `Arg` handles implicit conversion of various +// types to a string. (`Arg` is very similar to the `AlphaNum` class in +// `StrCat()`.) +// +// This class has implicit constructors. +class Arg { + public: + // Overloads for string-y things + // + // Explicitly overload `const char*` so the compiler doesn't cast to `bool`. + Arg(const char* value) // NOLINT(runtime/explicit) + : piece_(absl::NullSafeStringView(value)) {} + template <typename Allocator> + Arg( // NOLINT + const std::basic_string<char, std::char_traits<char>, Allocator>& + value) noexcept + : piece_(value) {} + Arg(absl::string_view value) // NOLINT(runtime/explicit) + : piece_(value) {} + + // Overloads for primitives + // + // No overloads are available for signed and unsigned char because if people + // are explicitly declaring their chars as signed or unsigned then they are + // probably using them as 8-bit integers and would probably prefer an integer + // representation. However, we can't really know, so we make the caller decide + // what to do. 
+ Arg(char value) // NOLINT(runtime/explicit) + : piece_(scratch_, 1) { + scratch_[0] = value; + } + Arg(short value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned short value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(int value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned int value) // NOLINT(runtime/explicit) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(long long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(unsigned long long value) // NOLINT(*) + : piece_(scratch_, + numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} + Arg(float value) // NOLINT(runtime/explicit) + : piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) { + } + Arg(double value) // NOLINT(runtime/explicit) + : piece_(scratch_, numbers_internal::SixDigitsToBuffer(value, scratch_)) { + } + Arg(bool value) // NOLINT(runtime/explicit) + : piece_(value ? "true" : "false") {} + + Arg(Hex hex); // NOLINT(runtime/explicit) + Arg(Dec dec); // NOLINT(runtime/explicit) + + // vector<bool>::reference and const_reference require special help to + // convert to `AlphaNum` because it requires two user defined conversions. 
+ template <typename T, + absl::enable_if_t< + std::is_class<T>::value && + (std::is_same<T, std::vector<bool>::reference>::value || + std::is_same<T, std::vector<bool>::const_reference>::value)>* = + nullptr> + Arg(T value) // NOLINT(google-explicit-constructor) + : Arg(static_cast<bool>(value)) {} + + // `void*` values, with the exception of `char*`, are printed as + // "0x<hex value>". However, in the case of `nullptr`, "NULL" is printed. + Arg(const void* value); // NOLINT(runtime/explicit) + + Arg(const Arg&) = delete; + Arg& operator=(const Arg&) = delete; + + absl::string_view piece() const { return piece_; } + + private: + absl::string_view piece_; + char scratch_[numbers_internal::kFastToBufferSize]; +}; + +// Internal helper function. Don't call this from outside this implementation. +// This interface may change without notice. +void SubstituteAndAppendArray(std::string* output, absl::string_view format, + const absl::string_view* args_array, + size_t num_args); + +#if defined(ABSL_BAD_CALL_IF) +constexpr int CalculateOneBit(const char* format) { + // Returns: + // * 2^N for '$N' when N is in [0-9] + // * 0 for correct '$' escaping: '$$'. + // * -1 otherwise. + return (*format < '0' || *format > '9') ? (*format == '$' ? 0 : -1) + : (1 << (*format - '0')); +} + +constexpr const char* SkipNumber(const char* format) { + return !*format ? format : (format + 1); +} + +constexpr int PlaceholderBitmask(const char* format) { + return !*format + ? 0 + : *format != '$' ? PlaceholderBitmask(format + 1) + : (CalculateOneBit(format + 1) | + PlaceholderBitmask(SkipNumber(format + 1))); +} +#endif // ABSL_BAD_CALL_IF + +} // namespace substitute_internal + +// +// PUBLIC API +// + +// SubstituteAndAppend() +// +// Substitutes variables into a given format string and appends to a given +// output string. See file comments above for usage. +// +// The declarations of `SubstituteAndAppend()` below consist of overloads +// for passing 0 to 10 arguments, respectively. 
+// +// NOTE: A zero-argument `SubstituteAndAppend()` may be used within variadic +// templates to allow a variable number of arguments. +// +// Example: +// template <typename... Args> +// void VarMsg(std::string* boilerplate, absl::string_view format, +// const Args&... args) { +// absl::SubstituteAndAppend(boilerplate, format, args...); +// } +// +inline void SubstituteAndAppend(std::string* output, absl::string_view format) { + substitute_internal::SubstituteAndAppendArray(output, format, nullptr, 0); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0) { + const absl::string_view args[] = {a0.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) { + const absl::string_view args[] = {a0.piece(), a1.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const 
substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend(std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece(), a7.piece()}; + 
substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, + const substitute_internal::Arg& a8) { + const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(), + a3.piece(), a4.piece(), a5.piece(), + a6.piece(), a7.piece(), a8.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +inline void SubstituteAndAppend( + std::string* output, absl::string_view format, + const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, + const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) { + const absl::string_view args[] = { + a0.piece(), a1.piece(), a2.piece(), a3.piece(), a4.piece(), + a5.piece(), a6.piece(), a7.piece(), a8.piece(), a9.piece()}; + substitute_internal::SubstituteAndAppendArray(output, format, args, + ABSL_ARRAYSIZE(args)); +} + +#if defined(ABSL_BAD_CALL_IF) +// This body of functions catches cases where the number of placeholders +// doesn't match the number of data arguments. 
// The overloads below are declarations only (no bodies) and take the format
// as a `const char*`. An overload taking N substitution arguments carries an
// ABSL_BAD_CALL_IF whose condition compares
// `substitute_internal::PlaceholderBitmask(format)` against the value with the
// low N bits set (0, 1, 3, 7, ..., 1023), paired with the diagnostic text to
// report on a mismatch.
// NOTE(review): presumably bit i of the bitmask is set when "$i" occurs in
// `format`, and an overload is only selected when its condition holds --
// confirm against PlaceholderBitmask() and the ABSL_BAD_CALL_IF definition.
void SubstituteAndAppend(std::string* output, const char* format)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0,
                     "There were no substitution arguments "
                     "but this format string has a $[0-9] in it");

void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1,
                     "There was 1 substitution argument given, but "
                     "this format string is either missing its $0, or "
                     "contains one of $1-$9");

void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3,
                     "There were 2 substitution arguments given, but "
                     "this format string is either missing its $0/$1, or "
                     "contains one of $2-$9");

void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7,
                     "There were 3 substitution arguments given, but "
                     "this format string is either missing its $0/$1/$2, or "
                     "contains one of $3-$9");

void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2,
                         const substitute_internal::Arg& a3)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15,
                     "There were 4 substitution arguments given, but "
                     "this format string is either missing its $0-$3, or "
                     "contains one of $4-$9");

void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2,
                         const substitute_internal::Arg& a3,
                         const substitute_internal::Arg& a4)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31,
                     "There were 5 substitution arguments given, but "
                     "this format string is either missing its $0-$4, or "
                     "contains one of $5-$9");

void SubstituteAndAppend(std::string* output, const char* format,
                         const substitute_internal::Arg& a0,
                         const substitute_internal::Arg& a1,
                         const substitute_internal::Arg& a2,
                         const substitute_internal::Arg& a3,
                         const substitute_internal::Arg& a4,
                         const substitute_internal::Arg& a5)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63,
                     "There were 6 substitution arguments given, but "
                     "this format string is either missing its $0-$5, or "
                     "contains one of $6-$9");

void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127,
                     "There were 7 substitution arguments given, but "
                     "this format string is either missing its $0-$6, or "
                     "contains one of $7-$9");

void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255,
                     "There were 8 substitution arguments given, but "
                     "this format string is either missing its $0-$7, or "
                     "contains one of $8-$9");

void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7, const substitute_internal::Arg& a8)
    ABSL_BAD_CALL_IF(
        substitute_internal::PlaceholderBitmask(format) != 511,
        "There were 9 substitution arguments given, but "
        "this format string is either missing its $0-$8, or contains a $9");

void SubstituteAndAppend(
    std::string* output, const char* format, const substitute_internal::Arg& a0,
    const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
    const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
    const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
    const substitute_internal::Arg& a7, const substitute_internal::Arg& a8,
    const substitute_internal::Arg& a9)
    ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023,
                     "There were 10 substitution arguments given, but this "
                     "format string doesn't contain all of $0 through $9");
#endif  // ABSL_BAD_CALL_IF

// Substitute()
//
// Substitutes variables into a given format string. See file comments above
// for usage.
//
// The declarations of `Substitute()` below consist of overloads for passing 0
// to 10 arguments, respectively.
//
// NOTE: A zero-argument `Substitute()` may be used within variadic templates to
// allow a variable number of arguments.
//
// Example:
//   template <typename... Args>
//   void VarMsg(absl::string_view format, const Args&...
args) { +// std::string s = absl::Substitute(format, args...); + +ABSL_MUST_USE_RESULT inline std::string Substitute(absl::string_view format) { + std::string result; + SubstituteAndAppend(&result, format); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0) { + std::string result; + SubstituteAndAppend(&result, format, a0); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) { + std::string 
result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8); + return result; +} + +ABSL_MUST_USE_RESULT inline std::string Substitute( + absl::string_view format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const 
substitute_internal::Arg& a9) { + std::string result; + SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9); + return result; +} + +#if defined(ABSL_BAD_CALL_IF) +// This body of functions catches cases where the number of placeholders +// doesn't match the number of data arguments. +std::string Substitute(const char* format) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format string has a $[0-9] in it"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format string is either missing its $0, or " + "contains one of $1-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but " + "this format string is either missing its $0/$1, or " + "contains one of $2-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is either missing its $0/$1/$2, or " + "contains one of $3-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is either missing its $0-$3, or " + "contains one of $4-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const 
substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is either missing its $0-$4, or " + "contains one of $5-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is either missing its $0-$5, or " + "contains one of $6-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is either missing its $0-$6, or " + "contains one of $7-$9"); + +std::string Substitute(const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, + const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, + const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, + const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is either missing its $0-$7, or " + "contains one of $8-$9"); + +std::string Substitute( + const char* format, const substitute_internal::Arg& 
a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 511, + "There were 9 substitution arguments given, but " + "this format string is either missing its $0-$8, or contains a $9"); + +std::string Substitute( + const char* format, const substitute_internal::Arg& a0, + const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, + const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, + const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, + const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, + const substitute_internal::Arg& a9) + ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string doesn't contain all of $0 through $9"); +#endif // ABSL_BAD_CALL_IF + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_SUBSTITUTE_H_ diff --git a/third_party/abseil_cpp/absl/strings/substitute_test.cc b/third_party/abseil_cpp/absl/strings/substitute_test.cc new file mode 100644 index 000000000000..442c921528d8 --- /dev/null +++ b/third_party/abseil_cpp/absl/strings/substitute_test.cc @@ -0,0 +1,204 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/substitute.h" + +#include <cstdint> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" + +namespace { + +TEST(SubstituteTest, Substitute) { + // Basic. + EXPECT_EQ("Hello, world!", absl::Substitute("$0, $1!", "Hello", "world")); + + // Non-char* types. + EXPECT_EQ("123 0.2 0.1 foo true false x", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6", 123, 0.2, 0.1f, + std::string("foo"), true, false, 'x')); + + // All int types. + EXPECT_EQ( + "-32767 65535 " + "-1234567890 3234567890 " + "-1234567890 3234567890 " + "-1234567890123456789 9234567890123456789", + absl::Substitute( + "$0 $1 $2 $3 $4 $5 $6 $7", + static_cast<short>(-32767), // NOLINT(runtime/int) + static_cast<unsigned short>(65535), // NOLINT(runtime/int) + -1234567890, 3234567890U, -1234567890L, 3234567890UL, + -int64_t{1234567890123456789}, uint64_t{9234567890123456789u})); + + // Hex format + EXPECT_EQ("0 1 f ffff0ffff 0123456789abcdef", + absl::Substitute("$0$1$2$3$4 $5", // + absl::Hex(0), absl::Hex(1, absl::kSpacePad2), + absl::Hex(0xf, absl::kSpacePad2), + absl::Hex(int16_t{-1}, absl::kSpacePad5), + absl::Hex(int16_t{-1}, absl::kZeroPad5), + absl::Hex(0x123456789abcdef, absl::kZeroPad16))); + + // Dec format + EXPECT_EQ("0 115 -1-0001 81985529216486895", + absl::Substitute("$0$1$2$3$4 $5", // + absl::Dec(0), absl::Dec(1, absl::kSpacePad2), + absl::Dec(0xf, absl::kSpacePad2), + absl::Dec(int16_t{-1}, absl::kSpacePad5), + absl::Dec(int16_t{-1}, absl::kZeroPad5), + absl::Dec(0x123456789abcdef, absl::kZeroPad16))); + + // Pointer. + const int* int_p = reinterpret_cast<const int*>(0x12345); + std::string str = absl::Substitute("$0", int_p); + EXPECT_EQ(absl::StrCat("0x", absl::Hex(int_p)), str); + + // Volatile Pointer. 
+ // Like C++ streamed I/O, such pointers implicitly become bool + volatile int vol = 237; + volatile int *volatile volptr = &vol; + str = absl::Substitute("$0", volptr); + EXPECT_EQ("true", str); + + // null is special. StrCat prints 0x0. Substitute prints NULL. + const uint64_t* null_p = nullptr; + str = absl::Substitute("$0", null_p); + EXPECT_EQ("NULL", str); + + // char* is also special. + const char* char_p = "print me"; + str = absl::Substitute("$0", char_p); + EXPECT_EQ("print me", str); + + char char_buf[16]; + strncpy(char_buf, "print me too", sizeof(char_buf)); + str = absl::Substitute("$0", char_buf); + EXPECT_EQ("print me too", str); + + // null char* is "doubly" special. Represented as the empty string. + char_p = nullptr; + str = absl::Substitute("$0", char_p); + EXPECT_EQ("", str); + + // Out-of-order. + EXPECT_EQ("b, a, c, b", absl::Substitute("$1, $0, $2, $1", "a", "b", "c")); + + // Literal $ + EXPECT_EQ("$", absl::Substitute("$$")); + + EXPECT_EQ("$1", absl::Substitute("$$1")); + + // Test all overloads. 
+ EXPECT_EQ("a", absl::Substitute("$0", "a")); + EXPECT_EQ("a b", absl::Substitute("$0 $1", "a", "b")); + EXPECT_EQ("a b c", absl::Substitute("$0 $1 $2", "a", "b", "c")); + EXPECT_EQ("a b c d", absl::Substitute("$0 $1 $2 $3", "a", "b", "c", "d")); + EXPECT_EQ("a b c d e", + absl::Substitute("$0 $1 $2 $3 $4", "a", "b", "c", "d", "e")); + EXPECT_EQ("a b c d e f", absl::Substitute("$0 $1 $2 $3 $4 $5", "a", "b", "c", + "d", "e", "f")); + EXPECT_EQ("a b c d e f g", absl::Substitute("$0 $1 $2 $3 $4 $5 $6", "a", "b", + "c", "d", "e", "f", "g")); + EXPECT_EQ("a b c d e f g h", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d", "e", + "f", "g", "h")); + EXPECT_EQ("a b c d e f g h i", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c", "d", + "e", "f", "g", "h", "i")); + EXPECT_EQ("a b c d e f g h i j", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", "c", + "d", "e", "f", "g", "h", "i", "j")); + EXPECT_EQ("a b c d e f g h i j b0", + absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9 $10", "a", "b", "c", + "d", "e", "f", "g", "h", "i", "j")); + + const char* null_cstring = nullptr; + EXPECT_EQ("Text: ''", absl::Substitute("Text: '$0'", null_cstring)); +} + +TEST(SubstituteTest, SubstituteAndAppend) { + std::string str = "Hello"; + absl::SubstituteAndAppend(&str, ", $0!", "world"); + EXPECT_EQ("Hello, world!", str); + + // Test all overloads. 
+ str.clear(); + absl::SubstituteAndAppend(&str, "$0", "a"); + EXPECT_EQ("a", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1", "a", "b"); + EXPECT_EQ("a b", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2", "a", "b", "c"); + EXPECT_EQ("a b c", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3", "a", "b", "c", "d"); + EXPECT_EQ("a b c d", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4", "a", "b", "c", "d", "e"); + EXPECT_EQ("a b c d e", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5", "a", "b", "c", "d", "e", + "f"); + EXPECT_EQ("a b c d e f", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6", "a", "b", "c", "d", + "e", "f", "g"); + EXPECT_EQ("a b c d e f g", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d", + "e", "f", "g", "h"); + EXPECT_EQ("a b c d e f g h", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c", + "d", "e", "f", "g", "h", "i"); + EXPECT_EQ("a b c d e f g h i", str); + str.clear(); + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", + "c", "d", "e", "f", "g", "h", "i", "j"); + EXPECT_EQ("a b c d e f g h i j", str); +} + +TEST(SubstituteTest, VectorBoolRef) { + std::vector<bool> v = {true, false}; + const auto& cv = v; + EXPECT_EQ("true false true false", + absl::Substitute("$0 $1 $2 $3", v[0], v[1], cv[0], cv[1])); + + std::string str = "Logic be like: "; + absl::SubstituteAndAppend(&str, "$0 $1 $2 $3", v[0], v[1], cv[0], cv[1]); + EXPECT_EQ("Logic be like: true false true false", str); +} + +#ifdef GTEST_HAS_DEATH_TEST + +TEST(SubstituteDeathTest, SubstituteDeath) { + EXPECT_DEBUG_DEATH( + static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")), + "Invalid absl::Substitute\\(\\) format string: asked for \"\\$2\", " + "but only 2 args were given."); + EXPECT_DEBUG_DEATH( + 
static_cast<void>(absl::Substitute(absl::string_view("-$z-"))), + "Invalid absl::Substitute\\(\\) format string: \"-\\$z-\""); + EXPECT_DEBUG_DEATH( + static_cast<void>(absl::Substitute(absl::string_view("-$"))), + "Invalid absl::Substitute\\(\\) format string: \"-\\$\""); +} + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace |