From d902eb869bcfacc1bad14933ed9af4bed006d481 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Tue, 16 Apr 2019 12:11:35 -0700 Subject: Export of internal Abseil changes. -- babbb6421068af3831870fd5995444437ace6769 by Derek Mauro : Rollback of: Make raw_hash_set_test less flaky. Split the timing loop into chunks so that we are less suceptible to antogantistic processes. PiperOrigin-RevId: 243854490 -- a2711f17a712f6d09799bf32363d67526737b486 by CJ Johnson : Relocates IsAtLeastForwardIterator to internal/inlined_vector.h PiperOrigin-RevId: 243846090 -- 6c14cdbeb9a61022c27f8957654f930d8abf2fc1 by Matt Kulukundis : Make raw_hash_set_test less flaky. Split the timing loop into chunks so that we are less suceptible to antogantistic processes. PiperOrigin-RevId: 243824289 -- ee6072a6b6e0ac653622524ceb09db3b9e870f96 by Samuel Benzaquen : Improve format parser performance. Replace the main switch with a lookup in the existing tag table. Improve the ABI of ConsumeUnboundConversion a little. PiperOrigin-RevId: 243824112 -- 24b9e6476dfa4be8d644359eab8ac6816492f187 by Abseil Team : Fix DR numbers: 3800 ? 3080, 3801 ? 3081. PiperOrigin-RevId: 243804213 -- 0660404074707e197684f07cc0bffe4a9c35cd2f by Abseil Team : Internal change. PiperOrigin-RevId: 243757359 -- ba0f5bb9b8584d75c4ffc44ff3cb8c691796ffc6 by Xiaoyi Zhang : Consolidate ABSL_INTERNAL_UNALIGNED_* implementation into memcpy. The compiler should be good enough to optimize these operations. See https://github.com/abseil/abseil-cpp/issues/269 for background. PiperOrigin-RevId: 243323941 -- 00853a8756548df7217513c562d604b4ee5c6ab9 by Eric Fiselier : Reexport memory.h from optional.h for compatibility between libc++ and libstdc++. PiperOrigin-RevId: 243313425 GitOrigin-RevId: babbb6421068af3831870fd5995444437ace6769 Change-Id: Ic53c127ad857a431ad60c98b27cc585fed50a3e3 --- absl/strings/internal/str_format/parser.cc | 143 ++++++++++++++--------------- 1 file changed, 68 insertions(+), 75 deletions(-) (limited to 'absl/strings/internal/str_format/parser.cc') diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index 10487f23c855..9ef5615cc9ee 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -15,6 +15,44 @@ namespace absl { namespace str_format_internal { + +using CC = ConversionChar::Id; +using LM = LengthMod::Id; +ABSL_CONST_INIT const ConvTag kTags[256] = { + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 + {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f + {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 + {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f + {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff +}; + namespace { bool CheckFastPathSetting(const UnboundConversion& conv) { @@ -36,60 +74,17 @@ bool CheckFastPathSetting(const UnboundConversion& conv) { return should_be_basic == conv.flags.basic; } -// Keep a single table for all the conversion chars and length modifiers. -// We invert the length modifiers to make them negative so that we can easily -// test for them. -// Everything else is `none`, which is a negative constant. -using CC = ConversionChar::Id; -using LM = LengthMod::Id; -static constexpr std::int8_t none = -128; -static constexpr std::int8_t kIds[] = { - none, none, none, none, none, none, none, none, // 00-07 - none, none, none, none, none, none, none, none, // 08-0f - none, none, none, none, none, none, none, none, // 10-17 - none, none, none, none, none, none, none, none, // 18-1f - none, none, none, none, none, none, none, none, // 20-27 - none, none, none, none, none, none, none, none, // 28-2f - none, none, none, none, none, none, none, none, // 30-37 - none, none, none, none, none, none, none, none, // 38-3f - none, CC::A, none, CC::C, none, CC::E, CC::F, CC::G, // @ABCDEFG - none, none, none, none, ~LM::L, none, none, none, // HIJKLMNO - none, none, none, CC::S, none, none, none, none, // PQRSTUVW - CC::X, none, none, none, none, none, none, none, // XYZ[\]^_ - none, CC::a, none, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - ~LM::h, CC::i, ~LM::j, none, ~LM::l, none, CC::n, CC::o, // hijklmno - CC::p, ~LM::q, none, CC::s, ~LM::t, CC::u, none, none, // pqrstuvw - CC::x, none, ~LM::z, none, none, none, none, none, // xyz{|}~! - none, none, none, none, none, none, none, none, // 80-87 - none, none, none, none, none, none, none, none, // 88-8f - none, none, none, none, none, none, none, none, // 90-97 - none, none, none, none, none, none, none, none, // 98-9f - none, none, none, none, none, none, none, none, // a0-a7 - none, none, none, none, none, none, none, none, // a8-af - none, none, none, none, none, none, none, none, // b0-b7 - none, none, none, none, none, none, none, none, // b8-bf - none, none, none, none, none, none, none, none, // c0-c7 - none, none, none, none, none, none, none, none, // c8-cf - none, none, none, none, none, none, none, none, // d0-d7 - none, none, none, none, none, none, none, none, // d8-df - none, none, none, none, none, none, none, none, // e0-e7 - none, none, none, none, none, none, none, none, // e8-ef - none, none, none, none, none, none, none, none, // f0-f7 - none, none, none, none, none, none, none, none, // f8-ff -}; - template -bool ConsumeConversion(string_view *src, UnboundConversion *conv, - int *next_arg) { - const char *pos = src->data(); - const char *const end = pos + src->size(); +const char *ConsumeConversion(const char *pos, const char *const end, + UnboundConversion *conv, int *next_arg) { + const char* const original_pos = pos; char c; // Read the next char into `c` and update `pos`. Returns false if there are // no more chars to read. -#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ - do { \ - if (ABSL_PREDICT_FALSE(pos == end)) return false; \ - c = *pos++; \ +#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \ + do { \ + if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \ + c = *pos++; \ } while (0) const auto parse_digits = [&] { @@ -111,10 +106,10 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv, if (is_positional) { ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false; + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; conv->arg_position = parse_digits(); assert(conv->arg_position > 0); - if (ABSL_PREDICT_FALSE(c != '$')) return false; + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; } ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); @@ -129,10 +124,9 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv, conv->flags.basic = false; for (; c <= '0';) { - // FIXME: We might be able to speed this up reusing the kIds lookup table - // from above. - // It might require changing Flags to be a plain integer where we can |= a - // value. + // FIXME: We might be able to speed this up reusing the lookup table from + // above. It might require changing Flags to be a plain integer where we + // can |= a value. switch (c) { case '-': conv->flags.left = true; @@ -160,20 +154,20 @@ flags_done: if (c >= '0') { int maybe_width = parse_digits(); if (!is_positional && c == '$') { - if (ABSL_PREDICT_FALSE(*next_arg != 0)) return false; + if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; // Positional conversion. *next_arg = -1; conv->flags = Flags(); conv->flags.basic = true; - return ConsumeConversion(src, conv, next_arg); + return ConsumeConversion(original_pos, end, conv, next_arg); } conv->width.set_value(maybe_width); } else if (c == '*') { ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false; + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; conv->width.set_from_arg(parse_digits()); - if (ABSL_PREDICT_FALSE(c != '$')) return false; + if (ABSL_PREDICT_FALSE(c != '$')) return nullptr; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } else { conv->width.set_from_arg(++*next_arg); @@ -188,9 +182,9 @@ flags_done: } else if (c == '*') { ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { - if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false; + if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; conv->precision.set_from_arg(parse_digits()); - if (c != '$') return false; + if (c != '$') return nullptr; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } else { conv->precision.set_from_arg(++*next_arg); @@ -201,14 +195,14 @@ flags_done: } } - std::int8_t id = kIds[static_cast(c)]; + auto tag = GetTagForChar(c); - if (id < 0) { - if (ABSL_PREDICT_FALSE(id == none)) return false; + if (ABSL_PREDICT_FALSE(!tag.is_conv())) { + if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr; // It is a length modifier. using str_format_internal::LengthMod; - LengthMod length_mod = LengthMod::FromId(static_cast(~id)); + LengthMod length_mod = tag.as_length(); ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (c == 'h' && length_mod.id() == LengthMod::h) { conv->length_mod = LengthMod::FromId(LengthMod::hh); @@ -219,25 +213,24 @@ flags_done: } else { conv->length_mod = length_mod; } - id = kIds[static_cast(c)]; - if (ABSL_PREDICT_FALSE(id < 0)) return false; + tag = GetTagForChar(c); + if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr; } assert(CheckFastPathSetting(*conv)); (void)(&CheckFastPathSetting); - conv->conv = ConversionChar::FromId(static_cast(id)); + conv->conv = tag.as_conv(); if (!is_positional) conv->arg_position = ++*next_arg; - *src = string_view(pos, end - pos); - return true; + return pos; } } // namespace -bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv, - int *next_arg) { - if (*next_arg < 0) return ConsumeConversion(src, conv, next_arg); - return ConsumeConversion(src, conv, next_arg); +const char *ConsumeUnboundConversion(const char *p, const char *end, + UnboundConversion *conv, int *next_arg) { + if (*next_arg < 0) return ConsumeConversion(p, end, conv, next_arg); + return ConsumeConversion(p, end, conv, next_arg); } struct ParsedFormatBase::ParsedFormatConsumer { -- cgit 1.4.1