about summary refs log tree commit diff
path: root/absl/strings/internal/str_format/parser.cc
diff options
context:
space:
mode:
authorAbseil Team <absl-team@google.com>2019-04-16T19·11-0700
committerAlex Strelnikov <strel@google.com>2019-04-16T19·40-0400
commitd902eb869bcfacc1bad14933ed9af4bed006d481 (patch)
tree2c6b4c121bb5a696657d63db03a166404b3592b8 /absl/strings/internal/str_format/parser.cc
parenta02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a (diff)
Export of internal Abseil changes.
--
babbb6421068af3831870fd5995444437ace6769 by Derek Mauro <dmauro@google.com>:

Rollback of:

Make raw_hash_set_test less flaky.

Split the timing loop into chunks so that we are less susceptible to
antagonistic processes.

PiperOrigin-RevId: 243854490

--
a2711f17a712f6d09799bf32363d67526737b486 by CJ Johnson <johnsoncj@google.com>:

Relocates IsAtLeastForwardIterator to internal/inlined_vector.h

PiperOrigin-RevId: 243846090

--
6c14cdbeb9a61022c27f8957654f930d8abf2fc1 by Matt Kulukundis <kfm@google.com>:

Make raw_hash_set_test less flaky.

Split the timing loop into chunks so that we are less susceptible to
antagonistic processes.

PiperOrigin-RevId: 243824289

--
ee6072a6b6e0ac653622524ceb09db3b9e870f96 by Samuel Benzaquen <sbenza@google.com>:

Improve format parser performance.
Replace the main switch with a lookup in the existing tag table.
Improve the ABI of ConsumeUnboundConversion a little.

PiperOrigin-RevId: 243824112

--
24b9e6476dfa4be8d644359eab8ac6816492f187 by Abseil Team <absl-team@google.com>:

Fix DR numbers: 3800 → 3080, 3801 → 3081.

PiperOrigin-RevId: 243804213

--
0660404074707e197684f07cc0bffe4a9c35cd2f by Abseil Team <absl-team@google.com>:

Internal change.

PiperOrigin-RevId: 243757359

--
ba0f5bb9b8584d75c4ffc44ff3cb8c691796ffc6 by Xiaoyi Zhang <zhangxy@google.com>:

Consolidate ABSL_INTERNAL_UNALIGNED_* implementation into memcpy.
The compiler should be good enough to optimize these operations.
See https://github.com/abseil/abseil-cpp/issues/269 for background.

PiperOrigin-RevId: 243323941

--
00853a8756548df7217513c562d604b4ee5c6ab9 by Eric Fiselier <ericwf@google.com>:

Reexport memory.h from optional.h for compatibility between libc++ and
libstdc++.

PiperOrigin-RevId: 243313425
GitOrigin-RevId: babbb6421068af3831870fd5995444437ace6769
Change-Id: Ic53c127ad857a431ad60c98b27cc585fed50a3e3
Diffstat (limited to 'absl/strings/internal/str_format/parser.cc')
-rw-r--r--absl/strings/internal/str_format/parser.cc143
1 files changed, 68 insertions, 75 deletions
diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc
index 10487f23c855..9ef5615cc9ee 100644
--- a/absl/strings/internal/str_format/parser.cc
+++ b/absl/strings/internal/str_format/parser.cc
@@ -15,6 +15,44 @@
 
 namespace absl {
 namespace str_format_internal {
+
+using CC = ConversionChar::Id;
+using LM = LengthMod::Id;
+ABSL_CONST_INIT const ConvTag kTags[256] = {
+    {},    {},    {},    {},    {},    {},    {},    {},     // 00-07
+    {},    {},    {},    {},    {},    {},    {},    {},     // 08-0f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 10-17
+    {},    {},    {},    {},    {},    {},    {},    {},     // 18-1f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 20-27
+    {},    {},    {},    {},    {},    {},    {},    {},     // 28-2f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 30-37
+    {},    {},    {},    {},    {},    {},    {},    {},     // 38-3f
+    {},    CC::A, {},    CC::C, {},    CC::E, CC::F, CC::G,  // @ABCDEFG
+    {},    {},    {},    {},    LM::L, {},    {},    {},     // HIJKLMNO
+    {},    {},    {},    CC::S, {},    {},    {},    {},     // PQRSTUVW
+    CC::X, {},    {},    {},    {},    {},    {},    {},     // XYZ[\]^_
+    {},    CC::a, {},    CC::c, CC::d, CC::e, CC::f, CC::g,  // `abcdefg
+    LM::h, CC::i, LM::j, {},    LM::l, {},    CC::n, CC::o,  // hijklmno
+    CC::p, LM::q, {},    CC::s, LM::t, CC::u, {},    {},     // pqrstuvw
+    CC::x, {},    LM::z, {},    {},    {},    {},    {},     // xyz{|}!
+    {},    {},    {},    {},    {},    {},    {},    {},     // 80-87
+    {},    {},    {},    {},    {},    {},    {},    {},     // 88-8f
+    {},    {},    {},    {},    {},    {},    {},    {},     // 90-97
+    {},    {},    {},    {},    {},    {},    {},    {},     // 98-9f
+    {},    {},    {},    {},    {},    {},    {},    {},     // a0-a7
+    {},    {},    {},    {},    {},    {},    {},    {},     // a8-af
+    {},    {},    {},    {},    {},    {},    {},    {},     // b0-b7
+    {},    {},    {},    {},    {},    {},    {},    {},     // b8-bf
+    {},    {},    {},    {},    {},    {},    {},    {},     // c0-c7
+    {},    {},    {},    {},    {},    {},    {},    {},     // c8-cf
+    {},    {},    {},    {},    {},    {},    {},    {},     // d0-d7
+    {},    {},    {},    {},    {},    {},    {},    {},     // d8-df
+    {},    {},    {},    {},    {},    {},    {},    {},     // e0-e7
+    {},    {},    {},    {},    {},    {},    {},    {},     // e8-ef
+    {},    {},    {},    {},    {},    {},    {},    {},     // f0-f7
+    {},    {},    {},    {},    {},    {},    {},    {},     // f8-ff
+};
+
 namespace {
 
 bool CheckFastPathSetting(const UnboundConversion& conv) {
@@ -36,60 +74,17 @@ bool CheckFastPathSetting(const UnboundConversion& conv) {
   return should_be_basic == conv.flags.basic;
 }
 
-// Keep a single table for all the conversion chars and length modifiers.
-// We invert the length modifiers to make them negative so that we can easily
-// test for them.
-// Everything else is `none`, which is a negative constant.
-using CC = ConversionChar::Id;
-using LM = LengthMod::Id;
-static constexpr std::int8_t none = -128;
-static constexpr std::int8_t kIds[] = {
-    none,   none,   none,   none,  none,   none,  none,  none,   // 00-07
-    none,   none,   none,   none,  none,   none,  none,  none,   // 08-0f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 10-17
-    none,   none,   none,   none,  none,   none,  none,  none,   // 18-1f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 20-27
-    none,   none,   none,   none,  none,   none,  none,  none,   // 28-2f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 30-37
-    none,   none,   none,   none,  none,   none,  none,  none,   // 38-3f
-    none,   CC::A,  none,   CC::C, none,   CC::E, CC::F, CC::G,  // @ABCDEFG
-    none,   none,   none,   none,  ~LM::L, none,  none,  none,   // HIJKLMNO
-    none,   none,   none,   CC::S, none,   none,  none,  none,   // PQRSTUVW
-    CC::X,  none,   none,   none,  none,   none,  none,  none,   // XYZ[\]^_
-    none,   CC::a,  none,   CC::c, CC::d,  CC::e, CC::f, CC::g,  // `abcdefg
-    ~LM::h, CC::i,  ~LM::j, none,  ~LM::l, none,  CC::n, CC::o,  // hijklmno
-    CC::p,  ~LM::q, none,   CC::s, ~LM::t, CC::u, none,  none,   // pqrstuvw
-    CC::x,  none,   ~LM::z, none,  none,   none,  none,  none,   // xyz{|}~!
-    none,   none,   none,   none,  none,   none,  none,  none,   // 80-87
-    none,   none,   none,   none,  none,   none,  none,  none,   // 88-8f
-    none,   none,   none,   none,  none,   none,  none,  none,   // 90-97
-    none,   none,   none,   none,  none,   none,  none,  none,   // 98-9f
-    none,   none,   none,   none,  none,   none,  none,  none,   // a0-a7
-    none,   none,   none,   none,  none,   none,  none,  none,   // a8-af
-    none,   none,   none,   none,  none,   none,  none,  none,   // b0-b7
-    none,   none,   none,   none,  none,   none,  none,  none,   // b8-bf
-    none,   none,   none,   none,  none,   none,  none,  none,   // c0-c7
-    none,   none,   none,   none,  none,   none,  none,  none,   // c8-cf
-    none,   none,   none,   none,  none,   none,  none,  none,   // d0-d7
-    none,   none,   none,   none,  none,   none,  none,  none,   // d8-df
-    none,   none,   none,   none,  none,   none,  none,  none,   // e0-e7
-    none,   none,   none,   none,  none,   none,  none,  none,   // e8-ef
-    none,   none,   none,   none,  none,   none,  none,  none,   // f0-f7
-    none,   none,   none,   none,  none,   none,  none,  none,   // f8-ff
-};
-
 template <bool is_positional>
-bool ConsumeConversion(string_view *src, UnboundConversion *conv,
-                       int *next_arg) {
-  const char *pos = src->data();
-  const char *const end = pos + src->size();
+const char *ConsumeConversion(const char *pos, const char *const end,
+                              UnboundConversion *conv, int *next_arg) {
+  const char* const original_pos = pos;
   char c;
   // Read the next char into `c` and update `pos`. Returns false if there are
   // no more chars to read.
-#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()        \
-  do {                                                \
-    if (ABSL_PREDICT_FALSE(pos == end)) return false; \
-    c = *pos++;                                       \
+#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
+  do {                                                  \
+    if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
+    c = *pos++;                                         \
   } while (0)
 
   const auto parse_digits = [&] {
@@ -111,10 +106,10 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv,
 
   if (is_positional) {
     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
-    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
+    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
     conv->arg_position = parse_digits();
     assert(conv->arg_position > 0);
-    if (ABSL_PREDICT_FALSE(c != '$')) return false;
+    if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
   }
 
   ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
@@ -129,10 +124,9 @@ bool ConsumeConversion(string_view *src, UnboundConversion *conv,
     conv->flags.basic = false;
 
     for (; c <= '0';) {
-      // FIXME: We might be able to speed this up reusing the kIds lookup table
-      // from above.
-      // It might require changing Flags to be a plain integer where we can |= a
-      // value.
+      // FIXME: We might be able to speed this up reusing the lookup table from
+      // above. It might require changing Flags to be a plain integer where we
+      // can |= a value.
       switch (c) {
         case '-':
           conv->flags.left = true;
@@ -160,20 +154,20 @@ flags_done:
       if (c >= '0') {
         int maybe_width = parse_digits();
         if (!is_positional && c == '$') {
-          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return false;
+          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
           // Positional conversion.
           *next_arg = -1;
           conv->flags = Flags();
           conv->flags.basic = true;
-          return ConsumeConversion<true>(src, conv, next_arg);
+          return ConsumeConversion<true>(original_pos, end, conv, next_arg);
         }
         conv->width.set_value(maybe_width);
       } else if (c == '*') {
         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
         if (is_positional) {
-          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
+          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
           conv->width.set_from_arg(parse_digits());
-          if (ABSL_PREDICT_FALSE(c != '$')) return false;
+          if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
         } else {
           conv->width.set_from_arg(++*next_arg);
@@ -188,9 +182,9 @@ flags_done:
       } else if (c == '*') {
         ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
         if (is_positional) {
-          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return false;
+          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
           conv->precision.set_from_arg(parse_digits());
-          if (c != '$') return false;
+          if (c != '$') return nullptr;
           ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
         } else {
           conv->precision.set_from_arg(++*next_arg);
@@ -201,14 +195,14 @@ flags_done:
     }
   }
 
-  std::int8_t id = kIds[static_cast<unsigned char>(c)];
+  auto tag = GetTagForChar(c);
 
-  if (id < 0) {
-    if (ABSL_PREDICT_FALSE(id == none)) return false;
+  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
+    if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
 
     // It is a length modifier.
     using str_format_internal::LengthMod;
-    LengthMod length_mod = LengthMod::FromId(static_cast<LM>(~id));
+    LengthMod length_mod = tag.as_length();
     ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
     if (c == 'h' && length_mod.id() == LengthMod::h) {
       conv->length_mod = LengthMod::FromId(LengthMod::hh);
@@ -219,25 +213,24 @@ flags_done:
     } else {
       conv->length_mod = length_mod;
     }
-    id = kIds[static_cast<unsigned char>(c)];
-    if (ABSL_PREDICT_FALSE(id < 0)) return false;
+    tag = GetTagForChar(c);
+    if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
   }
 
   assert(CheckFastPathSetting(*conv));
   (void)(&CheckFastPathSetting);
 
-  conv->conv = ConversionChar::FromId(static_cast<CC>(id));
+  conv->conv = tag.as_conv();
   if (!is_positional) conv->arg_position = ++*next_arg;
-  *src = string_view(pos, end - pos);
-  return true;
+  return pos;
 }
 
 }  // namespace
 
-bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv,
-                              int *next_arg) {
-  if (*next_arg < 0) return ConsumeConversion<true>(src, conv, next_arg);
-  return ConsumeConversion<false>(src, conv, next_arg);
+const char *ConsumeUnboundConversion(const char *p, const char *end,
+                                     UnboundConversion *conv, int *next_arg) {
+  if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
+  return ConsumeConversion<false>(p, end, conv, next_arg);
 }
 
 struct ParsedFormatBase::ParsedFormatConsumer {