about summary refs log tree commit diff
path: root/absl/strings
diff options
context:
space:
mode:
authorAbseil Team <absl-team@google.com>2019-10-18T16·06-0700
committerAndy Soffer <asoffer@google.com>2019-10-18T16·40-0400
commite4c8d0eb8ef4acb5d7a4252b3b87feb391ef7e41 (patch)
treede2b90294a678bb218a0533784bb9ae532ae869a /absl/strings
parenta15364ce4d88534ae2295127e5d8e32aefb6b446 (diff)
Export of internal Abseil changes
--
a9ac6567c0933d786d68c10011e3f3ff9deedf89 by Greg Falcon <gfalcon@google.com>:

Add absl::FunctionRef, a type analogous to the proposed C++23 std::function_ref.

Like std::function, FunctionRef can be used to type-erase any callable (invokable) object.  However, FunctionRef works by reference: it does not store a copy of the type-erased object.  If the wrapped object is destroyed before the FunctionRef, the reference becomes dangling.

FunctionRef relates to std::function in much the same way that string_view relates to std::string.

Because of these limitations, FunctionRef is best used only as a function argument type, and only where the function will be invoked immediately (rather than saved for later use).  When `const std::function<...>&` is used in this way, `absl::FunctionRef<...>` is a better-performing replacement.

PiperOrigin-RevId: 275484044

--
1f7c4df3760f8b93e5a5baf40b070eca1d3f4c98 by Abseil Team <absl-team@google.com>:

Add FastHexToBufferZeroPad16() function for blazingly fast hex encoding of uint64_t.

PiperOrigin-RevId: 275420901

--
08d48ac004eba57cf2f1ada827181a2995f74807 by Abseil Team <absl-team@google.com>:

Avoid applying the workaround for MSVC's static initialization problems when using clang-cl.

PiperOrigin-RevId: 275366326

--
40be82bd2b34670b5458c0a72a0475086153c2d6 by Abseil Team <absl-team@google.com>:

Added comments to SimpleAtof()/SimpleAtod() that clarify that they
always use the "C" locale, unlike the standard functions strtod()
and strtof() referenced now in the comments.

PiperOrigin-RevId: 275355815

--
086779dacb3f6f2b3ab59947e94e79046bdb1fe1 by Jorg Brown <jorg@google.com>:

Move the hex conversion table used by escaping.cc into numbers.h so
that other parts of Abseil can more efficiently access it.

PiperOrigin-RevId: 275331251

--
3c4ed1b04e55d96a40cbe70fb70929ffbb0c0432 by Abseil Team <absl-team@google.com>:

Avoid applying the workaround for MSVC's static initialization problems when using clang-cl.

PiperOrigin-RevId: 275323858

--
56ceb58ab688c3761978308609b09a1ac2739c9a by Derek Mauro <dmauro@google.com>:

Add script for testing on Alpine Linux (for musl test coverage)

PiperOrigin-RevId: 275321244
GitOrigin-RevId: a9ac6567c0933d786d68c10011e3f3ff9deedf89
Change-Id: I39799fa03768ddb44f3166200c860e1da4461807
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel5
-rw-r--r--absl/strings/CMakeLists.txt2
-rw-r--r--absl/strings/escaping.cc33
-rw-r--r--absl/strings/numbers.cc29
-rw-r--r--absl/strings/numbers.h45
-rw-r--r--absl/strings/numbers_benchmark.cc23
-rw-r--r--absl/strings/numbers_test.cc30
-rw-r--r--absl/strings/str_cat.cc5
-rw-r--r--absl/strings/substitute.cc5
9 files changed, 137 insertions, 40 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 4863ead2992e..e38c8ad6ce66 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -413,8 +413,9 @@ cc_test(
     deps = [
         ":pow10_helper",
         ":strings",
-        "//absl/base:core_headers",
         "//absl/base:raw_logging_internal",
+        "//absl/random",
+        "//absl/random:distributions",
         "@com_google_googletest//:gtest_main",
     ],
 )
@@ -428,6 +429,8 @@ cc_test(
     deps = [
         ":strings",
         "//absl/base:raw_logging_internal",
+        "//absl/random",
+        "//absl/random:distributions",
         "@com_github_google_benchmark//:benchmark_main",
     ],
 )
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index 3f9079575011..cd52a472f499 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -277,6 +277,8 @@ absl_cc_test(
     absl::core_headers
     absl::pow10_helper
     absl::raw_logging_internal
+    absl::random_random
+    absl::random_distributions
     gmock_main
 )
 
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 0d336e3ff691..88390fbfa82f 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -35,27 +35,6 @@
 namespace absl {
 namespace {
 
-// Digit conversion.
-constexpr char kHexChar[] = "0123456789abcdef";
-
-constexpr char kHexTable[513] =
-    "000102030405060708090a0b0c0d0e0f"
-    "101112131415161718191a1b1c1d1e1f"
-    "202122232425262728292a2b2c2d2e2f"
-    "303132333435363738393a3b3c3d3e3f"
-    "404142434445464748494a4b4c4d4e4f"
-    "505152535455565758595a5b5c5d5e5f"
-    "606162636465666768696a6b6c6d6e6f"
-    "707172737475767778797a7b7c7d7e7f"
-    "808182838485868788898a8b8c8d8e8f"
-    "909192939495969798999a9b9c9d9e9f"
-    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
-    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
-    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
-    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
-    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
-    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
-
 // These are used for the leave_nulls_escaped argument to CUnescapeInternal().
 constexpr bool kUnescapeNulls = false;
 
@@ -348,14 +327,14 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex,
              (last_hex_escape && absl::ascii_isxdigit(c)))) {
           if (use_hex) {
             dest.append("\\" "x");
-            dest.push_back(kHexChar[c / 16]);
-            dest.push_back(kHexChar[c % 16]);
+            dest.push_back(numbers_internal::kHexChar[c / 16]);
+            dest.push_back(numbers_internal::kHexChar[c % 16]);
             is_hex_escape = true;
           } else {
             dest.append("\\");
-            dest.push_back(kHexChar[c / 64]);
-            dest.push_back(kHexChar[(c % 64) / 8]);
-            dest.push_back(kHexChar[c % 8]);
+            dest.push_back(numbers_internal::kHexChar[c / 64]);
+            dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]);
+            dest.push_back(numbers_internal::kHexChar[c % 8]);
           }
         } else {
           dest.push_back(c);
@@ -1019,7 +998,7 @@ template <typename T>
 void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
   auto dest_ptr = &dest[0];
   for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
-    const char* hex_p = &kHexTable[*src_ptr * 2];
+    const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
     std::copy(hex_p, hex_p + 2, dest_ptr);
   }
 }
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
index 38d148690e2e..d7b94fc1301e 100644
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
@@ -19,8 +19,8 @@
 
 #include <algorithm>
 #include <cassert>
-#include <cfloat>          // for DBL_DIG and FLT_DIG
-#include <cmath>           // for HUGE_VAL
+#include <cfloat>  // for DBL_DIG and FLT_DIG
+#include <cmath>   // for HUGE_VAL
 #include <cstdint>
 #include <cstdio>
 #include <cstdlib>
@@ -34,6 +34,7 @@
 #include "absl/base/internal/raw_logging.h"
 #include "absl/strings/ascii.h"
 #include "absl/strings/charconv.h"
+#include "absl/strings/escaping.h"
 #include "absl/strings/internal/memutil.h"
 #include "absl/strings/match.h"
 #include "absl/strings/str_cat.h"
@@ -885,6 +886,28 @@ inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
 }  // anonymous namespace
 
 namespace numbers_internal {
+
+// Digit conversion.
+ABSL_CONST_INIT const char kHexChar[] = "0123456789abcdef";
+
+ABSL_CONST_INIT const char kHexTable[513] =
+    "000102030405060708090a0b0c0d0e0f"
+    "101112131415161718191a1b1c1d1e1f"
+    "202122232425262728292a2b2c2d2e2f"
+    "303132333435363738393a3b3c3d3e3f"
+    "404142434445464748494a4b4c4d4e4f"
+    "505152535455565758595a5b5c5d5e5f"
+    "606162636465666768696a6b6c6d6e6f"
+    "707172737475767778797a7b7c7d7e7f"
+    "808182838485868788898a8b8c8d8e8f"
+    "909192939495969798999a9b9c9d9e9f"
+    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
+    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
+    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
+    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
+    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
+    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
+
 bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
   return safe_int_internal<int32_t>(text, value, base);
 }
@@ -900,6 +923,6 @@ bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
 bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
   return safe_uint_internal<uint64_t>(text, value, base);
 }
-}  // namespace numbers_internal
 
+}  // namespace numbers_internal
 }  // namespace absl
diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h
index 100839b04626..745de67ad86d 100644
--- a/absl/strings/numbers.h
+++ b/absl/strings/numbers.h
@@ -24,6 +24,10 @@
 #ifndef ABSL_STRINGS_NUMBERS_H_
 #define ABSL_STRINGS_NUMBERS_H_
 
+#ifdef __SSE4_2__
+#include <x86intrin.h>
+#endif
+
 #include <cstddef>
 #include <cstdlib>
 #include <cstring>
@@ -32,6 +36,8 @@
 #include <string>
 #include <type_traits>
 
+#include "absl/base/internal/bits.h"
+#include "absl/base/internal/endian.h"
 #include "absl/base/macros.h"
 #include "absl/base/port.h"
 #include "absl/numeric/int128.h"
@@ -54,7 +60,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out);
 // Converts the given string (optionally followed or preceded by ASCII
 // whitespace) into a float, which may be rounded on overflow or underflow.
 // See https://en.cppreference.com/w/c/string/byte/strtof for details about the
-// allowed formats for `str`. If any errors are encountered, this function
+// allowed formats for `str`, except SimpleAtof() is locale-indepdent and will
+// always use the "C" locale. If any errors are encountered, this function
 // returns `false`, leaving `out` in an unspecified state.
 ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);
 
@@ -63,7 +70,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);
 // Converts the given string (optionally followed or preceded by ASCII
 // whitespace) into a double, which may be rounded on overflow or underflow.
 // See https://en.cppreference.com/w/c/string/byte/strtof for details about the
-// allowed formats for `str`. If any errors are encountered, this function
+// allowed formats for `str`, except SimpleAtod is locale-independent and will
+// always use the "C" locale. If any errors are encountered, this function
 // returns `false`, leaving `out` in an unspecified state.
 ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out);
 
@@ -84,6 +92,10 @@ ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out);
 namespace absl {
 namespace numbers_internal {
 
+// Digit conversion.
+extern const char kHexChar[17];    // 0123456789abcdef
+extern const char kHexTable[513];  // 000102030405060708090a0b0c0d0e0f1011...
+
 // safe_strto?() functions for implementing SimpleAtoi()
 bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
 bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
@@ -170,6 +182,35 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out,
   return parsed;
 }
 
+// FastHexToBufferZeroPad16()
+//
+// Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but
+// without the terminating null character. Thus `out` must be of length >= 16.
+// Returns the number of non-pad digits of the output (it can never be zero
+// since 0 has one digit).
+inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) {
+  uint64_t be = absl::big_endian::FromHost64(val);
+#ifdef __SSE4_2__
+  const auto kNibbleMask = _mm_set1_epi8(0xf);
+  const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7',
+                                        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f');
+  auto v = _mm_loadu_si64(reinterpret_cast<__m128i*>(&be));  // load lo dword
+  auto v4 = _mm_srli_epi64(v, 4);                            // shift 4 right
+  auto il = _mm_unpacklo_epi8(v4, v);                        // interleave bytes
+  auto m = _mm_and_si128(il, kNibbleMask);                   // mask out nibbles
+  auto hexchars = _mm_shuffle_epi8(kHexDigits, m);           // hex chars
+  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars);
+#else
+  for (int i = 0; i < 8; ++i) {
+    auto byte = (be >> (8 * i)) & 0xFF;
+    auto* hex = &absl::numbers_internal::kHexTable[byte * 2];
+    std::memcpy(out + 2 * i, hex, 2);
+  }
+#endif
+  // | 0x1 so that even 0 has 1 digit.
+  return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4;
+}
+
 }  // namespace numbers_internal
 
 // SimpleAtoi()
diff --git a/absl/strings/numbers_benchmark.cc b/absl/strings/numbers_benchmark.cc
index 54dbedd35357..6e79b3e811ff 100644
--- a/absl/strings/numbers_benchmark.cc
+++ b/absl/strings/numbers_benchmark.cc
@@ -20,6 +20,8 @@
 
 #include "benchmark/benchmark.h"
 #include "absl/base/internal/raw_logging.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
 #include "absl/strings/numbers.h"
 
 namespace {
@@ -260,4 +262,25 @@ BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string)
     ->ArgPair(10, 4)
     ->ArgPair(10, 8);
 
+void BM_FastHexToBufferZeroPad16(benchmark::State& state) {
+  absl::BitGen rng;
+  std::vector<uint64_t> nums;
+  nums.resize(1000);
+  auto min = std::numeric_limits<uint64_t>::min();
+  auto max = std::numeric_limits<uint64_t>::max();
+  for (auto& num : nums) {
+    num = absl::LogUniform(rng, min, max);
+  }
+
+  char buf[16];
+  while (state.KeepRunningBatch(nums.size())) {
+    for (auto num : nums) {
+      auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf);
+      benchmark::DoNotOptimize(digits);
+      benchmark::DoNotOptimize(buf);
+    }
+  }
+}
+BENCHMARK(BM_FastHexToBufferZeroPad16);
+
 }  // namespace
diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc
index d964e562b985..b92b9a8cb2a8 100644
--- a/absl/strings/numbers_test.cc
+++ b/absl/strings/numbers_test.cc
@@ -17,6 +17,7 @@
 #include "absl/strings/numbers.h"
 
 #include <sys/types.h>
+
 #include <cfenv>  // NOLINT(build/c++11)
 #include <cinttypes>
 #include <climits>
@@ -36,10 +37,11 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "absl/base/internal/raw_logging.h"
-#include "absl/strings/str_cat.h"
-
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
 #include "absl/strings/internal/numbers_test_common.h"
 #include "absl/strings/internal/pow10_helper.h"
+#include "absl/strings/str_cat.h"
 
 namespace {
 
@@ -1187,4 +1189,28 @@ TEST(StrToUint64Base, PrefixOnly) {
   }
 }
 
+void TestFastHexToBufferZeroPad16(uint64_t v) {
+  char buf[16];
+  auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(v, buf);
+  absl::string_view res(buf, 16);
+  char buf2[17];
+  snprintf(buf2, sizeof(buf2), "%016" PRIx64, v);
+  EXPECT_EQ(res, buf2) << v;
+  size_t expected_digits = snprintf(buf2, sizeof(buf2), "%" PRIx64, v);
+  EXPECT_EQ(digits, expected_digits) << v;
+}
+
+TEST(FastHexToBufferZeroPad16, Smoke) {
+  TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::min());
+  TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::max());
+  TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::min());
+  TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::max());
+  absl::BitGen rng;
+  for (int i = 0; i < 100000; ++i) {
+    TestFastHexToBufferZeroPad16(
+        absl::LogUniform(rng, std::numeric_limits<uint64_t>::min(),
+                         std::numeric_limits<uint64_t>::max()));
+  }
+}
+
 }  // namespace
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index 996194457fdd..d587789972e4 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -15,12 +15,14 @@
 #include "absl/strings/str_cat.h"
 
 #include <assert.h>
+
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 
 #include "absl/strings/ascii.h"
 #include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/numbers.h"
 
 namespace absl {
 
@@ -28,9 +30,8 @@ AlphaNum::AlphaNum(Hex hex) {
   char* const end = &digits_[numbers_internal::kFastToBufferSize];
   char* writer = end;
   uint64_t value = hex.value;
-  static const char hexdigits[] = "0123456789abcdef";
   do {
-    *--writer = hexdigits[value & 0xF];
+    *--writer = absl::numbers_internal::kHexChar[value & 0xF];
     value >>= 4;
   } while (value != 0);
 
diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc
index bc176950e453..36dbfe7d42ff 100644
--- a/absl/strings/substitute.cc
+++ b/absl/strings/substitute.cc
@@ -94,7 +94,6 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format,
   assert(target == output->data() + output->size());
 }
 
-static const char kHexDigits[] = "0123456789abcdef";
 Arg::Arg(const void* value) {
   static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2,
                 "fix sizeof(scratch_)");
@@ -104,7 +103,7 @@ Arg::Arg(const void* value) {
     char* ptr = scratch_ + sizeof(scratch_);
     uintptr_t num = reinterpret_cast<uintptr_t>(value);
     do {
-      *--ptr = kHexDigits[num & 0xf];
+      *--ptr = absl::numbers_internal::kHexChar[num & 0xf];
       num >>= 4;
     } while (num != 0);
     *--ptr = 'x';
@@ -119,7 +118,7 @@ Arg::Arg(Hex hex) {
   char* writer = end;
   uint64_t value = hex.value;
   do {
-    *--writer = kHexDigits[value & 0xF];
+    *--writer = absl::numbers_internal::kHexChar[value & 0xF];
     value >>= 4;
   } while (value != 0);