about summary refs log tree commit diff
path: root/absl/hash
diff options
context:
space:
mode:
Diffstat (limited to 'absl/hash')
-rw-r--r--absl/hash/BUILD.bazel2
-rw-r--r--absl/hash/hash_test.cc114
-rw-r--r--absl/hash/internal/hash.cc30
-rw-r--r--absl/hash/internal/hash.h121
-rw-r--r--absl/hash/internal/spy_hash_state.h13
5 files changed, 275 insertions, 5 deletions
diff --git a/absl/hash/BUILD.bazel b/absl/hash/BUILD.bazel
index c51248a5cb9b..ffe8c294a0bc 100644
--- a/absl/hash/BUILD.bazel
+++ b/absl/hash/BUILD.bazel
@@ -71,9 +71,9 @@ cc_test(
     deps = [
         ":hash",
         ":hash_testing",
+        ":spy_hash_state",
         "//absl/base:core_headers",
         "//absl/container:flat_hash_set",
-        "//absl/hash:spy_hash_state",
         "//absl/meta:type_traits",
         "//absl/numeric:int128",
         "@com_google_googletest//:gtest_main",
diff --git a/absl/hash/hash_test.cc b/absl/hash/hash_test.cc
index 9a667ba703b4..61f7661f5912 100644
--- a/absl/hash/hash_test.cc
+++ b/absl/hash/hash_test.cc
@@ -274,8 +274,8 @@ TEST(HashValueTest, Strings) {
 
   const std::string small = "foo";
   const std::string dup = "foofoo";
-  const std::string large = "large";
-  const std::string huge = std::string(5000, 'a');
+  const std::string large = std::string(2048, 'x');  // multiple of chunk size
+  const std::string huge = std::string(5000, 'a');   // not a multiple
 
   EXPECT_TRUE(absl::VerifyTypeImplementsAbslHashCorrectly(std::make_tuple(
       std::string(), absl::string_view(),
@@ -378,6 +378,116 @@ struct Private {
   }
 };
 
+// Test helper for combine_piecewise_buffer.  It holds a string_view to the
+// buffer-to-be-hashed.  Its AbslHashValue specialization will split up its
+// contents at the character offsets requested.
+class PiecewiseHashTester {
+ public:
+  // Create a hash view of a buffer to be hashed contiguously.
+  explicit PiecewiseHashTester(absl::string_view buf)
+      : buf_(buf), piecewise_(false), split_locations_() {}
+
+  // Create a hash view of a buffer to be hashed piecewise, with breaks at the
+  // given locations.
+  PiecewiseHashTester(absl::string_view buf, std::set<size_t> split_locations)
+      : buf_(buf),
+        piecewise_(true),
+        split_locations_(std::move(split_locations)) {}
+
+  template <typename H>
+  friend H AbslHashValue(H h, const PiecewiseHashTester& p) {
+    if (!p.piecewise_) {
+      return H::combine_contiguous(std::move(h), p.buf_.data(), p.buf_.size());
+    }
+    absl::hash_internal::PiecewiseCombiner combiner;
+    if (p.split_locations_.empty()) {
+      h = combiner.add_buffer(std::move(h), p.buf_.data(), p.buf_.size());
+      return combiner.finalize(std::move(h));
+    }
+    size_t begin = 0;
+    for (size_t next : p.split_locations_) {
+      absl::string_view chunk = p.buf_.substr(begin, next - begin);
+      h = combiner.add_buffer(std::move(h), chunk.data(), chunk.size());
+      begin = next;
+    }
+    absl::string_view last_chunk = p.buf_.substr(begin);
+    if (!last_chunk.empty()) {
+      h = combiner.add_buffer(std::move(h), last_chunk.data(),
+                              last_chunk.size());
+    }
+    return combiner.finalize(std::move(h));
+  }
+
+ private:
+  absl::string_view buf_;
+  bool piecewise_;
+  std::set<size_t> split_locations_;
+};
+
+// Dummy object that hashes as two distinct contiguous buffers, "foo" followed
+// by "bar"
+struct DummyFooBar {
+  template <typename H>
+  friend H AbslHashValue(H h, const DummyFooBar&) {
+    const char* foo = "foo";
+    const char* bar = "bar";
+    h = H::combine_contiguous(std::move(h), foo, 3);
+    h = H::combine_contiguous(std::move(h), bar, 3);
+    return std::move(h);
+  }
+};
+
+TEST(HashValueTest, CombinePiecewiseBuffer) {
+  absl::Hash<PiecewiseHashTester> hash;
+
+  // Check that hashing an empty buffer through the piecewise API works.
+  EXPECT_EQ(hash(PiecewiseHashTester("")), hash(PiecewiseHashTester("", {})));
+
+  // Similarly, small buffers should give consistent results
+  EXPECT_EQ(hash(PiecewiseHashTester("foobar")),
+            hash(PiecewiseHashTester("foobar", {})));
+  EXPECT_EQ(hash(PiecewiseHashTester("foobar")),
+            hash(PiecewiseHashTester("foobar", {3})));
+
+  // But hashing "foobar" in pieces gives a different answer than hashing "foo"
+  // contiguously, then "bar" contiguously.
+  EXPECT_NE(hash(PiecewiseHashTester("foobar", {3})),
+            absl::Hash<DummyFooBar>()(DummyFooBar{}));
+
+  // Test hashing a large buffer incrementally, broken up in several different
+  // ways.  Arrange for breaks on and near the stride boundaries to look for
+  // off-by-one errors in the implementation.
+  //
+  // This test is run on a buffer that is a multiple of the stride size, and one
+  // that isn't.
+  for (size_t big_buffer_size : {1024 * 2 + 512, 1024 * 3}) {
+    SCOPED_TRACE(big_buffer_size);
+    std::string big_buffer;
+    for (int i = 0; i < big_buffer_size; ++i) {
+      // Arbitrary std::string
+      big_buffer.push_back(32 + (i * (i / 3)) % 64);
+    }
+    auto big_buffer_hash = hash(PiecewiseHashTester(big_buffer));
+
+    const int possible_breaks = 9;
+    size_t breaks[possible_breaks] = {1,    512,  1023, 1024, 1025,
+                                      1536, 2047, 2048, 2049};
+    for (unsigned test_mask = 0; test_mask < (1u << possible_breaks);
+         ++test_mask) {
+      SCOPED_TRACE(test_mask);
+      std::set<size_t> break_locations;
+      for (int j = 0; j < possible_breaks; ++j) {
+        if (test_mask & (1u << j)) {
+          break_locations.insert(breaks[j]);
+        }
+      }
+      EXPECT_EQ(
+          hash(PiecewiseHashTester(big_buffer, std::move(break_locations))),
+          big_buffer_hash);
+    }
+  }
+}
+
 TEST(HashValueTest, PrivateSanity) {
   // Sanity check that Private is working as the tests below expect it to work.
   EXPECT_TRUE(is_hashable<Private>::value);
diff --git a/absl/hash/internal/hash.cc b/absl/hash/internal/hash.cc
index 4ab7a9f8bfd9..c17f3be1719a 100644
--- a/absl/hash/internal/hash.cc
+++ b/absl/hash/internal/hash.cc
@@ -17,6 +17,36 @@
 namespace absl {
 namespace hash_internal {
 
+uint64_t CityHashState::CombineLargeContiguousImpl32(uint64_t state,
+                                                     const unsigned char* first,
+                                                     size_t len) {
+  while (len >= PiecewiseChunkSize()) {
+    state =
+        Mix(state, absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first),
+                                         PiecewiseChunkSize()));
+    len -= PiecewiseChunkSize();
+    first += PiecewiseChunkSize();
+  }
+  // Handle the remainder.
+  return CombineContiguousImpl(state, first, len,
+                               std::integral_constant<int, 4>{});
+}
+
+uint64_t CityHashState::CombineLargeContiguousImpl64(uint64_t state,
+                                                     const unsigned char* first,
+                                                     size_t len) {
+  while (len >= PiecewiseChunkSize()) {
+    state =
+        Mix(state, absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first),
+                                         PiecewiseChunkSize()));
+    len -= PiecewiseChunkSize();
+    first += PiecewiseChunkSize();
+  }
+  // Handle the remainder.
+  return CombineContiguousImpl(state, first, len,
+                               std::integral_constant<int, 8>{});
+}
+
 ABSL_CONST_INIT const void* const CityHashState::kSeed = &kSeed;
 
 }  // namespace hash_internal
diff --git a/absl/hash/internal/hash.h b/absl/hash/internal/hash.h
index 4ff4a126f0cd..e99f8e75bd9e 100644
--- a/absl/hash/internal/hash.h
+++ b/absl/hash/internal/hash.h
@@ -52,6 +52,12 @@
 namespace absl {
 namespace hash_internal {
 
+class PiecewiseCombiner;
+
+// Internal detail: Large buffers are hashed in smaller chunks.  This function
+// returns the size of these chunks.
+constexpr int PiecewiseChunkSize() { return 1024; }
+
 // HashStateBase
 //
 // A hash state object represents an intermediate state in the computation
@@ -68,7 +74,7 @@ namespace hash_internal {
 //
 //    `static H combine_contiguous(H state, const unsigned char*, size_t)`.
 //
-// `HashStateBase` will provide a complete implementations for a hash state
+// `HashStateBase` will provide a complete implementation for a hash state
 // object in terms of this method.
 //
 // Example:
@@ -117,6 +123,9 @@ class HashStateBase {
   // for-loop instead.
   template <typename T>
   static H combine_contiguous(H state, const T* data, size_t size);
+
+ private:
+  friend class PiecewiseCombiner;
 };
 
 // is_uniquely_represented
@@ -187,6 +196,61 @@ H hash_bytes(H hash_state, const T& value) {
   return H::combine_contiguous(std::move(hash_state), start, sizeof(value));
 }
 
+// PiecewiseCombiner
+//
+// PiecewiseCombiner is an internal-only helper class for hashing a piecewise
+// buffer of `char` or `unsigned char` as though it were contiguous.  This class
+// provides two methods:
+//
+//   H add_buffer(state, data, size)
+//   H finalize(state)
+//
+// `add_buffer` can be called zero or more times, followed by a single call to
+// `finalize`.  This will produce the same hash expansion as concatenating each
+// buffer piece into a single contiguous buffer, and passing this to
+// `H::combine_contiguous`.
+//
+//  Example usage:
+//    PiecewiseCombiner combiner;
+//    for (const auto& piece : pieces) {
+//      state = combiner.add_buffer(std::move(state), piece.data, piece.size);
+//    }
+//    return combiner.finalize(std::move(state));
+class PiecewiseCombiner {
+ public:
+  PiecewiseCombiner() : position_(0) {}
+  PiecewiseCombiner(const PiecewiseCombiner&) = delete;
+  PiecewiseCombiner& operator=(const PiecewiseCombiner&) = delete;
+
+  // PiecewiseCombiner::add_buffer()
+  //
+  // Appends the given range of bytes to the sequence to be hashed, which may
+  // modify the provided hash state.
+  template <typename H>
+  H add_buffer(H state, const unsigned char* data, size_t size);
+  template <typename H>
+  H add_buffer(H state, const char* data, size_t size) {
+    return add_buffer(std::move(state),
+                      reinterpret_cast<const unsigned char*>(data), size);
+  }
+
+  // PiecewiseCombiner::finalize()
+  //
+  // Finishes combining the hash sequence, which may may modify the provided
+  // hash state.
+  //
+  // Once finalize() is called, add_buffer() may no longer be called. The
+  // resulting hash state will be the same as if the pieces passed to
+  // add_buffer() were concatenated into a single flat buffer, and then provided
+  // to H::combine_contiguous().
+  template <typename H>
+  H finalize(H state);
+
+ private:
+  unsigned char buf_[PiecewiseChunkSize()];
+  size_t position_;
+};
+
 // -----------------------------------------------------------------------------
 // AbslHashValue for Basic Types
 // -----------------------------------------------------------------------------
@@ -709,6 +773,16 @@ class CityHashState : public HashStateBase<CityHashState> {
                                         std::integral_constant<int, 8>
                                         /* sizeof_size_t*/);
 
+  // Slow dispatch path for calls to CombineContiguousImpl with a size argument
+  // larger than PiecewiseChunkSize().  Has the same effect as calling
+  // CombineContiguousImpl() repeatedly with the chunk stride size.
+  static uint64_t CombineLargeContiguousImpl32(uint64_t state,
+                                               const unsigned char* first,
+                                               size_t len);
+  static uint64_t CombineLargeContiguousImpl64(uint64_t state,
+                                               const unsigned char* first,
+                                               size_t len);
+
   // Reads 9 to 16 bytes from p.
   // The first 8 bytes are in .first, the rest (zero padded) bytes are in
   // .second.
@@ -776,6 +850,9 @@ inline uint64_t CityHashState::CombineContiguousImpl(
   // multiplicative hash.
   uint64_t v;
   if (len > 8) {
+    if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) {
+      return CombineLargeContiguousImpl32(state, first, len);
+    }
     v = absl::hash_internal::CityHash32(reinterpret_cast<const char*>(first), len);
   } else if (len >= 4) {
     v = Read4To8(first, len);
@@ -796,6 +873,9 @@ inline uint64_t CityHashState::CombineContiguousImpl(
   // multiplicative hash.
   uint64_t v;
   if (len > 16) {
+    if (ABSL_PREDICT_FALSE(len > PiecewiseChunkSize())) {
+      return CombineLargeContiguousImpl64(state, first, len);
+    }
     v = absl::hash_internal::CityHash64(reinterpret_cast<const char*>(first), len);
   } else if (len > 8) {
     auto p = Read9To16(first, len);
@@ -812,7 +892,6 @@ inline uint64_t CityHashState::CombineContiguousImpl(
   return Mix(state, v);
 }
 
-
 struct AggregateBarrier {};
 
 // HashImpl
@@ -849,6 +928,44 @@ template <typename T>
 H HashStateBase<H>::combine_contiguous(H state, const T* data, size_t size) {
   return hash_internal::hash_range_or_bytes(std::move(state), data, size);
 }
+
+// HashStateBase::PiecewiseCombiner::add_buffer()
+template <typename H>
+H PiecewiseCombiner::add_buffer(H state, const unsigned char* data,
+                                size_t size) {
+  if (position_ + size < PiecewiseChunkSize()) {
+    // This partial chunk does not fill our existing buffer
+    memcpy(buf_ + position_, data, size);
+    position_ += size;
+    return std::move(state);
+  }
+
+  // Complete the buffer and hash it
+  const size_t bytes_needed = PiecewiseChunkSize() - position_;
+  memcpy(buf_ + position_, data, bytes_needed);
+  state = H::combine_contiguous(std::move(state), buf_, PiecewiseChunkSize());
+  data += bytes_needed;
+  size -= bytes_needed;
+
+  // Hash whatever chunks we can without copying
+  while (size >= PiecewiseChunkSize()) {
+    state = H::combine_contiguous(std::move(state), data, PiecewiseChunkSize());
+    data += PiecewiseChunkSize();
+    size -= PiecewiseChunkSize();
+  }
+  // Fill the buffer with the remainder
+  memcpy(buf_, data, size);
+  position_ = size;
+  return std::move(state);
+}
+
+// HashStateBase::PiecewiseCombiner::finalize()
+template <typename H>
+H PiecewiseCombiner::finalize(H state) {
+  // Hash the remainder left in the buffer, which may be empty
+  return H::combine_contiguous(std::move(state), buf_, position_);
+}
+
 }  // namespace hash_internal
 }  // namespace absl
 
diff --git a/absl/hash/internal/spy_hash_state.h b/absl/hash/internal/spy_hash_state.h
index c4cc8d07337c..05c7cafeb0ee 100644
--- a/absl/hash/internal/spy_hash_state.h
+++ b/absl/hash/internal/spy_hash_state.h
@@ -146,6 +146,19 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
   static SpyHashStateImpl combine_contiguous(SpyHashStateImpl hash_state,
                                              const unsigned char* begin,
                                              size_t size) {
+    const size_t large_chunk_stride = PiecewiseChunkSize();
+    if (size > large_chunk_stride) {
+      // Combining a large contiguous buffer must have the same effect as
+      // doing it piecewise by the stride length, followed by the (possibly
+      // empty) remainder.
+      while (size >= large_chunk_stride) {
+        hash_state = SpyHashStateImpl::combine_contiguous(
+            std::move(hash_state), begin, large_chunk_stride);
+        begin += large_chunk_stride;
+        size -= large_chunk_stride;
+      }
+    }
+
     hash_state.hash_representation_.emplace_back(
         reinterpret_cast<const char*>(begin), size);
     return hash_state;