about summary refs log tree commit diff
path: root/absl/random/internal/randen_hwaes.cc
diff options
context:
space:
mode:
author: Abseil Team <absl-team@google.com> 2019-08-15T17·06-0700
committer: vslashg <gfalcon@google.com> 2019-08-15T18·36-0400
commit: aae8143cf9aa611f70d7ea9b95b8b8b383b2271a (patch)
tree: 131d9bf69d90050cc21bdb5c35bca797c58dd5c6 /absl/random/internal/randen_hwaes.cc
parent: d9aa92d7fb324314f9df487ac23d32a25650b742 (diff)
Export of internal Abseil changes
--
f28b989d5161230c6561e923b458c797a96bcf90 by Greg Falcon <gfalcon@google.com>:

Import of CCTZ from GitHub.

PiperOrigin-RevId: 263586488

--
8259484025b7de45358719fc6182a48cac8044c6 by Andy Soffer <asoffer@google.com>:

Internal changes and combine namespaces into a single namespace.

PiperOrigin-RevId: 263560576

--
8d19f41661984a600d1f8bbfeb8a30fcb4dee7d6 by Mark Barolak <mbar@google.com>:

Inside of absl::string_view::copy, use absl::string_view::traits_type::copy instead of std::copy to do the actual work.  This both follows the C++ standard more closely and avoids MSVC unchecked iterator warnings.

PiperOrigin-RevId: 263430502

--
c06bf74236e12c7c1c97bfcbbc9d29bd65d6b36c by Andy Soffer <asoffer@google.com>:

Remove force-inlining attributes. Benchmarking results indicate that they are creating meaningful performance differences.

PiperOrigin-RevId: 263364896

--
ec4fa6eac958a9521456201b138784f55d3b17bc by Abseil Team <absl-team@google.com>:

Make BM_Fill benchmarks more representative.

PiperOrigin-RevId: 263349482

--
4ae280b4eb31d9cb58e847eb670473340f7778c1 by Derek Mauro <dmauro@google.com>:

Fix new -Wdeprecated-copy warning in gcc9

PiperOrigin-RevId: 263348118

--
d238a92f452a5c35686f9c71596fdd1fe62090a2 by Matt Calabrese <calabrese@google.com>:

The std::is_trivially_xxx traits fail on versions of GCC up until 7.4 due to faulty underlying intrinsics, but our emulation succeeds. Update our traits to not compare against the standard library implementation in these versions.

PiperOrigin-RevId: 263209457
GitOrigin-RevId: f28b989d5161230c6561e923b458c797a96bcf90
Change-Id: I4c41db5928ba71e243aeace4420e06d1a2df0b5b
Diffstat (limited to 'absl/random/internal/randen_hwaes.cc')
-rw-r--r-- absl/random/internal/randen_hwaes.cc 128
1 files changed, 39 insertions, 89 deletions
diff --git a/absl/random/internal/randen_hwaes.cc b/absl/random/internal/randen_hwaes.cc
index 6b82d1d07ad8..7d5b2b74128b 100644
--- a/absl/random/internal/randen_hwaes.cc
+++ b/absl/random/internal/randen_hwaes.cc
@@ -22,39 +22,9 @@
 #include <cstdint>
 #include <cstring>
 
+#include "absl/base/attributes.h"
 #include "absl/random/internal/platform.h"
 
-// ABSL_HAVE_ATTRIBUTE
-#if !defined(ABSL_HAVE_ATTRIBUTE)
-#ifdef __has_attribute
-#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x)
-#else
-#define ABSL_HAVE_ATTRIBUTE(x) 0
-#endif
-#endif
-
-#if ABSL_HAVE_ATTRIBUTE(always_inline) || \
-    (defined(__GNUC__) && !defined(__clang__))
-#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE \
-  __attribute__((always_inline))
-#elif defined(_MSC_VER)
-// We can achieve something similar to attribute((always_inline)) with MSVC by
-// using the __forceinline keyword, however this is not perfect. MSVC is
-// much less aggressive about inlining, and even with the __forceinline keyword.
-#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __forceinline
-#else
-#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE
-#endif
-
-// ABSL_ATTRIBUTE_FLATTEN enables much more aggressive inlining within
-// the indicated function.
-#undef ABSL_ATTRIBUTE_FLATTEN
-#if ABSL_HAVE_ATTRIBUTE(flatten) || (defined(__GNUC__) && !defined(__clang__))
-#define ABSL_ATTRIBUTE_FLATTEN __attribute__((flatten))
-#else
-#define ABSL_ATTRIBUTE_FLATTEN
-#endif
-
 // ABSL_RANDEN_HWAES_IMPL indicates whether this file will contain
 // a hardware accelerated implementation of randen, or whether it
 // will contain stubs that exit the process.
@@ -146,18 +116,6 @@ void RandenHwAes::Generate(const void*, void*) {
 
 #include "absl/random/internal/randen_traits.h"
 
-// ABSL_FUNCTION_ALIGN32 defines a 32-byte alignment attribute
-// for the functions in this file.
-//
-// NOTE: Determine whether we actually have any wins from ALIGN32
-// using microbenchmarks. If not, remove.
-#undef ABSL_FUNCTION_ALIGN32
-#if ABSL_HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))
-#define ABSL_FUNCTION_ALIGN32 __attribute__((aligned(32)))
-#else
-#define ABSL_FUNCTION_ALIGN32
-#endif
-
 // TARGET_CRYPTO defines a crypto attribute for each architecture.
 //
 // NOTE: Evaluate whether we should eliminate ABSL_TARGET_CRYPTO.
@@ -191,8 +149,7 @@ using Vector128 = __vector unsigned long long;  // NOLINT(runtime/int)
 
 namespace {
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
-ReverseBytes(const Vector128& v) {
+inline ABSL_TARGET_CRYPTO Vector128 ReverseBytes(const Vector128& v) {
   // Reverses the bytes of the vector.
   const __vector unsigned char perm = {15, 14, 13, 12, 11, 10, 9, 8,
                                        7,  6,  5,  4,  3,  2,  1, 0};
@@ -202,26 +159,26 @@ ReverseBytes(const Vector128& v) {
 // WARNING: these load/store in native byte order. It is OK to load and then
 // store an unchanged vector, but interpreting the bits as a number or input
 // to AES will have undefined results.
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
+inline ABSL_TARGET_CRYPTO Vector128
 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) {
   return vec_vsx_ld(0, reinterpret_cast<const Vector128*>(from));
 }
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void
-Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
+inline ABSL_TARGET_CRYPTO void Vector128Store(
+    const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
   vec_vsx_st(v, 0, reinterpret_cast<Vector128*>(to));
 }
 
 // One round of AES. "round_key" is a public constant for breaking the
 // symmetry of AES (ensures previously equal columns differ afterwards).
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
-AesRound(const Vector128& state, const Vector128& round_key) {
+inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state,
+                                             const Vector128& round_key) {
   return Vector128(__builtin_crypto_vcipher(state, round_key));
 }
 
 // Enables native loads in the round loop by pre-swapping.
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void
-SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
+inline ABSL_TARGET_CRYPTO void SwapEndian(
+    uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
   using absl::random_internal::RandenTraits;
   constexpr size_t kLanes = 2;
   constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks;
@@ -273,20 +230,20 @@ using Vector128 = uint8x16_t;
 
 namespace {
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
+inline ABSL_TARGET_CRYPTO Vector128
 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) {
   return vld1q_u8(reinterpret_cast<const uint8_t*>(from));
 }
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void
-Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
+inline ABSL_TARGET_CRYPTO void Vector128Store(
+    const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
   vst1q_u8(reinterpret_cast<uint8_t*>(to), v);
 }
 
 // One round of AES. "round_key" is a public constant for breaking the
 // symmetry of AES (ensures previously equal columns differ afterwards).
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
-AesRound(const Vector128& state, const Vector128& round_key) {
+inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state,
+                                             const Vector128& round_key) {
   // It is important to always use the full round function - omitting the
   // final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf]
   // and does not help because we never decrypt.
@@ -297,8 +254,8 @@ AesRound(const Vector128& state, const Vector128& round_key) {
   return vaesmcq_u8(vaeseq_u8(state, uint8x16_t{})) ^ round_key;
 }
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void
-SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {}
+inline ABSL_TARGET_CRYPTO void SwapEndian(
+    uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {}
 
 }  // namespace
 
@@ -313,16 +270,11 @@ namespace {
 class Vector128 {
  public:
   // Convert from/to intrinsics.
-  inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE explicit Vector128(
-      const __m128i& Vector128)
-      : data_(Vector128) {}
+  inline explicit Vector128(const __m128i& Vector128) : data_(Vector128) {}
 
-  inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __m128i data() const {
-    return data_;
-  }
+  inline __m128i data() const { return data_; }
 
-  inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=(
-      const Vector128& other) {
+  inline Vector128& operator^=(const Vector128& other) {
     data_ = _mm_xor_si128(data_, other.data());
     return *this;
   }
@@ -331,29 +283,29 @@ class Vector128 {
   __m128i data_;
 };
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
+inline ABSL_TARGET_CRYPTO Vector128
 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) {
   return Vector128(_mm_load_si128(reinterpret_cast<const __m128i*>(from)));
 }
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void
-Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
+inline ABSL_TARGET_CRYPTO void Vector128Store(
+    const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) {
   _mm_store_si128(reinterpret_cast<__m128i * ABSL_RANDOM_INTERNAL_RESTRICT>(to),
                   v.data());
 }
 
 // One round of AES. "round_key" is a public constant for breaking the
 // symmetry of AES (ensures previously equal columns differ afterwards).
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128
-AesRound(const Vector128& state, const Vector128& round_key) {
+inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state,
+                                             const Vector128& round_key) {
   // It is important to always use the full round function - omitting the
   // final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf]
   // and does not help because we never decrypt.
   return Vector128(_mm_aesenc_si128(state.data(), round_key.data()));
 }
 
-inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void
-SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {}
+inline ABSL_TARGET_CRYPTO void SwapEndian(
+    uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {}
 
 }  // namespace
 
@@ -450,8 +402,8 @@ constexpr size_t kLanes = 2;
 
 // Block shuffles applies a shuffle to the entire state between AES rounds.
 // Improved odd-even shuffle from "New criterion for diffusion property".
-inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void
-BlockShuffle(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
+inline ABSL_TARGET_CRYPTO void BlockShuffle(
+    uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
   static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks.");
 
   constexpr size_t shuffle[kFeistelBlocks] = {7,  2, 13, 4,  11, 8,  3, 6,
@@ -499,10 +451,9 @@ BlockShuffle(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
 // per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in
 // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel
 // XORs are 'free' (included in the second AES instruction).
-inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO const
-    u64x2*
-    FeistelRound(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state,
-                 const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
+inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound(
+    uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state,
+    const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
   static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks.");
 
   // MSVC does a horrible job at unrolling loops.
@@ -561,9 +512,9 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO const
 // Indistinguishable from ideal by chosen-ciphertext adversaries using less than
 // 2^64 queries if the round function is a PRF. This is similar to the b=8 case
 // of Simpira v2, but more efficient than its generic construction for b=16.
-inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void
-Permute(const void* ABSL_RANDOM_INTERNAL_RESTRICT keys,
-        uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
+inline ABSL_TARGET_CRYPTO void Permute(
+    const void* ABSL_RANDOM_INTERNAL_RESTRICT keys,
+    uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) {
   const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 =
       static_cast<const u64x2*>(keys);
 
@@ -584,16 +535,15 @@ namespace random_internal {
 
 bool HasRandenHwAesImplementation() { return true; }
 
-const void* ABSL_TARGET_CRYPTO ABSL_FUNCTION_ALIGN32 ABSL_ATTRIBUTE_FLATTEN
-RandenHwAes::GetKeys() {
+const void* ABSL_TARGET_CRYPTO RandenHwAes::GetKeys() {
   // Round keys for one AES per Feistel round and branch.
   // The canonical implementation uses first digits of Pi.
   return round_keys;
 }
 
 // NOLINTNEXTLINE
-void ABSL_TARGET_CRYPTO ABSL_FUNCTION_ALIGN32 ABSL_ATTRIBUTE_FLATTEN
-RandenHwAes::Absorb(const void* seed_void, void* state_void) {
+void ABSL_TARGET_CRYPTO RandenHwAes::Absorb(const void* seed_void,
+                                            void* state_void) {
   uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state =
       reinterpret_cast<uint64_t*>(state_void);
   const uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT seed =
@@ -669,8 +619,8 @@ RandenHwAes::Absorb(const void* seed_void, void* state_void) {
 }
 
 // NOLINTNEXTLINE
-void ABSL_TARGET_CRYPTO ABSL_FUNCTION_ALIGN32 ABSL_ATTRIBUTE_FLATTEN
-RandenHwAes::Generate(const void* keys, void* state_void) {
+void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys,
+                                              void* state_void) {
   static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch");
 
   uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state =