diff options
Diffstat (limited to 'absl/random/internal')
-rw-r--r-- | absl/random/internal/nanobenchmark.cc | 22 | ||||
-rw-r--r-- | absl/random/internal/platform.h | 42 | ||||
-rw-r--r-- | absl/random/internal/randen_hwaes.cc | 94 | ||||
-rw-r--r-- | absl/random/internal/randen_slow.cc | 36 |
4 files changed, 113 insertions, 81 deletions
diff --git a/absl/random/internal/nanobenchmark.cc b/absl/random/internal/nanobenchmark.cc index 5a8b1ed1dba1..7f37800c6830 100644 --- a/absl/random/internal/nanobenchmark.cc +++ b/absl/random/internal/nanobenchmark.cc @@ -59,6 +59,24 @@ #include <time.h> // NOLINT #endif +// ABSL_HAVE_ATTRIBUTE +#if !defined(ABSL_HAVE_ATTRIBUTE) +#ifdef __has_attribute +#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define ABSL_HAVE_ATTRIBUTE(x) 0 +#endif +#endif + +// ABSL_RANDOM_INTERNAL_ATTRIBUTE_NEVER_INLINE prevents inlining of the method. +#if ABSL_HAVE_ATTRIBUTE(noinline) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_NEVER_INLINE __attribute__((noinline)) +#elif defined(_MSC_VER) +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_NEVER_INLINE __declspec(noinline) +#else +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_NEVER_INLINE +#endif + namespace absl { namespace random_internal_nanobenchmark { namespace { @@ -658,8 +676,8 @@ Ticks TotalDuration(const Func func, const void* arg, const InputVec* inputs, } // (Nearly) empty Func for measuring timer overhead/resolution. -ABSL_ATTRIBUTE_NEVER_INLINE FuncOutput EmptyFunc(const void* arg, - const FuncInput input) { +ABSL_RANDOM_INTERNAL_ATTRIBUTE_NEVER_INLINE FuncOutput +EmptyFunc(const void* arg, const FuncInput input) { return input; } diff --git a/absl/random/internal/platform.h b/absl/random/internal/platform.h index 5edab3448bbf..d1ef5c249032 100644 --- a/absl/random/internal/platform.h +++ b/absl/random/internal/platform.h @@ -81,50 +81,8 @@ // Attribute Checks // ----------------------------------------------------------------------------- -// ABSL_HAVE_ATTRIBUTE -#undef ABSL_HAVE_ATTRIBUTE -#ifdef __has_attribute -#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x) -#else -#define ABSL_HAVE_ATTRIBUTE(x) 0 -#endif - -// ABSL_ATTRIBUTE_ALWAYS_INLINE forces inlining of the method. -#undef ABSL_ATTRIBUTE_ALWAYS_INLINE -#if ABSL_HAVE_ATTRIBUTE(always_inline) || \ - (defined(__GNUC__) && !defined(__clang__)) -#define ABSL_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) -#elif defined(_MSC_VER) -// We can achieve something similar to attribute((always_inline)) with MSVC by -// using the __forceinline keyword, however this is not perfect. MSVC is -// much less aggressive about inlining, and even with the __forceinline keyword. -#define ABSL_ATTRIBUTE_ALWAYS_INLINE __forceinline -#else -#define ABSL_ATTRIBUTE_ALWAYS_INLINE -#endif - -// ABSL_ATTRIBUTE_NEVER_INLINE prevents inlining of the method. -#undef ABSL_ATTRIBUTE_NEVER_INLINE -#if ABSL_HAVE_ATTRIBUTE(noinline) || (defined(__GNUC__) && !defined(__clang__)) -#define ABSL_ATTRIBUTE_NEVER_INLINE __attribute__((noinline)) -#elif defined(_MSC_VER) -#define ABSL_ATTRIBUTE_NEVER_INLINE __declspec(noinline) -#else -#define ABSL_ATTRIBUTE_NEVER_INLINE -#endif - -// ABSL_ATTRIBUTE_FLATTEN enables much more aggressive inlining within -// the indicated function. -#undef ABSL_ATTRIBUTE_FLATTEN -#if ABSL_HAVE_ATTRIBUTE(flatten) || (defined(__GNUC__) && !defined(__clang__)) -#define ABSL_ATTRIBUTE_FLATTEN __attribute__((flatten)) -#else -#define ABSL_ATTRIBUTE_FLATTEN -#endif - // ABSL_RANDOM_INTERNAL_RESTRICT annotates whether pointers may be considered // to be unaliased. -#undef ABSL_RANDOM_INTERNAL_RESTRICT #if defined(__clang__) || defined(__GNUC__) #define ABSL_RANDOM_INTERNAL_RESTRICT __restrict__ #elif defined(_MSC_VER) diff --git a/absl/random/internal/randen_hwaes.cc b/absl/random/internal/randen_hwaes.cc index 0fcd9a85a8b9..6b82d1d07ad8 100644 --- a/absl/random/internal/randen_hwaes.cc +++ b/absl/random/internal/randen_hwaes.cc @@ -24,6 +24,37 @@ #include "absl/random/internal/platform.h" +// ABSL_HAVE_ATTRIBUTE +#if !defined(ABSL_HAVE_ATTRIBUTE) +#ifdef __has_attribute +#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define ABSL_HAVE_ATTRIBUTE(x) 0 +#endif +#endif + +#if ABSL_HAVE_ATTRIBUTE(always_inline) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE \ + __attribute__((always_inline)) +#elif defined(_MSC_VER) +// We can achieve something similar to attribute((always_inline)) with MSVC by +// using the __forceinline keyword, however this is not perfect. MSVC is +// much less aggressive about inlining, and even with the __forceinline keyword. +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __forceinline +#else +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE +#endif + +// ABSL_ATTRIBUTE_FLATTEN enables much more aggressive inlining within +// the indicated function. +#undef ABSL_ATTRIBUTE_FLATTEN +#if ABSL_HAVE_ATTRIBUTE(flatten) || (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_ATTRIBUTE_FLATTEN __attribute__((flatten)) +#else +#define ABSL_ATTRIBUTE_FLATTEN +#endif + // ABSL_RANDEN_HWAES_IMPL indicates whether this file will contain // a hardware accelerated implementation of randen, or whether it // will contain stubs that exit the process. @@ -160,7 +191,7 @@ using Vector128 = __vector unsigned long long; // NOLINT(runtime/int) namespace { -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 ReverseBytes(const Vector128& v) { // Reverses the bytes of the vector. const __vector unsigned char perm = {15, 14, 13, 12, 11, 10, 9, 8, @@ -171,26 +202,26 @@ ReverseBytes(const Vector128& v) { // WARNING: these load/store in native byte order. It is OK to load and then // store an unchanged vector, but interpreting the bits as a number or input // to AES will have undefined results. -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return vec_vsx_ld(0, reinterpret_cast<const Vector128*>(from)); } -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( - const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void +Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { vec_vsx_st(v, 0, reinterpret_cast<Vector128*>(to)); } // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 AesRound(const Vector128& state, const Vector128& round_key) { return Vector128(__builtin_crypto_vcipher(state, round_key)); } // Enables native loads in the round loop by pre-swapping. -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE void SwapEndian( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void +SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { using absl::random_internal::RandenTraits; constexpr size_t kLanes = 2; constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks; @@ -242,19 +273,19 @@ using Vector128 = uint8x16_t; namespace { -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return vld1q_u8(reinterpret_cast<const uint8_t*>(from)); } -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( - const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void +Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { vst1q_u8(reinterpret_cast<uint8_t*>(to), v); } // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 AesRound(const Vector128& state, const Vector128& round_key) { // It is important to always use the full round function - omitting the // final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf] @@ -266,8 +297,8 @@ AesRound(const Vector128& state, const Vector128& round_key) { return vaesmcq_u8(vaeseq_u8(state, uint8x16_t{})) ^ round_key; } -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE void SwapEndian( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void +SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} } // namespace @@ -282,13 +313,15 @@ namespace { class Vector128 { public: // Convert from/to intrinsics. - inline ABSL_ATTRIBUTE_ALWAYS_INLINE explicit Vector128( + inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE explicit Vector128( const __m128i& Vector128) : data_(Vector128) {} - inline ABSL_ATTRIBUTE_ALWAYS_INLINE __m128i data() const { return data_; } + inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __m128i data() const { + return data_; + } - inline ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( + inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( const Vector128& other) { data_ = _mm_xor_si128(data_, other.data()); return *this; @@ -298,20 +331,20 @@ class Vector128 { __m128i data_; }; -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return Vector128(_mm_load_si128(reinterpret_cast<const __m128i*>(from))); } -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( - const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void +Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { _mm_store_si128(reinterpret_cast<__m128i * ABSL_RANDOM_INTERNAL_RESTRICT>(to), v.data()); } // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 AesRound(const Vector128& state, const Vector128& round_key) { // It is important to always use the full round function - omitting the // final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf] @@ -319,8 +352,8 @@ AesRound(const Vector128& state, const Vector128& round_key) { return Vector128(_mm_aesenc_si128(state.data(), round_key.data())); } -inline ABSL_TARGET_CRYPTO ABSL_ATTRIBUTE_ALWAYS_INLINE void SwapEndian( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} +inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void +SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} } // namespace @@ -417,8 +450,8 @@ constexpr size_t kLanes = 2; // Block shuffles applies a shuffle to the entire state between AES rounds. // Improved odd-even shuffle from "New criterion for diffusion property". -inline ABSL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void BlockShuffle( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void +BlockShuffle(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); constexpr size_t shuffle[kFeistelBlocks] = {7, 2, 13, 4, 11, 8, 3, 6, @@ -466,9 +499,10 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void BlockShuffle( // per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). -inline ABSL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO const u64x2* -FeistelRound(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, - const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO const + u64x2* + FeistelRound(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, + const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); // MSVC does a horrible job at unrolling loops. @@ -527,9 +561,9 @@ FeistelRound(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, // Indistinguishable from ideal by chosen-ciphertext adversaries using less than // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. -inline ABSL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void Permute( - const void* ABSL_RANDOM_INTERNAL_RESTRICT keys, - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void +Permute(const void* ABSL_RANDOM_INTERNAL_RESTRICT keys, + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = static_cast<const u64x2*>(keys); diff --git a/absl/random/internal/randen_slow.cc b/absl/random/internal/randen_slow.cc index b2ecabff878c..7a2e2daa53f5 100644 --- a/absl/random/internal/randen_slow.cc +++ b/absl/random/internal/randen_slow.cc @@ -20,6 +20,28 @@ #include "absl/random/internal/platform.h" +// ABSL_HAVE_ATTRIBUTE +#if !defined(ABSL_HAVE_ATTRIBUTE) +#ifdef __has_attribute +#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define ABSL_HAVE_ATTRIBUTE(x) 0 +#endif +#endif + +#if ABSL_HAVE_ATTRIBUTE(always_inline) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE \ + __attribute__((always_inline)) +#elif defined(_MSC_VER) +// We can achieve something similar to attribute((always_inline)) with MSVC by +// using the __forceinline keyword, however this is not perfect. MSVC is +// much less aggressive about inlining, and even with the __forceinline keyword. +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __forceinline +#else +#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE +#endif + namespace { // AES portions based on rijndael-alg-fst.c, @@ -222,7 +244,7 @@ struct alignas(16) u64x2 { // as an underlying vector register. // struct Vector128 { - inline ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( + inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( const Vector128& other) { s[0] ^= other.s[0]; s[1] ^= other.s[1]; @@ -234,7 +256,7 @@ struct Vector128 { uint32_t s[4]; }; -inline ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { Vector128 result; const uint8_t* ABSL_RANDOM_INTERNAL_RESTRICT src = @@ -259,7 +281,7 @@ Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return result; } -inline ABSL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { uint8_t* dst = reinterpret_cast<uint8_t*>(to); dst[0] = static_cast<uint8_t>(v.s[0] >> 24); @@ -282,7 +304,7 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 AesRound(const Vector128& state, const Vector128& round_key) { // clang-format off Vector128 result; @@ -348,7 +370,7 @@ static_assert(kKeys == kRoundKeys, "kKeys and kRoundKeys must be equal"); static constexpr size_t kLanes = 2; // The improved Feistel block shuffle function for 16 blocks. -inline ABSL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state_u64) { static_assert(kFeistelBlocks == 16, "Feistel block shuffle only works for 16 blocks."); @@ -409,7 +431,7 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( // per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). -inline ABSL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { for (size_t branch = 0; branch < kFeistelBlocks; branch += 4) { @@ -435,7 +457,7 @@ inline ABSL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( // Indistinguishable from ideal by chosen-ciphertext adversaries using less than // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. -inline ABSL_ATTRIBUTE_ALWAYS_INLINE void Permute( +inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Permute( const void* keys, uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = static_cast<const u64x2*>(keys); |