Crypto++  6.1
Free C++ class library of cryptographic schemes
shacal2-simd.cpp
1 // shacal2-simd.cpp - written and placed in the public domain by
2 // Jeffrey Walton and Jack Lloyd
3 //
4 // Jack Lloyd and the Botan team allowed Crypto++ to use parts of
5 // Botan's implementation under the same license as Crypto++
6 // is released. The code for SHACAL2_Enc_ProcessAndXorBlock_SHANI
7 // below is Botan's x86_encrypt_blocks with minor tweaks. Many thanks
8 // to the Botan team. Also see http://github.com/randombit/botan/.
9 //
10 // This source file uses intrinsics to gain access to SHA-NI and
11 // ARMv8a SHA instructions. A separate source file is needed because
12 // additional CXXFLAGS are required to enable the appropriate instruction
13 // sets in some build configurations.
14 
15 #include "pch.h"
16 #include "config.h"
17 #include "sha.h"
18 #include "misc.h"
19 
20 #if (CRYPTOPP_SHANI_AVAILABLE)
21 # include <nmmintrin.h>
22 # include <immintrin.h>
23 #endif
24 
25 // Use ARMv8 rather than NEON due to compiler inconsistencies
26 #if (CRYPTOPP_ARM_SHA_AVAILABLE)
27 # include <arm_neon.h>
28 #endif
29 
30 // Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many
31 // compilers don't follow ACLE conventions for the include.
32 #if defined(CRYPTOPP_ARM_ACLE_AVAILABLE)
33 # include <stdint.h>
34 # include <arm_acle.h>
35 #endif
36 
37 // Clang __m128i casts, http://bugs.llvm.org/show_bug.cgi?id=20670
38 #define M128_CAST(x) ((__m128i *)(void *)(x))
39 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
40 
41 NAMESPACE_BEGIN(CryptoPP)
42 
43 #if CRYPTOPP_SHANI_AVAILABLE
44 void SHACAL2_Enc_ProcessAndXorBlock_SHANI(const word32* subKeys, const byte *inBlock, const byte *xorBlock, byte *outBlock)
45 {
46  CRYPTOPP_ASSERT(subKeys);
47  CRYPTOPP_ASSERT(inBlock);
48  CRYPTOPP_ASSERT(outBlock);
49 
50  const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
51  const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
52 
53  __m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 0)), MASK1);
54  __m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 16)), MASK2);
55 
56  __m128i TMP = _mm_alignr_epi8(B0, B1, 8);
57  B1 = _mm_blend_epi16(B1, B0, 0xF0);
58  B0 = TMP;
59 
60 #if 0
61  // SSE2 + SSSE3, but 0.2 cpb slower on a Celeraon J3455
62  const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
63  const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
64 
65  __m128i B0 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 0));
66  __m128i B1 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 16));
67 
68  __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
69  B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
70  B0 = TMP;
71 #endif
72 
73  const byte* keys = reinterpret_cast<const byte*>(subKeys);
74  for (size_t i = 0; i != 8; ++i)
75  {
76  const __m128i RK0 = _mm_load_si128(CONST_M128_CAST(keys + 32*i));
77  const __m128i RK2 = _mm_load_si128(CONST_M128_CAST(keys + 32*i+16));
78  const __m128i RK1 = _mm_srli_si128(RK0, 8);
79  const __m128i RK3 = _mm_srli_si128(RK2, 8);
80 
81  B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
82  B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
83  B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
84  B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
85  }
86 
87  TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
88  B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
89  B0 = TMP;
90 
91  if (xorBlock)
92  {
93  _mm_storeu_si128(M128_CAST(outBlock + 0),
94  _mm_xor_si128(B0, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 0))));
95 
96  _mm_storeu_si128(M128_CAST(outBlock + 16),
97  _mm_xor_si128(B1, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 16))));
98  }
99  else
100  {
101  _mm_storeu_si128(M128_CAST(outBlock + 0), B0);
102  _mm_storeu_si128(M128_CAST(outBlock + 16), B1);
103  }
104 }
105 #endif
106 
107 NAMESPACE_END
Utility functions for the Crypto++ library.
Library configuration file.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:60
Classes for SHA-1 and SHA-2 family of message digests.
Crypto++ library namespace.