Crypto++  8.4
Free C++ class library of cryptographic schemes
gcm.cpp
1 // gcm.cpp - originally written and placed in the public domain by Wei Dai.
2 // ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3 // multiply routines are less efficient because they shadow x86.
4 // The precomputed key table integration makes it tricky to use the
5 // more efficient ARMv8 implementation of the multiply and reduce.
6 
7 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8 
9 #include "pch.h"
10 #include "config.h"
11 
12 #ifndef CRYPTOPP_IMPORTS
13 #ifndef CRYPTOPP_GENERATE_X64_MASM
14 
15 // Visual Studio .Net 2003 compiler crash
16 #if defined(_MSC_VER) && (_MSC_VER < 1400)
17 # pragma optimize("", off)
18 #endif
19 
20 #include "gcm.h"
21 #include "cpu.h"
22 
23 #if defined(CRYPTOPP_DISABLE_GCM_ASM)
24 # undef CRYPTOPP_X86_ASM_AVAILABLE
25 # undef CRYPTOPP_X32_ASM_AVAILABLE
26 # undef CRYPTOPP_X64_ASM_AVAILABLE
27 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
28 #endif
29 
30 NAMESPACE_BEGIN(CryptoPP)
31 
32 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33 // Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34 // 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35 #if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36 // 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37 # define USE_MOVD_REG32 1
38 #elif defined(__GNUC__) || defined(_MSC_VER)
39 // 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40 # define USE_MOVD_REG32_OR_REG64 1
41 #else
42 // 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43 # define USE_MOV_REG32_OR_REG64 1
44 #endif
45 #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46 
47 // Clang intrinsic casts, http://bugs.llvm.org/show_bug.cgi?id=20670
48 #define M128_CAST(x) ((__m128i *)(void *)(x))
49 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
50 
// Shared 256-entry byte-reduction table used by the software 2K-table GHASH
// path. Filled on first use in SetKeyWithoutResync(); the 'volatile' flag
// marks one-time initialization. NOTE(review): volatile alone is not a full
// thread-safety guarantee for concurrent first-time keying — confirm callers.
word16 GCM_Base::s_reductionTable[256];
volatile bool GCM_Base::s_reductionTableInitialized = false;
53 
54 void GCM_Base::GCTR::IncrementCounterBy256()
55 {
56  IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
57 }
58 
59 static inline void Xor16(byte *a, const byte *b, const byte *c)
60 {
61  CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
62  CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
63  CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
64  ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
65  ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
66 }
67 
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
#endif // SSE2

// Out-of-line 16-byte XOR helpers, defined in platform-specific sources.
#if CRYPTOPP_ARM_NEON_AVAILABLE
extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
#endif

#if CRYPTOPP_POWER8_AVAILABLE
extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
#endif

// Hardware carry-less multiply entry points (defined out-of-line in
// platform-specific sources). Each platform fixes the multiplication
// table at 8 blocks; at most one of these sections is compiled in, so
// s_cltableSizeInBlocks is defined exactly once.
#if CRYPTOPP_CLMUL_AVAILABLE
extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
const unsigned int s_cltableSizeInBlocks = 8;
extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
#endif // CRYPTOPP_CLMUL_AVAILABLE

#if CRYPTOPP_ARM_PMULL_AVAILABLE
extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
const unsigned int s_cltableSizeInBlocks = 8;
extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
#endif // CRYPTOPP_ARM_PMULL_AVAILABLE

#if CRYPTOPP_POWER8_VMULL_AVAILABLE
extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
const unsigned int s_cltableSizeInBlocks = 8;
extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
102 
// Key the underlying block cipher, derive the GHASH key H = E_K(0^128),
// and precompute the multiplication table used by AuthenticateBlocks().
//
// params may carry Name::TableSize() in bytes: the software paths round it
// to either 2K or 64K; hardware carry-less multiply paths ignore it and use
// a fixed table of s_cltableSizeInBlocks blocks.
// Throws InvalidArgument when the cipher's block size is not 16.
void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
{
    BlockCipher &blockCipher = AccessBlockCipher();
    blockCipher.SetKey(userKey, keylength, params);

    // GCM is only defined for 16-byte block ciphers at the moment.
    // However, variable blocksize support means we have to defer
    // blocksize checks to runtime after the key is set. Also see
    // https://github.com/weidai11/cryptopp/issues/408.
    const unsigned int blockSize = blockCipher.BlockSize();
    CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
    if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
        throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");

    int tableSize, i, j, k;

#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        // Avoid "parameter not used" error and suppress Coverity finding
        (void)params.GetIntValue(Name::TableSize(), tableSize);
        tableSize = s_cltableSizeInBlocks * blockSize;
        CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
    }
    else
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        // Avoid "parameter not used" error and suppress Coverity finding
        (void)params.GetIntValue(Name::TableSize(), tableSize);
        tableSize = s_cltableSizeInBlocks * blockSize;
        CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
    }
    else
#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
    if (HasPMULL())
    {
        // Avoid "parameter not used" error and suppress Coverity finding
        (void)params.GetIntValue(Name::TableSize(), tableSize);
        tableSize = s_cltableSizeInBlocks * blockSize;
        CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
    }
    else
#endif
    {
        // Software path: honor a caller-supplied table size, rounding it
        // to one of the two supported layouts (64K or 2K).
        if (params.GetIntValue(Name::TableSize(), tableSize))
            tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
        else
            tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;

        //#if defined(_MSC_VER) && (_MSC_VER < 1400)
        // VC 2003 workaround: compiler generates bad code for 64K tables
        //tableSize = 2*1024;
        //#endif
    }

    // m_buffer holds 3 blocks of per-key scratch (presumably hash key,
    // hash buffer and counter — accessors are defined elsewhere; confirm)
    // followed by the multiplication table.
    m_buffer.resize(3*blockSize + tableSize);
    byte *mulTable = MulTable();
    byte *hashKey = HashKey();
    memset(hashKey, 0, REQUIRED_BLOCKSIZE);
    blockCipher.ProcessBlock(hashKey);   // H = E_K(0^128)

#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
        return;
    }
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
        return;
    }
#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
    if (HasPMULL())
    {
        GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
        return;
    }
#endif

    word64 V0, V1;
    Block::Get(hashKey)(V0)(V1);

    if (tableSize == 64*1024)
    {
        // 64K layout: 16 byte-indexed tables of 256 entries x 16 bytes.
        // First store H, H*x, H*x^2, ... at the power-of-two slots ...
        for (i=0; i<128; i++)
        {
            k = i%8;
            Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);

            // Multiply (V0,V1) by x in GF(2^128): shift right one bit and,
            // when a bit falls off, fold in the GCM polynomial byte 0xE1.
            int x = (int)V1 & 1;
            V1 = (V1>>1) | (V0<<63);
            V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
        }

        // ... then fill every other slot by XOR-combining the powers,
        // using the fastest available 16-byte XOR.
        for (i=0; i<16; i++)
        {
            memset(mulTable+i*256*16, 0, 16);
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
            if (HasSSE2())
                for (j=2; j<=0x80; j*=2)
                    for (k=1; k<j; k++)
                        GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
            else
#elif CRYPTOPP_ARM_NEON_AVAILABLE
            if (HasNEON())
                for (j=2; j<=0x80; j*=2)
                    for (k=1; k<j; k++)
                        GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
            else
#elif CRYPTOPP_POWER8_AVAILABLE
            if (HasPower8())
                for (j=2; j<=0x80; j*=2)
                    for (k=1; k<j; k++)
                        GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
            else
#endif
                for (j=2; j<=0x80; j*=2)
                    for (k=1; k<j; k++)
                        Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
        }
    }
    else
    {
        // 2K layout needs the shared byte-reduction table; build it once.
        if (!s_reductionTableInitialized)
        {
            s_reductionTable[0] = 0;
            word16 x = 0x01c2;
            s_reductionTable[1] = ByteReverse(x);
            for (unsigned int ii=2; ii<=0x80; ii*=2)
            {
                x <<= 1;
                s_reductionTable[ii] = ByteReverse(x);
                for (unsigned int jj=1; jj<ii; jj++)
                    s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
            }
            s_reductionTableInitialized = true;
        }

        // 2K layout: nibble-indexed tables. Only the first 128-24 = 104
        // powers of H are stored; the remainder are recovered at hash time
        // via s_reductionTable.
        for (i=0; i<128-24; i++)
        {
            k = i%32;
            if (k < 4)
                Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
            else if (k < 8)
                Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);

            // Multiply (V0,V1) by x in GF(2^128), as in the 64K path above.
            int x = (int)V1 & 1;
            V1 = (V1>>1) | (V0<<63);
            V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
        }

        // Fill the non-power-of-two entries of both table halves by XOR.
        for (i=0; i<4; i++)
        {
            memset(mulTable+i*256, 0, 16);
            memset(mulTable+1024+i*256, 0, 16);
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
            if (HasSSE2())
                for (j=2; j<=8; j*=2)
                    for (k=1; k<j; k++)
                    {
                        GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                        GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                    }
            else
#elif CRYPTOPP_ARM_NEON_AVAILABLE
            if (HasNEON())
                for (j=2; j<=8; j*=2)
                    for (k=1; k<j; k++)
                    {
                        GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                        GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                    }
            else
#elif CRYPTOPP_POWER8_AVAILABLE
            if (HasPower8())
                for (j=2; j<=8; j*=2)
                    for (k=1; k<j; k++)
                    {
                        GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                        GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                    }
            else
#endif
                for (j=2; j<=8; j*=2)
                    for (k=1; k<j; k++)
                    {
                        Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
                        Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
                    }
        }
    }
}
299 
// The hardware carry-less multiply implementations keep the hash buffer in
// a byte order convenient for the hardware; when such a path is compiled in
// and available at runtime, flip the buffer back in place. On pure software
// builds this compiles to a no-op.
inline void GCM_Base::ReverseHashBufferIfNeeded()
{
#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
    }
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
    }
#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
    // NOTE(review): the POWER8 VMULL branch tests HasPMULL(), mirroring the
    // pattern used throughout this file — confirm this is the intended
    // runtime query for POWER8.
    if (HasPMULL())
    {
        GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
    }
#endif
}
319 
// Compute the initial counter block Y0 from the IV and (re)start CTR mode.
// A 12-byte IV is used directly as Y0 = IV || 0x00000001; any other length
// is run through GHASH: whole blocks, zero-padded tail, then a length block.
void GCM_Base::Resync(const byte *iv, size_t len)
{
    BlockCipher &cipher = AccessBlockCipher();
    byte *hashBuffer = HashBuffer();

    if (len == 12)
    {
        // Fast path: Y0 = IV || 0^31 || 1
        memcpy(hashBuffer, iv, len);
        memset(hashBuffer+len, 0, 3);
        hashBuffer[len+3] = 1;
    }
    else
    {
        size_t origLen = len;
        memset(hashBuffer, 0, HASH_BLOCKSIZE);

        // GHASH whole blocks straight from the caller's buffer;
        // AuthenticateBlocks returns the count of leftover tail bytes.
        if (len >= HASH_BLOCKSIZE)
        {
            len = GCM_Base::AuthenticateBlocks(iv, len);
            iv += (origLen - len);
        }

        // Zero-pad and hash the final partial block, if any.
        if (len > 0)
        {
            memcpy(m_buffer, iv, len);
            memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
            GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
        }

        // Final block: 64 zero bits followed by the IV length in bits,
        // both big-endian.
        PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
        GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);

        // Undo any hardware-specific byte order before using Y0.
        ReverseHashBufferIfNeeded();
    }

    // (Re)key the CTR stream on Y0.
    if (m_state >= State_IVSet)
        m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
    else
        m_ctr.SetCipherWithIV(cipher, hashBuffer);

    // Skip the first keystream block; it is consumed later to encrypt the
    // tag (see AuthenticateLastFooterBlock, which seeks back to 0).
    m_ctr.Seek(HASH_BLOCKSIZE);

    // Clear the running GHASH state before data authentication begins.
    memset(hashBuffer, 0, HASH_BLOCKSIZE);
}
364 
// Report the input/output alignment that gives the fastest processing:
// 16 bytes when an SSE2 or POWER8 SIMD path will run, 4 bytes for NEON,
// otherwise whatever the underlying block cipher prefers.
unsigned int GCM_Base::OptimalDataAlignment() const
{
    return
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
        HasSSE2() ? 16 :
#elif CRYPTOPP_ARM_NEON_AVAILABLE
        HasNEON() ? 4 :
#elif CRYPTOPP_POWER8_AVAILABLE
        HasPower8() ? 16 :
#endif
        GetBlockCipher().OptimalDataAlignment();
}
377 
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif

#endif // Not CRYPTOPP_GENERATE_X64_MASM

// Prototypes for the stand-alone MASM SSE2 GHASH routines. The MASM source
// is generated from this file's assembly (see the /DCRYPTOPP_GENERATE_X64_MASM
// note at the top of the file); 'blocks' is a whole-block count, not bytes.
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" {
void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
}
#endif
390 
391 #ifndef CRYPTOPP_GENERATE_X64_MASM
392 
// Fold 'len' bytes of data into the running GHASH state and return the
// number of unprocessed tail bytes (len % 16).
//
// Dispatch order: hardware carry-less multiply (CLMUL/PMULL/VMULL) when
// compiled in and available at runtime; then SSE2 assembly (2K or 64K
// table variant); then portable C++ (2K or 64K table variant). The switch
// selector packs two bits: bit 0 = SSE2 available, bit 1 = 64K tables.
size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
{
#if CRYPTOPP_CLMUL_AVAILABLE
    if (HasCLMUL())
    {
        return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
    }
#elif CRYPTOPP_ARM_PMULL_AVAILABLE
    if (HasPMULL())
    {
        return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
    }
#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
    if (HasPMULL())
    {
        return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
    }
#endif

    word64 *hashBuffer = (word64 *)(void *)HashBuffer();
    CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));

    switch (2*(m_buffer.size()>=64*1024)
#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
        + HasSSE2()
//#elif CRYPTOPP_ARM_NEON_AVAILABLE
//        + HasNEON()
#endif
        )
    {
    case 0: // non-SSE2 and 2K tables
        {
        byte *mulTable = MulTable();
        word64 x0 = hashBuffer[0], x1 = hashBuffer[1];

        do
        {
            // Absorb one 16-byte block into the state (x0,x1).
            word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
            Block::Get(data)(y0)(y1);
            x0 ^= y0;
            x1 ^= y1;

            data += HASH_BLOCKSIZE;
            len -= HASH_BLOCKSIZE;

            // 2K table lookup: indexed by 4-bit nibbles of the state.
            // Parameter 'e' selects which word64 half of the 16-byte
            // table entry is read.
            #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)

            #if (CRYPTOPP_LITTLE_ENDIAN)
            #if CRYPTOPP_BOOL_SLOW_WORD64
            word32 z0 = (word32)x0;
            word32 z1 = (word32)(x0>>32);
            word32 z2 = (word32)x1;
            word32 z3 = (word32)(x1>>32);
            #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
            #else
            #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
            #endif
            #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
            #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
            #else
            #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
            #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
            #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
            #endif

            #define GF_MUL_32BY128(op, a, b, c) \
                a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
                a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
                b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
                b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
                c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
                c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
                d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
                d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \

            // Accumulate the four 32-bit limbs of the state against the table.
            GF_MUL_32BY128(=, 0, 0, 0)
            GF_MUL_32BY128(^=, 0, 1, 1)
            GF_MUL_32BY128(^=, 1, 0, 2)
            GF_MUL_32BY128(^=, 1, 1, 3)

            // Fold the staggered partial products together, collecting the
            // bytes shifted out into 'r' via the shared reduction table.
            word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
            GF_SHIFT_8(d)
            c0 ^= d0; c1 ^= d1;
            r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
            GF_SHIFT_8(c)
            b0 ^= c0; b1 ^= c1;
            r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
            GF_SHIFT_8(b)
            a0 ^= b0; a1 ^= b1;
            // NOTE(review): the reduction accumulator 'r' computed above is
            // never folded into a0/a1 in this listing — a line appears to
            // have been lost in extraction; verify against upstream gcm.cpp.
            x0 = a0; x1 = a1;
        }
        while (len >= HASH_BLOCKSIZE);

        hashBuffer[0] = x0; hashBuffer[1] = x1;
        return len;
        }

    case 2: // non-SSE2 and 64K tables
        {
        byte *mulTable = MulTable();
        word64 x0 = hashBuffer[0], x1 = hashBuffer[1];

        do
        {
            // Absorb one 16-byte block into the state (x0,x1).
            word64 y0, y1, a0, a1;
            Block::Get(data)(y0)(y1);
            x0 ^= y0;
            x1 ^= y1;

            data += HASH_BLOCKSIZE;
            len -= HASH_BLOCKSIZE;

            // 64K table lookup: 16 byte-indexed tables of 256 entries of
            // 16 bytes, so no reduction step is required here.
            #undef READ_TABLE_WORD64_COMMON
            #undef READ_TABLE_WORD64

            #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)

            #if (CRYPTOPP_LITTLE_ENDIAN)
            #if CRYPTOPP_BOOL_SLOW_WORD64
            word32 z0 = (word32)x0;
            word32 z1 = (word32)(x0>>32);
            word32 z2 = (word32)x1;
            word32 z3 = (word32)(x1>>32);
            #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
            #else
            #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
            #endif
            #else
            #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
            #endif

            #define GF_MUL_8BY128(op, b, c, d) \
                a0 op READ_TABLE_WORD64(b, c, d, 0);\
                a1 op READ_TABLE_WORD64(b, c, d, 1);\

            // One table lookup per byte of the 16-byte state.
            GF_MUL_8BY128(=, 0, 0, 0)
            GF_MUL_8BY128(^=, 0, 0, 1)
            GF_MUL_8BY128(^=, 0, 0, 2)
            GF_MUL_8BY128(^=, 0, 0, 3)
            GF_MUL_8BY128(^=, 0, 1, 0)
            GF_MUL_8BY128(^=, 0, 1, 1)
            GF_MUL_8BY128(^=, 0, 1, 2)
            GF_MUL_8BY128(^=, 0, 1, 3)
            GF_MUL_8BY128(^=, 1, 2, 0)
            GF_MUL_8BY128(^=, 1, 2, 1)
            GF_MUL_8BY128(^=, 1, 2, 2)
            GF_MUL_8BY128(^=, 1, 2, 3)
            GF_MUL_8BY128(^=, 1, 3, 0)
            GF_MUL_8BY128(^=, 1, 3, 1)
            GF_MUL_8BY128(^=, 1, 3, 2)
            GF_MUL_8BY128(^=, 1, 3, 3)

            x0 = a0; x1 = a1;
        }
        while (len >= HASH_BLOCKSIZE);

        hashBuffer[0] = x0; hashBuffer[1] = x1;
        return len;
        }
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM

#ifdef CRYPTOPP_X64_MASM_AVAILABLE
    // Separately-assembled MASM routines take a block count, not bytes.
    case 1: // SSE2 and 2K tables
        GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
        return len % 16;
    case 3: // SSE2 and 64K tables
        GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
        return len % 16;
#endif

#if CRYPTOPP_SSE2_ASM_AVAILABLE
    case 1: // SSE2 and 2K tables
        {
        #ifdef __GNUC__
        __asm__ __volatile__
        (
        INTEL_NOPREFIX
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
        ALIGN 8
        GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
        rex_push_reg rsi
        push_reg rdi
        push_reg rbx
        .endprolog
        mov rsi, r8
        mov r11, r9
        #else
        AS2( mov WORD_REG(cx), data )
        AS2( mov WORD_REG(dx), len )
        AS2( mov WORD_REG(si), hashBuffer )
        AS2( shr WORD_REG(dx), 4 )
        #endif

        // NOTE(review): an "#if CRYPTOPP_BOOL_X32" line appears to have been
        // lost here in this listing — the "#else" below is otherwise unmatched.
        AS1(push rbx)
        AS1(push rbp)
        #else
        AS_PUSH_IF86( bx)
        AS_PUSH_IF86( bp)
        #endif

        #ifdef __GNUC__
        AS2( mov AS_REG_7, WORD_REG(di))
        #elif CRYPTOPP_BOOL_X86
        AS2( lea AS_REG_7, s_reductionTable)
        #endif

        AS2( movdqa xmm0, [WORD_REG(si)] )

        #define MUL_TABLE_0 WORD_REG(si) + 32
        #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
        #define RED_TABLE AS_REG_7

        // Main loop: one 16-byte block per iteration.
        ASL(0)
        AS2( movdqu xmm4, [WORD_REG(cx)] )
        AS2( pxor xmm0, xmm4 )

        AS2( movd ebx, xmm0 )
        AS2( mov eax, AS_HEX(f0f0f0f0) )
        AS2( and eax, ebx )
        AS2( shl ebx, 4 )
        AS2( and ebx, AS_HEX(f0f0f0f0) )
        AS2( movzx edi, ah )
        AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
        AS2( movzx edi, al )
        AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
        AS2( shr eax, 16 )
        AS2( movzx edi, ah )
        AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
        AS2( movzx edi, al )
        AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )

        #define SSE2_MUL_32BITS(i) \
        AS2( psrldq xmm0, 4 )\
        AS2( movd eax, xmm0 )\
        AS2( and eax, AS_HEX(f0f0f0f0) )\
        AS2( movzx edi, bh )\
        AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
        AS2( movzx edi, bl )\
        AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
        AS2( shr ebx, 16 )\
        AS2( movzx edi, bh )\
        AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
        AS2( movzx edi, bl )\
        AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
        AS2( movd ebx, xmm0 )\
        AS2( shl ebx, 4 )\
        AS2( and ebx, AS_HEX(f0f0f0f0) )\
        AS2( movzx edi, ah )\
        AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
        AS2( movzx edi, al )\
        AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
        AS2( shr eax, 16 )\
        AS2( movzx edi, ah )\
        AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
        AS2( movzx edi, al )\
        AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\

        SSE2_MUL_32BITS(1)
        SSE2_MUL_32BITS(2)
        SSE2_MUL_32BITS(3)

        AS2( movzx edi, bh )
        AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
        AS2( movzx edi, bl )
        AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
        AS2( shr ebx, 16 )
        AS2( movzx edi, bh )
        AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
        AS2( movzx edi, bl )
        AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )

        // Reduction: fold the staggered products and look up the
        // correction word via RED_TABLE.
        AS2( movdqa xmm0, xmm3 )
        AS2( pslldq xmm3, 1 )
        AS2( pxor xmm2, xmm3 )
        AS2( movdqa xmm1, xmm2 )
        AS2( pslldq xmm2, 1 )
        AS2( pxor xmm5, xmm2 )

        AS2( psrldq xmm0, 15 )
#if USE_MOVD_REG32
        AS2( movd edi, xmm0 )
#elif USE_MOV_REG32_OR_REG64
        AS2( mov WORD_REG(di), xmm0 )
#else // GNU Assembler
        AS2( movd WORD_REG(di), xmm0 )
#endif
        AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
        AS2( shl eax, 8 )

        AS2( movdqa xmm0, xmm5 )
        AS2( pslldq xmm5, 1 )
        AS2( pxor xmm4, xmm5 )

        AS2( psrldq xmm1, 15 )
#if USE_MOVD_REG32
        AS2( movd edi, xmm1 )
#elif USE_MOV_REG32_OR_REG64
        AS2( mov WORD_REG(di), xmm1 )
#else
        AS2( movd WORD_REG(di), xmm1 )
#endif
        AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
        AS2( shl eax, 8 )

        AS2( psrldq xmm0, 15 )
#if USE_MOVD_REG32
        AS2( movd edi, xmm0 )
#elif USE_MOV_REG32_OR_REG64
        AS2( mov WORD_REG(di), xmm0 )
#else
        AS2( movd WORD_REG(di), xmm0 )
#endif
        AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )

        AS2( movd xmm0, eax )
        AS2( pxor xmm0, xmm4 )

        AS2( add WORD_REG(cx), 16 )
        AS2( sub WORD_REG(dx), 1 )
        // ATT_NOPREFIX
        ASJ( jnz, 0, b )
        INTEL_NOPREFIX
        AS2( movdqa [WORD_REG(si)], xmm0 )

        // NOTE(review): an "#if CRYPTOPP_BOOL_X32" line appears to have been
        // lost here as well — the "#else" below is otherwise unmatched.
        AS1(pop rbp)
        AS1(pop rbx)
        #else
        AS_POP_IF86( bp)
        AS_POP_IF86( bx)
        #endif

        #ifdef __GNUC__
        ATT_PREFIX
        :
        : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
        : "memory", "cc", "%eax"
        // NOTE(review): a "#if"-guard line (architecture-conditional extra
        // clobbers) appears to be missing before the line below; the "#endif"
        // that follows is otherwise unmatched.
        , "%ebx", "%r11"
        #endif
        );
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
        pop rbx
        pop rdi
        pop rsi
        ret
        GCM_AuthenticateBlocks_2K_SSE2 ENDP
        #endif

        return len%16;
        }
    case 3: // SSE2 and 64K tables
        {
        #ifdef __GNUC__
        __asm__ __volatile__
        (
        INTEL_NOPREFIX
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
        ALIGN 8
        GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
        rex_push_reg rsi
        push_reg rdi
        .endprolog
        mov rsi, r8
        #else
        AS2( mov WORD_REG(cx), data )
        AS2( mov WORD_REG(dx), len )
        AS2( mov WORD_REG(si), hashBuffer )
        AS2( shr WORD_REG(dx), 4 )
        #endif

        AS2( movdqa xmm0, [WORD_REG(si)] )

        #undef MUL_TABLE
        #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16

        // Main loop: one 16-byte block per iteration; byte-indexed tables
        // mean no reduction step is needed.
        ASL(1)
        AS2( movdqu xmm1, [WORD_REG(cx)] )
        AS2( pxor xmm1, xmm0 )
        AS2( pxor xmm0, xmm0 )

        #undef SSE2_MUL_32BITS
        #define SSE2_MUL_32BITS(i) \
        AS2( movd eax, xmm1 )\
        AS2( psrldq xmm1, 4 )\
        AS2( movzx edi, al )\
        AS2( add WORD_REG(di), WORD_REG(di) )\
        AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
        AS2( movzx edi, ah )\
        AS2( add WORD_REG(di), WORD_REG(di) )\
        AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
        AS2( shr eax, 16 )\
        AS2( movzx edi, al )\
        AS2( add WORD_REG(di), WORD_REG(di) )\
        AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
        AS2( movzx edi, ah )\
        AS2( add WORD_REG(di), WORD_REG(di) )\
        AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\

        SSE2_MUL_32BITS(0)
        SSE2_MUL_32BITS(1)
        SSE2_MUL_32BITS(2)
        SSE2_MUL_32BITS(3)

        AS2( add WORD_REG(cx), 16 )
        AS2( sub WORD_REG(dx), 1 )
        // ATT_NOPREFIX
        ASJ( jnz, 1, b )
        INTEL_NOPREFIX
        AS2( movdqa [WORD_REG(si)], xmm0 )

        #ifdef __GNUC__
        ATT_PREFIX
        :
        : "c" (data), "d" (len/16), "S" (hashBuffer)
        : "memory", "cc", "%edi", "%eax"
        );
        #elif defined(CRYPTOPP_GENERATE_X64_MASM)
        pop rdi
        pop rsi
        ret
        GCM_AuthenticateBlocks_64K_SSE2 ENDP
        #endif

        return len%16;
        }
#endif
#ifndef CRYPTOPP_GENERATE_X64_MASM
    }

    return len%16;
}
828 
829 void GCM_Base::AuthenticateLastHeaderBlock()
830 {
831  if (m_bufferedDataLength > 0)
832  {
833  memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
834  m_bufferedDataLength = 0;
835  GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
836  }
837 }
838 
// Finish GHASH over the message: flush any buffered data, then absorb the
// final length block — the header (AAD) length and the message length, each
// in bits, written big-endian.
void GCM_Base::AuthenticateLastConfidentialBlock()
{
    GCM_Base::AuthenticateLastHeaderBlock();
    PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
    GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
}
845 
// Produce the authentication tag: encrypt the final GHASH value with the
// first counter-mode keystream block (the CTR stream is rewound to 0, which
// Resync() reserved for this purpose) and write out macSize bytes.
void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
{
    m_ctr.Seek(0);
    // Undo any hardware-specific byte order before encrypting the tag.
    ReverseHashBufferIfNeeded();
    m_ctr.ProcessData(mac, HashBuffer(), macSize);
}
852 
853 NAMESPACE_END
854 
855 #endif // Not CRYPTOPP_GENERATE_X64_MASM
856 #endif
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1283
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:879
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: gcm.h:36
An invalid argument was detected.
Definition: cryptlib.h:203
Interface for retrieving values given their names.
Definition: cryptlib.h:322
CRYPTOPP_DLL bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:415
Access a block of memory.
Definition: misc.h:2550
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Library configuration file.
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
Definition: config_cpu.h:52
#define CRYPTOPP_BOOL_X32
32-bit x32 platform
Definition: config_cpu.h:44
#define CRYPTOPP_BOOL_X64
64-bit x64 platform
Definition: config_cpu.h:48
#define W64LIT(x)
Declare an unsigned word64.
Definition: config_int.h:119
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned short word16
16-bit unsigned datatype
Definition: config_int.h:59
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
GCM block cipher mode of operation.
@ GCM_64K_Tables
Use a table with 64K entries.
Definition: gcm.h:27
byte ByteReverse(byte value)
Reverses bytes in an 8-bit value.
Definition: misc.h:2001
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition: misc.h:1278
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1206
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2167
Crypto++ library namespace.
const char * TableSize()
int, in bytes
Definition: argnames.h:81
const char * BlockSize()
int, in bytes
Definition: argnames.h:27
Precompiled header file.
Access a block of memory.
Definition: misc.h:2587
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68