00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017 #include "cpu.h"
00018
00019 #include <iostream>
00020
00021 #if _MSC_VER >= 1400
00022 #include <intrin.h>
00023 #endif
00024
00025 #ifdef __DECCXX
00026 #include <c_asm.h>
00027 #endif
00028
00029 #ifdef CRYPTOPP_MSVC6_NO_PP
00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
00031 #endif
00032
00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
00034
00035 NAMESPACE_BEGIN(CryptoPP)
00036
00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00038 {
00039 if (valueType != typeid(Integer))
00040 return false;
00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00042 return true;
00043 }
00044
00045 inline static int Compare(const word *A, const word *B, size_t N)
00046 {
00047 while (N--)
00048 if (A[N] > B[N])
00049 return 1;
00050 else if (A[N] < B[N])
00051 return -1;
00052
00053 return 0;
00054 }
00055
00056 inline static int Increment(word *A, size_t N, word B=1)
00057 {
00058 assert(N);
00059 word t = A[0];
00060 A[0] = t+B;
00061 if (A[0] >= t)
00062 return 0;
00063 for (unsigned i=1; i<N; i++)
00064 if (++A[i])
00065 return 0;
00066 return 1;
00067 }
00068
00069 inline static int Decrement(word *A, size_t N, word B=1)
00070 {
00071 assert(N);
00072 word t = A[0];
00073 A[0] = t-B;
00074 if (A[0] <= t)
00075 return 0;
00076 for (unsigned i=1; i<N; i++)
00077 if (A[i]--)
00078 return 0;
00079 return 1;
00080 }
00081
00082 static void TwosComplement(word *A, size_t N)
00083 {
00084 Decrement(A, N);
00085 for (unsigned i=0; i<N; i++)
00086 A[i] = ~A[i];
00087 }
00088
00089 static word AtomicInverseModPower2(word A)
00090 {
00091 assert(A%2==1);
00092
00093 word R=A%8;
00094
00095 for (unsigned i=3; i<WORD_BITS; i*=2)
00096 R = R*(2-R*A);
00097
00098 assert(R*A==1);
00099 return R;
00100 }
00101
00102
00103
00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
00105 #define Declare2Words(x) word x##0, x##1;
00106 #define AssignWord(a, b) a##0 = b; a##1 = 0;
00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
00108 #define LowWord(a) a##0
00109 #define HighWord(a) a##1
00110 #ifdef _MSC_VER
00111 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
00112 #ifndef __INTEL_COMPILER
00113 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
00114 #endif
00115 #elif defined(__DECCXX)
00116 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
00117 #elif defined(__x86_64__)
00118 #ifdef __SUNPRO_CC
00119
00120 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
00121 #else
00122 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00123 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
00124 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
00125 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
00126 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
00127 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
00128 #endif
00129 #endif
00130 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
00131 #ifndef Double3Words
00132 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
00133 #endif
00134 #ifndef Acc2WordsBy2
00135 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
00136 #endif
00137 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
00138 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
00139 #define GetCarry(u) u##1
00140 #define GetBorrow(u) u##1
00141 #else
00142 #define Declare2Words(x) dword x;
00143 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
00144 #define MultiplyWords(p, a, b) p = __emulu(a, b);
00145 #else
00146 #define MultiplyWords(p, a, b) p = (dword)a*b;
00147 #endif
00148 #define AssignWord(a, b) a = b;
00149 #define Add2WordsBy1(a, b, c) a = b + c;
00150 #define Acc2WordsBy2(a, b) a += b;
00151 #define LowWord(a) word(a)
00152 #define HighWord(a) word(a>>WORD_BITS)
00153 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
00154 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
00155 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
00156 #define GetCarry(u) HighWord(u)
00157 #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
00158 #endif
00159 #ifndef MulAcc
00160 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
00161 #endif
00162 #ifndef Acc2WordsBy1
00163 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
00164 #endif
00165 #ifndef Acc3WordsBy2
00166 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
00167 #endif
00168
00169 class DWord
00170 {
00171 public:
00172 DWord() {}
00173
00174 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00175 explicit DWord(word low)
00176 {
00177 m_whole = low;
00178 }
00179 #else
00180 explicit DWord(word low)
00181 {
00182 m_halfs.low = low;
00183 m_halfs.high = 0;
00184 }
00185 #endif
00186
00187 DWord(word low, word high)
00188 {
00189 m_halfs.low = low;
00190 m_halfs.high = high;
00191 }
00192
00193 static DWord Multiply(word a, word b)
00194 {
00195 DWord r;
00196 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00197 r.m_whole = (dword)a * b;
00198 #elif defined(MultiplyWordsLoHi)
00199 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
00200 #endif
00201 return r;
00202 }
00203
00204 static DWord MultiplyAndAdd(word a, word b, word c)
00205 {
00206 DWord r = Multiply(a, b);
00207 return r += c;
00208 }
00209
00210 DWord & operator+=(word a)
00211 {
00212 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00213 m_whole = m_whole + a;
00214 #else
00215 m_halfs.low += a;
00216 m_halfs.high += (m_halfs.low < a);
00217 #endif
00218 return *this;
00219 }
00220
00221 DWord operator+(word a)
00222 {
00223 DWord r;
00224 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00225 r.m_whole = m_whole + a;
00226 #else
00227 r.m_halfs.low = m_halfs.low + a;
00228 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00229 #endif
00230 return r;
00231 }
00232
00233 DWord operator-(DWord a)
00234 {
00235 DWord r;
00236 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00237 r.m_whole = m_whole - a.m_whole;
00238 #else
00239 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00240 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00241 #endif
00242 return r;
00243 }
00244
00245 DWord operator-(word a)
00246 {
00247 DWord r;
00248 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00249 r.m_whole = m_whole - a;
00250 #else
00251 r.m_halfs.low = m_halfs.low - a;
00252 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00253 #endif
00254 return r;
00255 }
00256
00257
00258 word operator/(word divisor);
00259
00260 word operator%(word a);
00261
00262 bool operator!() const
00263 {
00264 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00265 return !m_whole;
00266 #else
00267 return !m_halfs.high && !m_halfs.low;
00268 #endif
00269 }
00270
00271 word GetLowHalf() const {return m_halfs.low;}
00272 word GetHighHalf() const {return m_halfs.high;}
00273 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00274
00275 private:
00276 union
00277 {
00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279 dword m_whole;
00280 #endif
00281 struct
00282 {
00283 #ifdef IS_LITTLE_ENDIAN
00284 word low;
00285 word high;
00286 #else
00287 word high;
00288 word low;
00289 #endif
00290 } m_halfs;
00291 };
00292 };
00293
00294 class Word
00295 {
00296 public:
00297 Word() {}
00298
00299 Word(word value)
00300 {
00301 m_whole = value;
00302 }
00303
00304 Word(hword low, hword high)
00305 {
00306 m_whole = low | (word(high) << (WORD_BITS/2));
00307 }
00308
00309 static Word Multiply(hword a, hword b)
00310 {
00311 Word r;
00312 r.m_whole = (word)a * b;
00313 return r;
00314 }
00315
00316 Word operator-(Word a)
00317 {
00318 Word r;
00319 r.m_whole = m_whole - a.m_whole;
00320 return r;
00321 }
00322
00323 Word operator-(hword a)
00324 {
00325 Word r;
00326 r.m_whole = m_whole - a;
00327 return r;
00328 }
00329
00330
00331 hword operator/(hword divisor)
00332 {
00333 return hword(m_whole / divisor);
00334 }
00335
00336 bool operator!() const
00337 {
00338 return !m_whole;
00339 }
00340
00341 word GetWhole() const {return m_whole;}
00342 hword GetLowHalf() const {return hword(m_whole);}
00343 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00344 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00345
00346 private:
00347 word m_whole;
00348 };
00349
00350
00351 template <class S, class D>
00352 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00353 {
00354
00355 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00356
00357
00358 S Q;
00359 if (S(B1+1) == 0)
00360 Q = A[2];
00361 else
00362 Q = D(A[1], A[2]) / S(B1+1);
00363
00364
00365 D p = D::Multiply(B0, Q);
00366 D u = (D) A[0] - p.GetLowHalf();
00367 A[0] = u.GetLowHalf();
00368 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00369 A[1] = u.GetLowHalf();
00370 A[2] += u.GetHighHalf();
00371
00372
00373 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00374 {
00375 u = (D) A[0] - B0;
00376 A[0] = u.GetLowHalf();
00377 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00378 A[1] = u.GetLowHalf();
00379 A[2] += u.GetHighHalf();
00380 Q++;
00381 assert(Q);
00382 }
00383
00384 return Q;
00385 }
00386
00387
00388 template <class S, class D>
00389 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00390 {
00391 if (!B)
00392 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00393 else
00394 {
00395 S Q[2];
00396 T[0] = Al.GetLowHalf();
00397 T[1] = Al.GetHighHalf();
00398 T[2] = Ah.GetLowHalf();
00399 T[3] = Ah.GetHighHalf();
00400 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00401 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00402 return D(Q[0], Q[1]);
00403 }
00404 }
00405
00406
00407 inline word DWord::operator/(word a)
00408 {
00409 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00410 return word(m_whole / a);
00411 #else
00412 hword r[4];
00413 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00414 #endif
00415 }
00416
00417 inline word DWord::operator%(word a)
00418 {
00419 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00420 return word(m_whole % a);
00421 #else
00422 if (a < (word(1) << (WORD_BITS/2)))
00423 {
00424 hword h = hword(a);
00425 word r = m_halfs.high % h;
00426 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00427 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00428 }
00429 else
00430 {
00431 hword r[4];
00432 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00433 return Word(r[0], r[1]).GetWhole();
00434 }
00435 #endif
00436 }
00437
00438
00439
00440
00441 #if defined(__GNUC__)
00442 #define AddPrologue \
00443 int result; \
00444 __asm__ __volatile__ \
00445 ( \
00446 ".intel_syntax noprefix;"
00447 #define AddEpilogue \
00448 ".att_syntax prefix;" \
00449 : "=a" (result)\
00450 : "d" (C), "a" (A), "D" (B), "c" (N) \
00451 : "%esi", "memory", "cc" \
00452 );\
00453 return result;
00454 #define MulPrologue \
00455 __asm__ __volatile__ \
00456 ( \
00457 ".intel_syntax noprefix;" \
00458 AS1( push ebx) \
00459 AS2( mov ebx, edx)
00460 #define MulEpilogue \
00461 AS1( pop ebx) \
00462 ".att_syntax prefix;" \
00463 : \
00464 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
00465 : "%esi", "memory", "cc" \
00466 );
00467 #define SquPrologue MulPrologue
00468 #define SquEpilogue \
00469 AS1( pop ebx) \
00470 ".att_syntax prefix;" \
00471 : \
00472 : "d" (s_maskLow16), "c" (C), "a" (A) \
00473 : "%esi", "%edi", "memory", "cc" \
00474 );
00475 #define TopPrologue MulPrologue
00476 #define TopEpilogue \
00477 AS1( pop ebx) \
00478 ".att_syntax prefix;" \
00479 : \
00480 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
00481 : "memory", "cc" \
00482 );
00483 #else
00484 #define AddPrologue \
00485 __asm push edi \
00486 __asm push esi \
00487 __asm mov eax, [esp+12] \
00488 __asm mov edi, [esp+16]
00489 #define AddEpilogue \
00490 __asm pop esi \
00491 __asm pop edi \
00492 __asm ret 8
00493 #if _MSC_VER < 1300
00494 #define SaveEBX __asm push ebx
00495 #define RestoreEBX __asm pop ebx
00496 #else
00497 #define SaveEBX
00498 #define RestoreEBX
00499 #endif
00500 #define SquPrologue \
00501 AS2( mov eax, A) \
00502 AS2( mov ecx, C) \
00503 SaveEBX \
00504 AS2( lea ebx, s_maskLow16)
00505 #define MulPrologue \
00506 AS2( mov eax, A) \
00507 AS2( mov edi, B) \
00508 AS2( mov ecx, C) \
00509 SaveEBX \
00510 AS2( lea ebx, s_maskLow16)
00511 #define TopPrologue \
00512 AS2( mov eax, A) \
00513 AS2( mov edi, B) \
00514 AS2( mov ecx, C) \
00515 AS2( mov esi, L) \
00516 SaveEBX \
00517 AS2( lea ebx, s_maskLow16)
00518 #define SquEpilogue RestoreEBX
00519 #define MulEpilogue RestoreEBX
00520 #define TopEpilogue RestoreEBX
00521 #endif
00522
00523 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
00524 extern "C" {
00525 int Baseline_Add(size_t N, word *C, const word *A, const word *B);
00526 int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
00527 }
00528 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
00529 int Baseline_Add(size_t N, word *C, const word *A, const word *B)
00530 {
00531 word result;
00532 __asm__ __volatile__
00533 (
00534 ".intel_syntax;"
00535 AS1( neg %1)
00536 ASJ( jz, 1, f)
00537 AS2( mov %0,[%3+8*%1])
00538 AS2( add %0,[%4+8*%1])
00539 AS2( mov [%2+8*%1],%0)
00540 ASL(0)
00541 AS2( mov %0,[%3+8*%1+8])
00542 AS2( adc %0,[%4+8*%1+8])
00543 AS2( mov [%2+8*%1+8],%0)
00544 AS2( lea %1,[%1+2])
00545 ASJ( jrcxz, 1, f)
00546 AS2( mov %0,[%3+8*%1])
00547 AS2( adc %0,[%4+8*%1])
00548 AS2( mov [%2+8*%1],%0)
00549 ASJ( jmp, 0, b)
00550 ASL(1)
00551 AS2( mov %0, 0)
00552 AS2( adc %0, %0)
00553 ".att_syntax;"
00554 : "=&r" (result), "+c" (N)
00555 : "r" (C+N), "r" (A+N), "r" (B+N)
00556 : "memory", "cc"
00557 );
00558 return (int)result;
00559 }
00560
00561 int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00562 {
00563 word result;
00564 __asm__ __volatile__
00565 (
00566 ".intel_syntax;"
00567 AS1( neg %1)
00568 ASJ( jz, 1, f)
00569 AS2( mov %0,[%3+8*%1])
00570 AS2( sub %0,[%4+8*%1])
00571 AS2( mov [%2+8*%1],%0)
00572 ASL(0)
00573 AS2( mov %0,[%3+8*%1+8])
00574 AS2( sbb %0,[%4+8*%1+8])
00575 AS2( mov [%2+8*%1+8],%0)
00576 AS2( lea %1,[%1+2])
00577 ASJ( jrcxz, 1, f)
00578 AS2( mov %0,[%3+8*%1])
00579 AS2( sbb %0,[%4+8*%1])
00580 AS2( mov [%2+8*%1],%0)
00581 ASJ( jmp, 0, b)
00582 ASL(1)
00583 AS2( mov %0, 0)
00584 AS2( adc %0, %0)
00585 ".att_syntax;"
00586 : "=&r" (result), "+c" (N)
00587 : "r" (C+N), "r" (A+N), "r" (B+N)
00588 : "memory", "cc"
00589 );
00590 return (int)result;
00591 }
00592 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
00593 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00594 {
00595 AddPrologue
00596
00597
00598 AS2( lea eax, [eax+4*ecx])
00599 AS2( lea edi, [edi+4*ecx])
00600 AS2( lea edx, [edx+4*ecx])
00601
00602 AS1( neg ecx)
00603 AS2( test ecx, 2)
00604 ASJ( jz, 0, f)
00605 AS2( sub ecx, 2)
00606 ASJ( jmp, 1, f)
00607
00608 ASL(0)
00609 ASJ( jecxz, 2, f)
00610 AS2( mov esi,[eax+4*ecx])
00611 AS2( adc esi,[edi+4*ecx])
00612 AS2( mov [edx+4*ecx],esi)
00613 AS2( mov esi,[eax+4*ecx+4])
00614 AS2( adc esi,[edi+4*ecx+4])
00615 AS2( mov [edx+4*ecx+4],esi)
00616 ASL(1)
00617 AS2( mov esi,[eax+4*ecx+8])
00618 AS2( adc esi,[edi+4*ecx+8])
00619 AS2( mov [edx+4*ecx+8],esi)
00620 AS2( mov esi,[eax+4*ecx+12])
00621 AS2( adc esi,[edi+4*ecx+12])
00622 AS2( mov [edx+4*ecx+12],esi)
00623
00624 AS2( lea ecx,[ecx+4])
00625 ASJ( jmp, 0, b)
00626
00627 ASL(2)
00628 AS2( mov eax, 0)
00629 AS1( setc al)
00630
00631 AddEpilogue
00632 }
00633
00634 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00635 {
00636 AddPrologue
00637
00638
00639 AS2( lea eax, [eax+4*ecx])
00640 AS2( lea edi, [edi+4*ecx])
00641 AS2( lea edx, [edx+4*ecx])
00642
00643 AS1( neg ecx)
00644 AS2( test ecx, 2)
00645 ASJ( jz, 0, f)
00646 AS2( sub ecx, 2)
00647 ASJ( jmp, 1, f)
00648
00649 ASL(0)
00650 ASJ( jecxz, 2, f)
00651 AS2( mov esi,[eax+4*ecx])
00652 AS2( sbb esi,[edi+4*ecx])
00653 AS2( mov [edx+4*ecx],esi)
00654 AS2( mov esi,[eax+4*ecx+4])
00655 AS2( sbb esi,[edi+4*ecx+4])
00656 AS2( mov [edx+4*ecx+4],esi)
00657 ASL(1)
00658 AS2( mov esi,[eax+4*ecx+8])
00659 AS2( sbb esi,[edi+4*ecx+8])
00660 AS2( mov [edx+4*ecx+8],esi)
00661 AS2( mov esi,[eax+4*ecx+12])
00662 AS2( sbb esi,[edi+4*ecx+12])
00663 AS2( mov [edx+4*ecx+12],esi)
00664
00665 AS2( lea ecx,[ecx+4])
00666 ASJ( jmp, 0, b)
00667
00668 ASL(2)
00669 AS2( mov eax, 0)
00670 AS1( setc al)
00671
00672 AddEpilogue
00673 }
00674
00675 #if CRYPTOPP_INTEGER_SSE2
00676 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
00677 {
00678 AddPrologue
00679
00680
00681 AS2( lea eax, [eax+4*ecx])
00682 AS2( lea edi, [edi+4*ecx])
00683 AS2( lea edx, [edx+4*ecx])
00684
00685 AS1( neg ecx)
00686 AS2( pxor mm2, mm2)
00687 ASJ( jz, 2, f)
00688 AS2( test ecx, 2)
00689 ASJ( jz, 0, f)
00690 AS2( sub ecx, 2)
00691 ASJ( jmp, 1, f)
00692
00693 ASL(0)
00694 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00695 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00696 AS2( paddq mm0, mm1)
00697 AS2( paddq mm2, mm0)
00698 AS2( movd DWORD PTR [edx+4*ecx], mm2)
00699 AS2( psrlq mm2, 32)
00700
00701 AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
00702 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00703 AS2( paddq mm0, mm1)
00704 AS2( paddq mm2, mm0)
00705 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00706 AS2( psrlq mm2, 32)
00707
00708 ASL(1)
00709 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00710 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00711 AS2( paddq mm0, mm1)
00712 AS2( paddq mm2, mm0)
00713 AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
00714 AS2( psrlq mm2, 32)
00715
00716 AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
00717 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00718 AS2( paddq mm0, mm1)
00719 AS2( paddq mm2, mm0)
00720 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00721 AS2( psrlq mm2, 32)
00722
00723 AS2( add ecx, 4)
00724 ASJ( jnz, 0, b)
00725
00726 ASL(2)
00727 AS2( movd eax, mm2)
00728 AS1( emms)
00729
00730 AddEpilogue
00731 }
00732 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
00733 {
00734 AddPrologue
00735
00736
00737 AS2( lea eax, [eax+4*ecx])
00738 AS2( lea edi, [edi+4*ecx])
00739 AS2( lea edx, [edx+4*ecx])
00740
00741 AS1( neg ecx)
00742 AS2( pxor mm2, mm2)
00743 ASJ( jz, 2, f)
00744 AS2( test ecx, 2)
00745 ASJ( jz, 0, f)
00746 AS2( sub ecx, 2)
00747 ASJ( jmp, 1, f)
00748
00749 ASL(0)
00750 AS2( movd mm0, DWORD PTR [eax+4*ecx])
00751 AS2( movd mm1, DWORD PTR [edi+4*ecx])
00752 AS2( psubq mm0, mm1)
00753 AS2( psubq mm0, mm2)
00754 AS2( movd DWORD PTR [edx+4*ecx], mm0)
00755 AS2( psrlq mm0, 63)
00756
00757 AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
00758 AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
00759 AS2( psubq mm2, mm1)
00760 AS2( psubq mm2, mm0)
00761 AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
00762 AS2( psrlq mm2, 63)
00763
00764 ASL(1)
00765 AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
00766 AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
00767 AS2( psubq mm0, mm1)
00768 AS2( psubq mm0, mm2)
00769 AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
00770 AS2( psrlq mm0, 63)
00771
00772 AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
00773 AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
00774 AS2( psubq mm2, mm1)
00775 AS2( psubq mm2, mm0)
00776 AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
00777 AS2( psrlq mm2, 63)
00778
00779 AS2( add ecx, 4)
00780 ASJ( jnz, 0, b)
00781
00782 ASL(2)
00783 AS2( movd eax, mm2)
00784 AS1( emms)
00785
00786 AddEpilogue
00787 }
00788 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
00789 #else
00790 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
00791 {
00792 assert (N%2 == 0);
00793
00794 Declare2Words(u);
00795 AssignWord(u, 0);
00796 for (size_t i=0; i<N; i+=2)
00797 {
00798 AddWithCarry(u, A[i], B[i]);
00799 C[i] = LowWord(u);
00800 AddWithCarry(u, A[i+1], B[i+1]);
00801 C[i+1] = LowWord(u);
00802 }
00803 return int(GetCarry(u));
00804 }
00805
00806 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
00807 {
00808 assert (N%2 == 0);
00809
00810 Declare2Words(u);
00811 AssignWord(u, 0);
00812 for (size_t i=0; i<N; i+=2)
00813 {
00814 SubtractWithBorrow(u, A[i], B[i]);
00815 C[i] = LowWord(u);
00816 SubtractWithBorrow(u, A[i+1], B[i+1]);
00817 C[i+1] = LowWord(u);
00818 }
00819 return int(GetBorrow(u));
00820 }
00821 #endif
00822
00823 static word LinearMultiply(word *C, const word *A, word B, size_t N)
00824 {
00825 word carry=0;
00826 for(unsigned i=0; i<N; i++)
00827 {
00828 Declare2Words(p);
00829 MultiplyWords(p, A[i], B);
00830 Acc2WordsBy1(p, carry);
00831 C[i] = LowWord(p);
00832 carry = HighWord(p);
00833 }
00834 return carry;
00835 }
00836
00837 #ifndef CRYPTOPP_DOXYGEN_PROCESSING
00838
00839 #define Mul_2 \
00840 Mul_Begin(2) \
00841 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00842 Mul_End(1, 1)
00843
00844 #define Mul_4 \
00845 Mul_Begin(4) \
00846 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00847 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00848 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00849 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
00850 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
00851 Mul_End(5, 3)
00852
00853 #define Mul_8 \
00854 Mul_Begin(8) \
00855 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00856 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00857 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00858 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00859 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00860 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00861 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00862 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
00863 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
00864 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
00865 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
00866 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
00867 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
00868 Mul_End(13, 7)
00869
00870 #define Mul_16 \
00871 Mul_Begin(16) \
00872 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00873 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00874 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00875 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00876 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00877 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00878 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00879 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00880 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00881 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00882 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00883 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
00884 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
00885 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
00886 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
00887 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
00888 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
00889 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
00890 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
00891 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
00892 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
00893 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
00894 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
00895 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
00896 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
00897 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
00898 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
00899 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
00900 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
00901 Mul_End(29, 15)
00902
00903 #define Squ_2 \
00904 Squ_Begin(2) \
00905 Squ_End(2)
00906
00907 #define Squ_4 \
00908 Squ_Begin(4) \
00909 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00910 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00911 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
00912 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
00913 Squ_End(4)
00914
00915 #define Squ_8 \
00916 Squ_Begin(8) \
00917 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00918 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00919 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00920 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00921 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00922 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00923 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00924 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00925 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00926 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00927 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
00928 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
00929 Squ_End(8)
00930
00931 #define Squ_16 \
00932 Squ_Begin(16) \
00933 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
00934 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
00935 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
00936 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
00937 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
00938 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
00939 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
00940 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
00941 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
00942 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
00943 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
00944 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
00945 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
00946 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
00947 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
00948 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
00949 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
00950 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
00951 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
00952 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
00953 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
00954 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
00955 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
00956 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
00957 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
00958 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
00959 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
00960 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
00961 Squ_End(16)
00962
00963 #define Bot_2 \
00964 Mul_Begin(2) \
00965 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
00966 Bot_End(2)
00967
00968 #define Bot_4 \
00969 Mul_Begin(4) \
00970 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00971 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
00972 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
00973 Bot_End(4)
00974
00975 #define Bot_8 \
00976 Mul_Begin(8) \
00977 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00978 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00979 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00980 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00981 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00982 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00983 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
00984 Bot_End(8)
00985
00986 #define Bot_16 \
00987 Mul_Begin(16) \
00988 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
00989 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
00990 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
00991 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
00992 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
00993 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
00994 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
00995 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
00996 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
00997 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
00998 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
00999 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
01000 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
01001 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
01002 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
01003 Bot_End(16)
01004
01005 #endif
01006
01007 #if 0
01008 #define Mul_Begin(n) \
01009 Declare2Words(p) \
01010 Declare2Words(c) \
01011 Declare2Words(d) \
01012 MultiplyWords(p, A[0], B[0]) \
01013 AssignWord(c, LowWord(p)) \
01014 AssignWord(d, HighWord(p))
01015
01016 #define Mul_Acc(i, j) \
01017 MultiplyWords(p, A[i], B[j]) \
01018 Acc2WordsBy1(c, LowWord(p)) \
01019 Acc2WordsBy1(d, HighWord(p))
01020
01021 #define Mul_SaveAcc(k, i, j) \
01022 R[k] = LowWord(c); \
01023 Add2WordsBy1(c, d, HighWord(c)) \
01024 MultiplyWords(p, A[i], B[j]) \
01025 AssignWord(d, HighWord(p)) \
01026 Acc2WordsBy1(c, LowWord(p))
01027
01028 #define Mul_End(n) \
01029 R[2*n-3] = LowWord(c); \
01030 Acc2WordsBy1(d, HighWord(c)) \
01031 MultiplyWords(p, A[n-1], B[n-1])\
01032 Acc2WordsBy2(d, p) \
01033 R[2*n-2] = LowWord(d); \
01034 R[2*n-1] = HighWord(d);
01035
01036 #define Bot_SaveAcc(k, i, j) \
01037 R[k] = LowWord(c); \
01038 word e = LowWord(d) + HighWord(c); \
01039 e += A[i] * B[j];
01040
01041 #define Bot_Acc(i, j) \
01042 e += A[i] * B[j];
01043
01044 #define Bot_End(n) \
01045 R[n-1] = e;
01046 #else
01047 #define Mul_Begin(n) \
01048 Declare2Words(p) \
01049 word c; \
01050 Declare2Words(d) \
01051 MultiplyWords(p, A[0], B[0]) \
01052 c = LowWord(p); \
01053 AssignWord(d, HighWord(p))
01054
01055 #define Mul_Acc(i, j) \
01056 MulAcc(c, d, A[i], B[j])
01057
01058 #define Mul_SaveAcc(k, i, j) \
01059 R[k] = c; \
01060 c = LowWord(d); \
01061 AssignWord(d, HighWord(d)) \
01062 MulAcc(c, d, A[i], B[j])
01063
01064 #define Mul_End(k, i) \
01065 R[k] = c; \
01066 MultiplyWords(p, A[i], B[i]) \
01067 Acc2WordsBy2(p, d) \
01068 R[k+1] = LowWord(p); \
01069 R[k+2] = HighWord(p);
01070
01071 #define Bot_SaveAcc(k, i, j) \
01072 R[k] = c; \
01073 c = LowWord(d); \
01074 c += A[i] * B[j];
01075
01076 #define Bot_Acc(i, j) \
01077 c += A[i] * B[j];
01078
01079 #define Bot_End(n) \
01080 R[n-1] = c;
01081 #endif
01082
01083 #define Squ_Begin(n) \
01084 Declare2Words(p) \
01085 word c; \
01086 Declare2Words(d) \
01087 Declare2Words(e) \
01088 MultiplyWords(p, A[0], A[0]) \
01089 R[0] = LowWord(p); \
01090 AssignWord(e, HighWord(p)) \
01091 MultiplyWords(p, A[0], A[1]) \
01092 c = LowWord(p); \
01093 AssignWord(d, HighWord(p)) \
01094 Squ_NonDiag \
01095
01096 #define Squ_NonDiag \
01097 Double3Words(c, d)
01098
01099 #define Squ_SaveAcc(k, i, j) \
01100 Acc3WordsBy2(c, d, e) \
01101 R[k] = c; \
01102 MultiplyWords(p, A[i], A[j]) \
01103 c = LowWord(p); \
01104 AssignWord(d, HighWord(p)) \
01105
01106 #define Squ_Acc(i, j) \
01107 MulAcc(c, d, A[i], A[j])
01108
01109 #define Squ_Diag(i) \
01110 Squ_NonDiag \
01111 MulAcc(c, d, A[i], A[i])
01112
01113 #define Squ_End(n) \
01114 Acc3WordsBy2(c, d, e) \
01115 R[2*n-3] = c; \
01116 MultiplyWords(p, A[n-1], A[n-1])\
01117 Acc2WordsBy2(p, e) \
01118 R[2*n-2] = LowWord(p); \
01119 R[2*n-1] = HighWord(p);
01120
01121 void Baseline_Multiply2(word *R, const word *A, const word *B)
01122 {
01123 Mul_2
01124 }
01125
01126 void Baseline_Multiply4(word *R, const word *A, const word *B)
01127 {
01128 Mul_4
01129 }
01130
01131 void Baseline_Multiply8(word *R, const word *A, const word *B)
01132 {
01133 Mul_8
01134 }
01135
01136 void Baseline_Square2(word *R, const word *A)
01137 {
01138 Squ_2
01139 }
01140
01141 void Baseline_Square4(word *R, const word *A)
01142 {
01143 Squ_4
01144 }
01145
01146 void Baseline_Square8(word *R, const word *A)
01147 {
01148 Squ_8
01149 }
01150
01151 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
01152 {
01153 Bot_2
01154 }
01155
01156 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
01157 {
01158 Bot_4
01159 }
01160
01161 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
01162 {
01163 Bot_8
01164 }
01165
01166 #define Top_Begin(n) \
01167 Declare2Words(p) \
01168 word c; \
01169 Declare2Words(d) \
01170 MultiplyWords(p, A[0], B[n-2]);\
01171 AssignWord(d, HighWord(p));
01172
01173 #define Top_Acc(i, j) \
01174 MultiplyWords(p, A[i], B[j]);\
01175 Acc2WordsBy1(d, HighWord(p));
01176
01177 #define Top_SaveAcc0(i, j) \
01178 c = LowWord(d); \
01179 AssignWord(d, HighWord(d)) \
01180 MulAcc(c, d, A[i], B[j])
01181
01182 #define Top_SaveAcc1(i, j) \
01183 c = L<c; \
01184 Acc2WordsBy1(d, c); \
01185 c = LowWord(d); \
01186 AssignWord(d, HighWord(d)) \
01187 MulAcc(c, d, A[i], B[j])
01188
01189 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
01190 {
01191 word T[4];
01192 Baseline_Multiply2(T, A, B);
01193 R[0] = T[2];
01194 R[1] = T[3];
01195 }
01196
01197 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
01198 {
01199 Top_Begin(4)
01200 Top_Acc(1, 1) Top_Acc(2, 0) \
01201 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
01202 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
01203 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
01204 Mul_End(1, 3)
01205 }
01206
01207 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
01208 {
01209 Top_Begin(8)
01210 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
01211 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
01212 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
01213 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
01214 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
01215 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
01216 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
01217 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
01218 Mul_End(5, 7)
01219 }
01220
01221 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
01222 void Baseline_Multiply16(word *R, const word *A, const word *B)
01223 {
01224 Mul_16
01225 }
01226
01227 void Baseline_Square16(word *R, const word *A)
01228 {
01229 Squ_16
01230 }
01231
01232 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
01233 {
01234 Bot_16
01235 }
01236
01237 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
01238 {
01239 Top_Begin(16)
01240 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
01241 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
01242 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
01243 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
01244 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
01245 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
01246 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
01247 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
01248 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
01249 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
01250 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
01251 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
01252 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
01253 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
01254 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
01255 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
01256 Mul_End(13, 15)
01257 }
01258 #endif
01259
01260
01261
01262 #if CRYPTOPP_INTEGER_SSE2
01263
01264 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
01265
01266 #undef Mul_Begin
01267 #undef Mul_Acc
01268 #undef Top_Begin
01269 #undef Top_Acc
01270 #undef Squ_Acc
01271 #undef Squ_NonDiag
01272 #undef Squ_Diag
01273 #undef Squ_SaveAcc
01274 #undef Squ_Begin
01275 #undef Mul_SaveAcc
01276 #undef Bot_Acc
01277 #undef Bot_SaveAcc
01278 #undef Bot_End
01279 #undef Squ_End
01280 #undef Mul_End
01281
01282 #define SSE2_FinalSave(k) \
01283 AS2( psllq xmm5, 16) \
01284 AS2( paddq xmm4, xmm5) \
01285 AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
01286
01287 #define SSE2_SaveShift(k) \
01288 AS2( movq xmm0, xmm6) \
01289 AS2( punpckhqdq xmm6, xmm0) \
01290 AS2( movq xmm1, xmm7) \
01291 AS2( punpckhqdq xmm7, xmm1) \
01292 AS2( paddd xmm6, xmm0) \
01293 AS2( pslldq xmm6, 4) \
01294 AS2( paddd xmm7, xmm1) \
01295 AS2( paddd xmm4, xmm6) \
01296 AS2( pslldq xmm7, 4) \
01297 AS2( movq xmm6, xmm4) \
01298 AS2( paddd xmm5, xmm7) \
01299 AS2( movq xmm7, xmm5) \
01300 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01301 AS2( psrlq xmm6, 16) \
01302 AS2( paddq xmm6, xmm7) \
01303 AS2( punpckhqdq xmm4, xmm0) \
01304 AS2( punpckhqdq xmm5, xmm0) \
01305 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
01306 AS2( psrlq xmm6, 3*16) \
01307 AS2( paddd xmm4, xmm6) \
01308
01309 #define Squ_SSE2_SaveShift(k) \
01310 AS2( movq xmm0, xmm6) \
01311 AS2( punpckhqdq xmm6, xmm0) \
01312 AS2( movq xmm1, xmm7) \
01313 AS2( punpckhqdq xmm7, xmm1) \
01314 AS2( paddd xmm6, xmm0) \
01315 AS2( pslldq xmm6, 4) \
01316 AS2( paddd xmm7, xmm1) \
01317 AS2( paddd xmm4, xmm6) \
01318 AS2( pslldq xmm7, 4) \
01319 AS2( movhlps xmm6, xmm4) \
01320 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
01321 AS2( paddd xmm5, xmm7) \
01322 AS2( movhps QWORD PTR [esp+12], xmm5)\
01323 AS2( psrlq xmm4, 16) \
01324 AS2( paddq xmm4, xmm5) \
01325 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
01326 AS2( psrlq xmm4, 3*16) \
01327 AS2( paddd xmm4, xmm6) \
01328 AS2( movq QWORD PTR [esp+4], xmm4)\
01329
01330 #define SSE2_FirstMultiply(i) \
01331 AS2( movdqa xmm7, [esi+(i)*16])\
01332 AS2( movdqa xmm5, [edi-(i)*16])\
01333 AS2( pmuludq xmm5, xmm7) \
01334 AS2( movdqa xmm4, [ebx])\
01335 AS2( movdqa xmm6, xmm4) \
01336 AS2( pand xmm4, xmm5) \
01337 AS2( psrld xmm5, 16) \
01338 AS2( pmuludq xmm7, [edx-(i)*16])\
01339 AS2( pand xmm6, xmm7) \
01340 AS2( psrld xmm7, 16)
01341
01342 #define Squ_Begin(n) \
01343 SquPrologue \
01344 AS2( mov esi, esp)\
01345 AS2( and esp, 0xfffffff0)\
01346 AS2( lea edi, [esp-32*n])\
01347 AS2( sub esp, 32*n+16)\
01348 AS1( push esi)\
01349 AS2( mov esi, edi) \
01350 AS2( xor edx, edx) \
01351 ASL(1) \
01352 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01353 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01354 AS2( movdqa [edi+2*edx], xmm0) \
01355 AS2( psrlq xmm0, 32) \
01356 AS2( movdqa [edi+2*edx+16], xmm0) \
01357 AS2( movdqa [edi+16*n+2*edx], xmm1) \
01358 AS2( psrlq xmm1, 32) \
01359 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
01360 AS2( add edx, 16) \
01361 AS2( cmp edx, 8*(n)) \
01362 ASJ( jne, 1, b) \
01363 AS2( lea edx, [edi+16*n])\
01364 SSE2_FirstMultiply(0) \
01365
01366 #define Squ_Acc(i) \
01367 ASL(LSqu##i) \
01368 AS2( movdqa xmm1, [esi+(i)*16]) \
01369 AS2( movdqa xmm0, [edi-(i)*16]) \
01370 AS2( movdqa xmm2, [ebx]) \
01371 AS2( pmuludq xmm0, xmm1) \
01372 AS2( pmuludq xmm1, [edx-(i)*16]) \
01373 AS2( movdqa xmm3, xmm2) \
01374 AS2( pand xmm2, xmm0) \
01375 AS2( psrld xmm0, 16) \
01376 AS2( paddd xmm4, xmm2) \
01377 AS2( paddd xmm5, xmm0) \
01378 AS2( pand xmm3, xmm1) \
01379 AS2( psrld xmm1, 16) \
01380 AS2( paddd xmm6, xmm3) \
01381 AS2( paddd xmm7, xmm1) \
01382
01383 #define Squ_Acc1(i)
01384 #define Squ_Acc2(i) ASC(call, LSqu##i)
01385 #define Squ_Acc3(i) Squ_Acc2(i)
01386 #define Squ_Acc4(i) Squ_Acc2(i)
01387 #define Squ_Acc5(i) Squ_Acc2(i)
01388 #define Squ_Acc6(i) Squ_Acc2(i)
01389 #define Squ_Acc7(i) Squ_Acc2(i)
01390 #define Squ_Acc8(i) Squ_Acc2(i)
01391
01392 #define SSE2_End(E, n) \
01393 SSE2_SaveShift(2*(n)-3) \
01394 AS2( movdqa xmm7, [esi+16]) \
01395 AS2( movdqa xmm0, [edi]) \
01396 AS2( pmuludq xmm0, xmm7) \
01397 AS2( movdqa xmm2, [ebx]) \
01398 AS2( pmuludq xmm7, [edx]) \
01399 AS2( movdqa xmm6, xmm2) \
01400 AS2( pand xmm2, xmm0) \
01401 AS2( psrld xmm0, 16) \
01402 AS2( paddd xmm4, xmm2) \
01403 AS2( paddd xmm5, xmm0) \
01404 AS2( pand xmm6, xmm7) \
01405 AS2( psrld xmm7, 16) \
01406 SSE2_SaveShift(2*(n)-2) \
01407 SSE2_FinalSave(2*(n)-1) \
01408 AS1( pop esp)\
01409 E
01410
01411 #define Squ_End(n) SSE2_End(SquEpilogue, n)
01412 #define Mul_End(n) SSE2_End(MulEpilogue, n)
01413 #define Top_End(n) SSE2_End(TopEpilogue, n)
01414
01415 #define Squ_Column1(k, i) \
01416 Squ_SSE2_SaveShift(k) \
01417 AS2( add esi, 16) \
01418 SSE2_FirstMultiply(1)\
01419 Squ_Acc##i(i) \
01420 AS2( paddd xmm4, xmm4) \
01421 AS2( paddd xmm5, xmm5) \
01422 AS2( movdqa xmm3, [esi]) \
01423 AS2( movq xmm1, QWORD PTR [esi+8]) \
01424 AS2( pmuludq xmm1, xmm3) \
01425 AS2( pmuludq xmm3, xmm3) \
01426 AS2( movdqa xmm0, [ebx])\
01427 AS2( movdqa xmm2, xmm0) \
01428 AS2( pand xmm0, xmm1) \
01429 AS2( psrld xmm1, 16) \
01430 AS2( paddd xmm6, xmm0) \
01431 AS2( paddd xmm7, xmm1) \
01432 AS2( pand xmm2, xmm3) \
01433 AS2( psrld xmm3, 16) \
01434 AS2( paddd xmm6, xmm6) \
01435 AS2( paddd xmm7, xmm7) \
01436 AS2( paddd xmm4, xmm2) \
01437 AS2( paddd xmm5, xmm3) \
01438 AS2( movq xmm0, QWORD PTR [esp+4])\
01439 AS2( movq xmm1, QWORD PTR [esp+12])\
01440 AS2( paddd xmm4, xmm0)\
01441 AS2( paddd xmm5, xmm1)\
01442
01443 #define Squ_Column0(k, i) \
01444 Squ_SSE2_SaveShift(k) \
01445 AS2( add edi, 16) \
01446 AS2( add edx, 16) \
01447 SSE2_FirstMultiply(1)\
01448 Squ_Acc##i(i) \
01449 AS2( paddd xmm6, xmm6) \
01450 AS2( paddd xmm7, xmm7) \
01451 AS2( paddd xmm4, xmm4) \
01452 AS2( paddd xmm5, xmm5) \
01453 AS2( movq xmm0, QWORD PTR [esp+4])\
01454 AS2( movq xmm1, QWORD PTR [esp+12])\
01455 AS2( paddd xmm4, xmm0)\
01456 AS2( paddd xmm5, xmm1)\
01457
01458 #define SSE2_MulAdd45 \
01459 AS2( movdqa xmm7, [esi]) \
01460 AS2( movdqa xmm0, [edi]) \
01461 AS2( pmuludq xmm0, xmm7) \
01462 AS2( movdqa xmm2, [ebx]) \
01463 AS2( pmuludq xmm7, [edx]) \
01464 AS2( movdqa xmm6, xmm2) \
01465 AS2( pand xmm2, xmm0) \
01466 AS2( psrld xmm0, 16) \
01467 AS2( paddd xmm4, xmm2) \
01468 AS2( paddd xmm5, xmm0) \
01469 AS2( pand xmm6, xmm7) \
01470 AS2( psrld xmm7, 16)
01471
01472 #define Mul_Begin(n) \
01473 MulPrologue \
01474 AS2( mov esi, esp)\
01475 AS2( and esp, 0xfffffff0)\
01476 AS2( sub esp, 48*n+16)\
01477 AS1( push esi)\
01478 AS2( xor edx, edx) \
01479 ASL(1) \
01480 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01481 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01482 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01483 AS2( movdqa [esp+20+2*edx], xmm0) \
01484 AS2( psrlq xmm0, 32) \
01485 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01486 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01487 AS2( psrlq xmm1, 32) \
01488 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01489 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01490 AS2( psrlq xmm2, 32) \
01491 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01492 AS2( add edx, 16) \
01493 AS2( cmp edx, 8*(n)) \
01494 ASJ( jne, 1, b) \
01495 AS2( lea edi, [esp+20])\
01496 AS2( lea edx, [esp+20+16*n])\
01497 AS2( lea esi, [esp+20+32*n])\
01498 SSE2_FirstMultiply(0) \
01499
01500 #define Mul_Acc(i) \
01501 ASL(LMul##i) \
01502 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01503 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01504 AS2( movdqa xmm2, [ebx]) \
01505 AS2( pmuludq xmm0, xmm1) \
01506 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01507 AS2( movdqa xmm3, xmm2) \
01508 AS2( pand xmm2, xmm0) \
01509 AS2( psrld xmm0, 16) \
01510 AS2( paddd xmm4, xmm2) \
01511 AS2( paddd xmm5, xmm0) \
01512 AS2( pand xmm3, xmm1) \
01513 AS2( psrld xmm1, 16) \
01514 AS2( paddd xmm6, xmm3) \
01515 AS2( paddd xmm7, xmm1) \
01516
01517 #define Mul_Acc1(i)
01518 #define Mul_Acc2(i) ASC(call, LMul##i)
01519 #define Mul_Acc3(i) Mul_Acc2(i)
01520 #define Mul_Acc4(i) Mul_Acc2(i)
01521 #define Mul_Acc5(i) Mul_Acc2(i)
01522 #define Mul_Acc6(i) Mul_Acc2(i)
01523 #define Mul_Acc7(i) Mul_Acc2(i)
01524 #define Mul_Acc8(i) Mul_Acc2(i)
01525 #define Mul_Acc9(i) Mul_Acc2(i)
01526 #define Mul_Acc10(i) Mul_Acc2(i)
01527 #define Mul_Acc11(i) Mul_Acc2(i)
01528 #define Mul_Acc12(i) Mul_Acc2(i)
01529 #define Mul_Acc13(i) Mul_Acc2(i)
01530 #define Mul_Acc14(i) Mul_Acc2(i)
01531 #define Mul_Acc15(i) Mul_Acc2(i)
01532 #define Mul_Acc16(i) Mul_Acc2(i)
01533
01534 #define Mul_Column1(k, i) \
01535 SSE2_SaveShift(k) \
01536 AS2( add esi, 16) \
01537 SSE2_MulAdd45\
01538 Mul_Acc##i(i) \
01539
01540 #define Mul_Column0(k, i) \
01541 SSE2_SaveShift(k) \
01542 AS2( add edi, 16) \
01543 AS2( add edx, 16) \
01544 SSE2_MulAdd45\
01545 Mul_Acc##i(i) \
01546
01547 #define Bot_Acc(i) \
01548 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
01549 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
01550 AS2( pmuludq xmm0, xmm1) \
01551 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01552 AS2( paddq xmm4, xmm0) \
01553 AS2( paddd xmm6, xmm1)
01554
01555 #define Bot_SaveAcc(k) \
01556 SSE2_SaveShift(k) \
01557 AS2( add edi, 16) \
01558 AS2( add edx, 16) \
01559 AS2( movdqa xmm6, [esi]) \
01560 AS2( movdqa xmm0, [edi]) \
01561 AS2( pmuludq xmm0, xmm6) \
01562 AS2( paddq xmm4, xmm0) \
01563 AS2( psllq xmm5, 16) \
01564 AS2( paddq xmm4, xmm5) \
01565 AS2( pmuludq xmm6, [edx])
01566
01567 #define Bot_End(n) \
01568 AS2( movhlps xmm7, xmm6) \
01569 AS2( paddd xmm6, xmm7) \
01570 AS2( psllq xmm6, 32) \
01571 AS2( paddd xmm4, xmm6) \
01572 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
01573 AS1( pop esp)\
01574 MulEpilogue
01575
01576 #define Top_Begin(n) \
01577 TopPrologue \
01578 AS2( mov edx, esp)\
01579 AS2( and esp, 0xfffffff0)\
01580 AS2( sub esp, 48*n+16)\
01581 AS1( push edx)\
01582 AS2( xor edx, edx) \
01583 ASL(1) \
01584 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
01585 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
01586 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
01587 AS2( movdqa [esp+20+2*edx], xmm0) \
01588 AS2( psrlq xmm0, 32) \
01589 AS2( movdqa [esp+20+2*edx+16], xmm0) \
01590 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
01591 AS2( psrlq xmm1, 32) \
01592 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
01593 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
01594 AS2( psrlq xmm2, 32) \
01595 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
01596 AS2( add edx, 16) \
01597 AS2( cmp edx, 8*(n)) \
01598 ASJ( jne, 1, b) \
01599 AS2( mov eax, esi) \
01600 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
01601 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
01602 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
01603 AS2( pxor xmm4, xmm4)\
01604 AS2( pxor xmm5, xmm5)
01605
01606 #define Top_Acc(i) \
01607 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
01608 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
01609 AS2( psrlq xmm0, 48) \
01610 AS2( paddd xmm5, xmm0)\
01611
01612 #define Top_Column0(i) \
01613 AS2( psllq xmm5, 32) \
01614 AS2( add edi, 16) \
01615 AS2( add edx, 16) \
01616 SSE2_MulAdd45\
01617 Mul_Acc##i(i) \
01618
01619 #define Top_Column1(i) \
01620 SSE2_SaveShift(0) \
01621 AS2( add esi, 16) \
01622 SSE2_MulAdd45\
01623 Mul_Acc##i(i) \
01624 AS2( shr eax, 16) \
01625 AS2( movd xmm0, eax)\
01626 AS2( movd xmm1, [ecx+4])\
01627 AS2( psrld xmm1, 16)\
01628 AS2( pcmpgtd xmm1, xmm0)\
01629 AS2( psrld xmm1, 31)\
01630 AS2( paddd xmm4, xmm1)\
01631
01632 void SSE2_Square4(word *C, const word *A)
01633 {
01634 Squ_Begin(2)
01635 Squ_Column0(0, 1)
01636 Squ_End(2)
01637 }
01638
01639 void SSE2_Square8(word *C, const word *A)
01640 {
01641 Squ_Begin(4)
01642 #ifndef __GNUC__
01643 ASJ( jmp, 0, f)
01644 Squ_Acc(2)
01645 AS1( ret) ASL(0)
01646 #endif
01647 Squ_Column0(0, 1)
01648 Squ_Column1(1, 1)
01649 Squ_Column0(2, 2)
01650 Squ_Column1(3, 1)
01651 Squ_Column0(4, 1)
01652 Squ_End(4)
01653 }
01654
01655 void SSE2_Square16(word *C, const word *A)
01656 {
01657 Squ_Begin(8)
01658 #ifndef __GNUC__
01659 ASJ( jmp, 0, f)
01660 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01661 AS1( ret) ASL(0)
01662 #endif
01663 Squ_Column0(0, 1)
01664 Squ_Column1(1, 1)
01665 Squ_Column0(2, 2)
01666 Squ_Column1(3, 2)
01667 Squ_Column0(4, 3)
01668 Squ_Column1(5, 3)
01669 Squ_Column0(6, 4)
01670 Squ_Column1(7, 3)
01671 Squ_Column0(8, 3)
01672 Squ_Column1(9, 2)
01673 Squ_Column0(10, 2)
01674 Squ_Column1(11, 1)
01675 Squ_Column0(12, 1)
01676 Squ_End(8)
01677 }
01678
01679 void SSE2_Square32(word *C, const word *A)
01680 {
01681 Squ_Begin(16)
01682 ASJ( jmp, 0, f)
01683 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
01684 AS1( ret) ASL(0)
01685 Squ_Column0(0, 1)
01686 Squ_Column1(1, 1)
01687 Squ_Column0(2, 2)
01688 Squ_Column1(3, 2)
01689 Squ_Column0(4, 3)
01690 Squ_Column1(5, 3)
01691 Squ_Column0(6, 4)
01692 Squ_Column1(7, 4)
01693 Squ_Column0(8, 5)
01694 Squ_Column1(9, 5)
01695 Squ_Column0(10, 6)
01696 Squ_Column1(11, 6)
01697 Squ_Column0(12, 7)
01698 Squ_Column1(13, 7)
01699 Squ_Column0(14, 8)
01700 Squ_Column1(15, 7)
01701 Squ_Column0(16, 7)
01702 Squ_Column1(17, 6)
01703 Squ_Column0(18, 6)
01704 Squ_Column1(19, 5)
01705 Squ_Column0(20, 5)
01706 Squ_Column1(21, 4)
01707 Squ_Column0(22, 4)
01708 Squ_Column1(23, 3)
01709 Squ_Column0(24, 3)
01710 Squ_Column1(25, 2)
01711 Squ_Column0(26, 2)
01712 Squ_Column1(27, 1)
01713 Squ_Column0(28, 1)
01714 Squ_End(16)
01715 }
01716
01717 void SSE2_Multiply4(word *C, const word *A, const word *B)
01718 {
01719 Mul_Begin(2)
01720 #ifndef __GNUC__
01721 ASJ( jmp, 0, f)
01722 Mul_Acc(2)
01723 AS1( ret) ASL(0)
01724 #endif
01725 Mul_Column0(0, 2)
01726 Mul_End(2)
01727 }
01728
01729 void SSE2_Multiply8(word *C, const word *A, const word *B)
01730 {
01731 Mul_Begin(4)
01732 #ifndef __GNUC__
01733 ASJ( jmp, 0, f)
01734 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01735 AS1( ret) ASL(0)
01736 #endif
01737 Mul_Column0(0, 2)
01738 Mul_Column1(1, 3)
01739 Mul_Column0(2, 4)
01740 Mul_Column1(3, 3)
01741 Mul_Column0(4, 2)
01742 Mul_End(4)
01743 }
01744
01745 void SSE2_Multiply16(word *C, const word *A, const word *B)
01746 {
01747 Mul_Begin(8)
01748 #ifndef __GNUC__
01749 ASJ( jmp, 0, f)
01750 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01751 AS1( ret) ASL(0)
01752 #endif
01753 Mul_Column0(0, 2)
01754 Mul_Column1(1, 3)
01755 Mul_Column0(2, 4)
01756 Mul_Column1(3, 5)
01757 Mul_Column0(4, 6)
01758 Mul_Column1(5, 7)
01759 Mul_Column0(6, 8)
01760 Mul_Column1(7, 7)
01761 Mul_Column0(8, 6)
01762 Mul_Column1(9, 5)
01763 Mul_Column0(10, 4)
01764 Mul_Column1(11, 3)
01765 Mul_Column0(12, 2)
01766 Mul_End(8)
01767 }
01768
01769 void SSE2_Multiply32(word *C, const word *A, const word *B)
01770 {
01771 Mul_Begin(16)
01772 ASJ( jmp, 0, f)
01773 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01774 AS1( ret) ASL(0)
01775 Mul_Column0(0, 2)
01776 Mul_Column1(1, 3)
01777 Mul_Column0(2, 4)
01778 Mul_Column1(3, 5)
01779 Mul_Column0(4, 6)
01780 Mul_Column1(5, 7)
01781 Mul_Column0(6, 8)
01782 Mul_Column1(7, 9)
01783 Mul_Column0(8, 10)
01784 Mul_Column1(9, 11)
01785 Mul_Column0(10, 12)
01786 Mul_Column1(11, 13)
01787 Mul_Column0(12, 14)
01788 Mul_Column1(13, 15)
01789 Mul_Column0(14, 16)
01790 Mul_Column1(15, 15)
01791 Mul_Column0(16, 14)
01792 Mul_Column1(17, 13)
01793 Mul_Column0(18, 12)
01794 Mul_Column1(19, 11)
01795 Mul_Column0(20, 10)
01796 Mul_Column1(21, 9)
01797 Mul_Column0(22, 8)
01798 Mul_Column1(23, 7)
01799 Mul_Column0(24, 6)
01800 Mul_Column1(25, 5)
01801 Mul_Column0(26, 4)
01802 Mul_Column1(27, 3)
01803 Mul_Column0(28, 2)
01804 Mul_End(16)
01805 }
01806
01807 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
01808 {
01809 Mul_Begin(2)
01810 Bot_SaveAcc(0) Bot_Acc(2)
01811 Bot_End(2)
01812 }
01813
01814 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
01815 {
01816 Mul_Begin(4)
01817 #ifndef __GNUC__
01818 ASJ( jmp, 0, f)
01819 Mul_Acc(3) Mul_Acc(2)
01820 AS1( ret) ASL(0)
01821 #endif
01822 Mul_Column0(0, 2)
01823 Mul_Column1(1, 3)
01824 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01825 Bot_End(4)
01826 }
01827
01828 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
01829 {
01830 Mul_Begin(8)
01831 #ifndef __GNUC__
01832 ASJ( jmp, 0, f)
01833 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01834 AS1( ret) ASL(0)
01835 #endif
01836 Mul_Column0(0, 2)
01837 Mul_Column1(1, 3)
01838 Mul_Column0(2, 4)
01839 Mul_Column1(3, 5)
01840 Mul_Column0(4, 6)
01841 Mul_Column1(5, 7)
01842 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01843 Bot_End(8)
01844 }
01845
01846 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
01847 {
01848 Mul_Begin(16)
01849 #ifndef __GNUC__
01850 ASJ( jmp, 0, f)
01851 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01852 AS1( ret) ASL(0)
01853 #endif
01854 Mul_Column0(0, 2)
01855 Mul_Column1(1, 3)
01856 Mul_Column0(2, 4)
01857 Mul_Column1(3, 5)
01858 Mul_Column0(4, 6)
01859 Mul_Column1(5, 7)
01860 Mul_Column0(6, 8)
01861 Mul_Column1(7, 9)
01862 Mul_Column0(8, 10)
01863 Mul_Column1(9, 11)
01864 Mul_Column0(10, 12)
01865 Mul_Column1(11, 13)
01866 Mul_Column0(12, 14)
01867 Mul_Column1(13, 15)
01868 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
01869 Bot_End(16)
01870 }
01871
01872 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
01873 {
01874 Top_Begin(4)
01875 Top_Acc(3) Top_Acc(2) Top_Acc(1)
01876 #ifndef __GNUC__
01877 ASJ( jmp, 0, f)
01878 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01879 AS1( ret) ASL(0)
01880 #endif
01881 Top_Column0(4)
01882 Top_Column1(3)
01883 Mul_Column0(0, 2)
01884 Top_End(2)
01885 }
01886
01887 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
01888 {
01889 Top_Begin(8)
01890 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01891 #ifndef __GNUC__
01892 ASJ( jmp, 0, f)
01893 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01894 AS1( ret) ASL(0)
01895 #endif
01896 Top_Column0(8)
01897 Top_Column1(7)
01898 Mul_Column0(0, 6)
01899 Mul_Column1(1, 5)
01900 Mul_Column0(2, 4)
01901 Mul_Column1(3, 3)
01902 Mul_Column0(4, 2)
01903 Top_End(4)
01904 }
01905
01906 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
01907 {
01908 Top_Begin(16)
01909 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
01910 #ifndef __GNUC__
01911 ASJ( jmp, 0, f)
01912 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
01913 AS1( ret) ASL(0)
01914 #endif
01915 Top_Column0(16)
01916 Top_Column1(15)
01917 Mul_Column0(0, 14)
01918 Mul_Column1(1, 13)
01919 Mul_Column0(2, 12)
01920 Mul_Column1(3, 11)
01921 Mul_Column0(4, 10)
01922 Mul_Column1(5, 9)
01923 Mul_Column0(6, 8)
01924 Mul_Column1(7, 7)
01925 Mul_Column0(8, 6)
01926 Mul_Column1(9, 5)
01927 Mul_Column0(10, 4)
01928 Mul_Column1(11, 3)
01929 Mul_Column0(12, 2)
01930 Top_End(8)
01931 }
01932
01933 #endif // #if CRYPTOPP_INTEGER_SSE2
01934
01935
01936
01937 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
01938 typedef void (* PMul)(word *C, const word *A, const word *B);
01939 typedef void (* PSqu)(word *C, const word *A);
01940 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
01941
01942 #if CRYPTOPP_INTEGER_SSE2
01943 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
01944 static size_t s_recursionLimit = 8;
01945 #else
01946 static const size_t s_recursionLimit = 16;
01947 #endif
01948
01949 static PMul s_pMul[9], s_pBot[9];
01950 static PSqu s_pSqu[9];
01951 static PMulTop s_pTop[9];
01952
01953 static void SetFunctionPointers()
01954 {
01955 s_pMul[0] = &Baseline_Multiply2;
01956 s_pBot[0] = &Baseline_MultiplyBottom2;
01957 s_pSqu[0] = &Baseline_Square2;
01958 s_pTop[0] = &Baseline_MultiplyTop2;
01959 s_pTop[1] = &Baseline_MultiplyTop4;
01960
01961 #if CRYPTOPP_INTEGER_SSE2
01962 if (HasSSE2())
01963 {
01964 #if _MSC_VER != 1200 || defined(NDEBUG)
01965 if (IsP4())
01966 {
01967 s_pAdd = &SSE2_Add;
01968 s_pSub = &SSE2_Sub;
01969 }
01970 #endif
01971
01972 s_recursionLimit = 32;
01973
01974 s_pMul[1] = &SSE2_Multiply4;
01975 s_pMul[2] = &SSE2_Multiply8;
01976 s_pMul[4] = &SSE2_Multiply16;
01977 s_pMul[8] = &SSE2_Multiply32;
01978
01979 s_pBot[1] = &SSE2_MultiplyBottom4;
01980 s_pBot[2] = &SSE2_MultiplyBottom8;
01981 s_pBot[4] = &SSE2_MultiplyBottom16;
01982 s_pBot[8] = &SSE2_MultiplyBottom32;
01983
01984 s_pSqu[1] = &SSE2_Square4;
01985 s_pSqu[2] = &SSE2_Square8;
01986 s_pSqu[4] = &SSE2_Square16;
01987 s_pSqu[8] = &SSE2_Square32;
01988
01989 s_pTop[2] = &SSE2_MultiplyTop8;
01990 s_pTop[4] = &SSE2_MultiplyTop16;
01991 s_pTop[8] = &SSE2_MultiplyTop32;
01992 }
01993 else
01994 #endif
01995 {
01996 s_pMul[1] = &Baseline_Multiply4;
01997 s_pMul[2] = &Baseline_Multiply8;
01998
01999 s_pBot[1] = &Baseline_MultiplyBottom4;
02000 s_pBot[2] = &Baseline_MultiplyBottom8;
02001
02002 s_pSqu[1] = &Baseline_Square4;
02003 s_pSqu[2] = &Baseline_Square8;
02004
02005 s_pTop[2] = &Baseline_MultiplyTop8;
02006
02007 #if !CRYPTOPP_INTEGER_SSE2
02008 s_pMul[4] = &Baseline_Multiply16;
02009 s_pBot[4] = &Baseline_MultiplyBottom16;
02010 s_pSqu[4] = &Baseline_Square16;
02011 s_pTop[4] = &Baseline_MultiplyTop16;
02012 #endif
02013 }
02014 }
02015
02016 inline int Add(word *C, const word *A, const word *B, size_t N)
02017 {
02018 #if CRYPTOPP_INTEGER_SSE2
02019 return s_pAdd(N, C, A, B);
02020 #else
02021 return Baseline_Add(N, C, A, B);
02022 #endif
02023 }
02024
02025 inline int Subtract(word *C, const word *A, const word *B, size_t N)
02026 {
02027 #if CRYPTOPP_INTEGER_SSE2
02028 return s_pSub(N, C, A, B);
02029 #else
02030 return Baseline_Sub(N, C, A, B);
02031 #endif
02032 }
02033
02034
02035
02036
02037 #define A0 A
02038 #define A1 (A+N2)
02039 #define B0 B
02040 #define B1 (B+N2)
02041
02042 #define T0 T
02043 #define T1 (T+N2)
02044 #define T2 (T+N)
02045 #define T3 (T+N+N2)
02046
02047 #define R0 R
02048 #define R1 (R+N2)
02049 #define R2 (R+N)
02050 #define R3 (R+N+N2)
02051
02052
02053
02054
02055
02056
02057 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02058 {
02059 assert(N>=2 && N%2==0);
02060
02061 if (N <= s_recursionLimit)
02062 s_pMul[N/4](R, A, B);
02063 else
02064 {
02065 const size_t N2 = N/2;
02066
02067 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02068 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02069
02070 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02071 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02072
02073 RecursiveMultiply(R2, T2, A1, B1, N2);
02074 RecursiveMultiply(T0, T2, R0, R1, N2);
02075 RecursiveMultiply(R0, T2, A0, B0, N2);
02076
02077
02078
02079 int c2 = Add(R2, R2, R1, N2);
02080 int c3 = c2;
02081 c2 += Add(R1, R2, R0, N2);
02082 c3 += Add(R2, R2, R3, N2);
02083
02084 if (AN2 == BN2)
02085 c3 -= Subtract(R1, R1, T0, N);
02086 else
02087 c3 += Add(R1, R1, T0, N);
02088
02089 c3 += Increment(R2, N2, c2);
02090 assert (c3 >= 0 && c3 <= 2);
02091 Increment(R3, N2, c3);
02092 }
02093 }
02094
02095
02096
02097
02098
02099 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02100 {
02101 assert(N && N%2==0);
02102
02103 if (N <= s_recursionLimit)
02104 s_pSqu[N/4](R, A);
02105 else
02106 {
02107 const size_t N2 = N/2;
02108
02109 RecursiveSquare(R0, T2, A0, N2);
02110 RecursiveSquare(R2, T2, A1, N2);
02111 RecursiveMultiply(T0, T2, A0, A1, N2);
02112
02113 int carry = Add(R1, R1, T0, N);
02114 carry += Add(R1, R1, T0, N);
02115 Increment(R3, N2, carry);
02116 }
02117 }
02118
02119
02120
02121
02122
02123
02124 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02125 {
02126 assert(N>=2 && N%2==0);
02127
02128 if (N <= s_recursionLimit)
02129 s_pBot[N/4](R, A, B);
02130 else
02131 {
02132 const size_t N2 = N/2;
02133
02134 RecursiveMultiply(R, T, A0, B0, N2);
02135 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02136 Add(R1, R1, T0, N2);
02137 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02138 Add(R1, R1, T0, N2);
02139 }
02140 }
02141
02142
02143
02144
02145
02146
02147
02148 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02149 {
02150 assert(N>=2 && N%2==0);
02151
02152 if (N <= s_recursionLimit)
02153 s_pTop[N/4](R, A, B, L[N-1]);
02154 else
02155 {
02156 const size_t N2 = N/2;
02157
02158 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
02159 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
02160
02161 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
02162 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
02163
02164 RecursiveMultiply(T0, T2, R0, R1, N2);
02165 RecursiveMultiply(R0, T2, A1, B1, N2);
02166
02167
02168
02169 int t, c3;
02170 int c2 = Subtract(T2, L+N2, L, N2);
02171
02172 if (AN2 == BN2)
02173 {
02174 c2 -= Add(T2, T2, T0, N2);
02175 t = (Compare(T2, R0, N2) == -1);
02176 c3 = t - Subtract(T2, T2, T1, N2);
02177 }
02178 else
02179 {
02180 c2 += Subtract(T2, T2, T0, N2);
02181 t = (Compare(T2, R0, N2) == -1);
02182 c3 = t + Add(T2, T2, T1, N2);
02183 }
02184
02185 c2 += t;
02186 if (c2 >= 0)
02187 c3 += Increment(T2, N2, c2);
02188 else
02189 c3 -= Decrement(T2, N2, -c2);
02190 c3 += Add(R0, T2, R1, N2);
02191
02192 assert (c3 >= 0 && c3 <= 2);
02193 Increment(R1, N2, c3);
02194 }
02195 }
02196
02197 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02198 {
02199 RecursiveMultiply(R, T, A, B, N);
02200 }
02201
02202 inline void Square(word *R, word *T, const word *A, size_t N)
02203 {
02204 RecursiveSquare(R, T, A, N);
02205 }
02206
02207 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02208 {
02209 RecursiveMultiplyBottom(R, T, A, B, N);
02210 }
02211
02212
02213
02214
02215
02216
02217 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02218 {
02219 if (NA == NB)
02220 {
02221 if (A == B)
02222 Square(R, T, A, NA);
02223 else
02224 Multiply(R, T, A, B, NA);
02225
02226 return;
02227 }
02228
02229 if (NA > NB)
02230 {
02231 std::swap(A, B);
02232 std::swap(NA, NB);
02233 }
02234
02235 assert(NB % NA == 0);
02236
02237 if (NA==2 && !A[1])
02238 {
02239 switch (A[0])
02240 {
02241 case 0:
02242 SetWords(R, 0, NB+2);
02243 return;
02244 case 1:
02245 CopyWords(R, B, NB);
02246 R[NB] = R[NB+1] = 0;
02247 return;
02248 default:
02249 R[NB] = LinearMultiply(R, B, A[0], NB);
02250 R[NB+1] = 0;
02251 return;
02252 }
02253 }
02254
02255 size_t i;
02256 if ((NB/NA)%2 == 0)
02257 {
02258 Multiply(R, T, A, B, NA);
02259 CopyWords(T+2*NA, R+NA, NA);
02260
02261 for (i=2*NA; i<NB; i+=2*NA)
02262 Multiply(T+NA+i, T, A, B+i, NA);
02263 for (i=NA; i<NB; i+=2*NA)
02264 Multiply(R+i, T, A, B+i, NA);
02265 }
02266 else
02267 {
02268 for (i=0; i<NB; i+=2*NA)
02269 Multiply(R+i, T, A, B+i, NA);
02270 for (i=NA; i<NB; i+=2*NA)
02271 Multiply(T+NA+i, T, A, B+i, NA);
02272 }
02273
02274 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02275 Increment(R+NB, NA);
02276 }
02277
02278
02279
02280
02281
02282 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02283 {
02284 if (N==2)
02285 {
02286 T[0] = AtomicInverseModPower2(A[0]);
02287 T[1] = 0;
02288 s_pBot[0](T+2, T, A);
02289 TwosComplement(T+2, 2);
02290 Increment(T+2, 2, 2);
02291 s_pBot[0](R, T, T+2);
02292 }
02293 else
02294 {
02295 const size_t N2 = N/2;
02296 RecursiveInverseModPower2(R0, T0, A0, N2);
02297 T0[0] = 1;
02298 SetWords(T0+1, 0, N2-1);
02299 MultiplyTop(R1, T1, T0, R0, A0, N2);
02300 MultiplyBottom(T0, T1, R0, A1, N2);
02301 Add(T0, R1, T0, N2);
02302 TwosComplement(T0, N2);
02303 MultiplyBottom(R1, T1, R0, T0, N2);
02304 }
02305 }
02306
02307
02308
02309
02310
02311
02312
02313 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
02314 {
02315 #if 1
02316 MultiplyBottom(R, T, X, U, N);
02317 MultiplyTop(T, T+N, X, R, M, N);
02318 word borrow = Subtract(T, X+N, T, N);
02319
02320 word carry = Add(T+N, T, M, N);
02321 assert(carry | !borrow);
02322 CopyWords(R, T + ((0-borrow) & N), N);
02323 #elif 0
02324 const word u = 0-U[0];
02325 Declare2Words(p)
02326 for (size_t i=0; i<N; i++)
02327 {
02328 const word t = u * X[i];
02329 word c = 0;
02330 for (size_t j=0; j<N; j+=2)
02331 {
02332 MultiplyWords(p, t, M[j]);
02333 Acc2WordsBy1(p, X[i+j]);
02334 Acc2WordsBy1(p, c);
02335 X[i+j] = LowWord(p);
02336 c = HighWord(p);
02337 MultiplyWords(p, t, M[j+1]);
02338 Acc2WordsBy1(p, X[i+j+1]);
02339 Acc2WordsBy1(p, c);
02340 X[i+j+1] = LowWord(p);
02341 c = HighWord(p);
02342 }
02343
02344 if (Increment(X+N+i, N-i, c))
02345 while (!Subtract(X+N, X+N, M, N)) {}
02346 }
02347
02348 memcpy(R, X+N, N*WORD_SIZE);
02349 #else
02350 __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
02351 for (size_t i=0; i<N; i++)
02352 {
02353 __m64 t = _mm_cvtsi32_si64(X[i]);
02354 t = _mm_mul_su32(t, u);
02355 __m64 c = _mm_setzero_si64();
02356 for (size_t j=0; j<N; j+=2)
02357 {
02358 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
02359 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
02360 c = _mm_add_si64(c, p);
02361 X[i+j] = _mm_cvtsi64_si32(c);
02362 c = _mm_srli_si64(c, 32);
02363 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
02364 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
02365 c = _mm_add_si64(c, p);
02366 X[i+j+1] = _mm_cvtsi64_si32(c);
02367 c = _mm_srli_si64(c, 32);
02368 }
02369
02370 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
02371 while (!Subtract(X+N, X+N, M, N)) {}
02372 }
02373
02374 memcpy(R, X+N, N*WORD_SIZE);
02375 _mm_empty();
02376 #endif
02377 }
02378
02379
02380
02381
02382
02383
02384
02385
02386 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02387 {
02388 assert(N%2==0 && N>=4);
02389
02390 #define M0 M
02391 #define M1 (M+N2)
02392 #define V0 V
02393 #define V1 (V+N2)
02394
02395 #define X0 X
02396 #define X1 (X+N2)
02397 #define X2 (X+N)
02398 #define X3 (X+N+N2)
02399
02400 const size_t N2 = N/2;
02401 Multiply(T0, T2, V0, X3, N2);
02402 int c2 = Add(T0, T0, X0, N);
02403 MultiplyBottom(T3, T2, T0, U, N2);
02404 MultiplyTop(T2, R, T0, T3, M0, N2);
02405 c2 -= Subtract(T2, T1, T2, N2);
02406 Multiply(T0, R, T3, M1, N2);
02407 c2 -= Subtract(T0, T2, T0, N2);
02408 int c3 = -(int)Subtract(T1, X2, T1, N2);
02409 Multiply(R0, T2, V1, X3, N2);
02410 c3 += Add(R, R, T, N);
02411
02412 if (c2>0)
02413 c3 += Increment(R1, N2);
02414 else if (c2<0)
02415 c3 -= Decrement(R1, N2, -c2);
02416
02417 assert(c3>=-1 && c3<=1);
02418 if (c3>0)
02419 Subtract(R, R, M, N);
02420 else if (c3<0)
02421 Add(R, R, M, N);
02422
02423 #undef M0
02424 #undef M1
02425 #undef V0
02426 #undef V1
02427
02428 #undef X0
02429 #undef X1
02430 #undef X2
02431 #undef X3
02432 }
02433
02434 #undef A0
02435 #undef A1
02436 #undef B0
02437 #undef B1
02438
02439 #undef T0
02440 #undef T1
02441 #undef T2
02442 #undef T3
02443
02444 #undef R0
02445 #undef R1
02446 #undef R2
02447 #undef R3
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501
02502
02503
02504
02505
02506
02507
02508
02509
02510
02511
02512
02513 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02514 {
02515 word T[4];
02516 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02517 Q[0] = q.GetLowHalf();
02518 Q[1] = q.GetHighHalf();
02519
02520 #ifndef NDEBUG
02521 if (B[0] || B[1])
02522 {
02523
02524 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02525 word P[4];
02526 s_pMul[0](P, Q, B);
02527 Add(P, P, T, 4);
02528 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02529 }
02530 #endif
02531 }
02532
02533
02534 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02535 {
02536 assert(N && N%2==0);
02537
02538 AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
02539
02540 word borrow = Subtract(R, R, T, N+2);
02541 assert(!borrow && !R[N+1]);
02542
02543 while (R[N] || Compare(R, B, N) >= 0)
02544 {
02545 R[N] -= Subtract(R, R, B, N);
02546 Q[1] += (++Q[0]==0);
02547 assert(Q[0] || Q[1]);
02548 }
02549 }
02550
02551
02552
02553
02554
02555
02556
02557 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02558 {
02559 assert(NA && NB && NA%2==0 && NB%2==0);
02560 assert(B[NB-1] || B[NB-2]);
02561 assert(NB <= NA);
02562
02563
02564 word *const TA=T;
02565 word *const TB=T+NA+2;
02566 word *const TP=T+NA+2+NB;
02567
02568
02569 unsigned shiftWords = (B[NB-1]==0);
02570 TB[0] = TB[NB-1] = 0;
02571 CopyWords(TB+shiftWords, B, NB-shiftWords);
02572 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02573 assert(shiftBits < WORD_BITS);
02574 ShiftWordsLeftByBits(TB, NB, shiftBits);
02575
02576
02577 TA[0] = TA[NA] = TA[NA+1] = 0;
02578 CopyWords(TA+shiftWords, A, NA);
02579 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02580
02581 if (TA[NA+1]==0 && TA[NA] <= 1)
02582 {
02583 Q[NA-NB+1] = Q[NA-NB] = 0;
02584 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02585 {
02586 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02587 ++Q[NA-NB];
02588 }
02589 }
02590 else
02591 {
02592 NA+=2;
02593 assert(Compare(TA+NA-NB, TB, NB) < 0);
02594 }
02595
02596 word BT[2];
02597 BT[0] = TB[NB-2] + 1;
02598 BT[1] = TB[NB-1] + (BT[0]==0);
02599
02600
02601 for (size_t i=NA-2; i>=NB; i-=2)
02602 {
02603 AtomicDivide(Q+i-NB, TA+i-2, BT);
02604 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02605 }
02606
02607
02608 CopyWords(R, TA+shiftWords, NB);
02609 ShiftWordsRightByBits(R, NB, shiftBits);
02610 }
02611
02612 static inline size_t EvenWordCount(const word *X, size_t N)
02613 {
02614 while (N && X[N-2]==0 && X[N-1]==0)
02615 N-=2;
02616 return N;
02617 }
02618
02619
02620
02621
02622
02623
02624
02625 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02626 {
02627 assert(NA<=N && N && N%2==0);
02628
02629 word *b = T;
02630 word *c = T+N;
02631 word *f = T+2*N;
02632 word *g = T+3*N;
02633 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02634 unsigned int k=0, s=0;
02635
02636 SetWords(T, 0, 3*N);
02637 b[0]=1;
02638 CopyWords(f, A, NA);
02639 CopyWords(g, M, N);
02640
02641 while (1)
02642 {
02643 word t=f[0];
02644 while (!t)
02645 {
02646 if (EvenWordCount(f, fgLen)==0)
02647 {
02648 SetWords(R, 0, N);
02649 return 0;
02650 }
02651
02652 ShiftWordsRightByWords(f, fgLen, 1);
02653 if (c[bcLen-1]) bcLen+=2;
02654 assert(bcLen <= N);
02655 ShiftWordsLeftByWords(c, bcLen, 1);
02656 k+=WORD_BITS;
02657 t=f[0];
02658 }
02659
02660 unsigned int i=0;
02661 while (t%2 == 0)
02662 {
02663 t>>=1;
02664 i++;
02665 }
02666 k+=i;
02667
02668 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02669 {
02670 if (s%2==0)
02671 CopyWords(R, b, N);
02672 else
02673 Subtract(R, M, b, N);
02674 return k;
02675 }
02676
02677 ShiftWordsRightByBits(f, fgLen, i);
02678 t=ShiftWordsLeftByBits(c, bcLen, i);
02679 if (t)
02680 {
02681 c[bcLen] = t;
02682 bcLen+=2;
02683 assert(bcLen <= N);
02684 }
02685
02686 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02687 fgLen-=2;
02688
02689 if (Compare(f, g, fgLen)==-1)
02690 {
02691 std::swap(f, g);
02692 std::swap(b, c);
02693 s++;
02694 }
02695
02696 Subtract(f, f, g, fgLen);
02697
02698 if (Add(b, b, c, bcLen))
02699 {
02700 b[bcLen] = 1;
02701 bcLen+=2;
02702 assert(bcLen <= N);
02703 }
02704 }
02705 }
02706
02707
02708
02709
02710
02711 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02712 {
02713 CopyWords(R, A, N);
02714
02715 while (k--)
02716 {
02717 if (R[0]%2==0)
02718 ShiftWordsRightByBits(R, N, 1);
02719 else
02720 {
02721 word carry = Add(R, R, M, N);
02722 ShiftWordsRightByBits(R, N, 1);
02723 R[N-1] += carry<<(WORD_BITS-1);
02724 }
02725 }
02726 }
02727
02728
02729
02730
02731
02732 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02733 {
02734 CopyWords(R, A, N);
02735
02736 while (k--)
02737 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02738 Subtract(R, R, M, N);
02739 }
02740
02741
02742
02743 InitializeInteger::InitializeInteger()
02744 {
02745 if (!g_pAssignIntToInteger)
02746 {
02747 SetFunctionPointers();
02748 g_pAssignIntToInteger = AssignIntToInteger;
02749 }
02750 }
02751
02752 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02753
02754 static inline size_t RoundupSize(size_t n)
02755 {
02756 if (n<=8)
02757 return RoundupSizeTable[n];
02758 else if (n<=16)
02759 return 16;
02760 else if (n<=32)
02761 return 32;
02762 else if (n<=64)
02763 return 64;
02764 else return size_t(1) << BitPrecision(n-1);
02765 }
02766
02767 Integer::Integer()
02768 : reg(2), sign(POSITIVE)
02769 {
02770 reg[0] = reg[1] = 0;
02771 }
02772
02773 Integer::Integer(const Integer& t)
02774 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02775 {
02776 CopyWords(reg, t.reg, reg.size());
02777 }
02778
02779 Integer::Integer(Sign s, lword value)
02780 : reg(2), sign(s)
02781 {
02782 reg[0] = word(value);
02783 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02784 }
02785
02786 Integer::Integer(signed long value)
02787 : reg(2)
02788 {
02789 if (value >= 0)
02790 sign = POSITIVE;
02791 else
02792 {
02793 sign = NEGATIVE;
02794 value = -value;
02795 }
02796 reg[0] = word(value);
02797 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02798 }
02799
02800 Integer::Integer(Sign s, word high, word low)
02801 : reg(2), sign(s)
02802 {
02803 reg[0] = low;
02804 reg[1] = high;
02805 }
02806
02807 bool Integer::IsConvertableToLong() const
02808 {
02809 if (ByteCount() > sizeof(long))
02810 return false;
02811
02812 unsigned long value = (unsigned long)reg[0];
02813 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02814
02815 if (sign==POSITIVE)
02816 return (signed long)value >= 0;
02817 else
02818 return -(signed long)value < 0;
02819 }
02820
02821 signed long Integer::ConvertToLong() const
02822 {
02823 assert(IsConvertableToLong());
02824
02825 unsigned long value = (unsigned long)reg[0];
02826 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
02827 return sign==POSITIVE ? value : -(signed long)value;
02828 }
02829
02830 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02831 {
02832 Decode(encodedInteger, byteCount, s);
02833 }
02834
02835 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02836 {
02837 Decode(encodedInteger, byteCount, s);
02838 }
02839
02840 Integer::Integer(BufferedTransformation &bt)
02841 {
02842 BERDecode(bt);
02843 }
02844
02845 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02846 {
02847 Randomize(rng, bitcount);
02848 }
02849
02850 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02851 {
02852 if (!Randomize(rng, min, max, rnType, equiv, mod))
02853 throw Integer::RandomNumberNotFound();
02854 }
02855
02856 Integer Integer::Power2(size_t e)
02857 {
02858 Integer r((word)0, BitsToWords(e+1));
02859 r.SetBit(e);
02860 return r;
02861 }
02862
02863 template <long i>
02864 struct NewInteger
02865 {
02866 Integer * operator()() const
02867 {
02868 return new Integer(i);
02869 }
02870 };
02871
02872 const Integer &Integer::Zero()
02873 {
02874 return Singleton<Integer>().Ref();
02875 }
02876
02877 const Integer &Integer::One()
02878 {
02879 return Singleton<Integer, NewInteger<1> >().Ref();
02880 }
02881
02882 const Integer &Integer::Two()
02883 {
02884 return Singleton<Integer, NewInteger<2> >().Ref();
02885 }
02886
02887 bool Integer::operator!() const
02888 {
02889 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02890 }
02891
02892 Integer& Integer::operator=(const Integer& t)
02893 {
02894 if (this != &t)
02895 {
02896 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
02897 reg.New(RoundupSize(t.WordCount()));
02898 CopyWords(reg, t.reg, reg.size());
02899 sign = t.sign;
02900 }
02901 return *this;
02902 }
02903
02904 bool Integer::GetBit(size_t n) const
02905 {
02906 if (n/WORD_BITS >= reg.size())
02907 return 0;
02908 else
02909 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02910 }
02911
02912 void Integer::SetBit(size_t n, bool value)
02913 {
02914 if (value)
02915 {
02916 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02917 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02918 }
02919 else
02920 {
02921 if (n/WORD_BITS < reg.size())
02922 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02923 }
02924 }
02925
02926 byte Integer::GetByte(size_t n) const
02927 {
02928 if (n/WORD_SIZE >= reg.size())
02929 return 0;
02930 else
02931 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02932 }
02933
02934 void Integer::SetByte(size_t n, byte value)
02935 {
02936 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02937 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02938 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02939 }
02940
02941 lword Integer::GetBits(size_t i, size_t n) const
02942 {
02943 lword v = 0;
02944 assert(n <= sizeof(v)*8);
02945 for (unsigned int j=0; j<n; j++)
02946 v |= lword(GetBit(i+j)) << j;
02947 return v;
02948 }
02949
02950 Integer Integer::operator-() const
02951 {
02952 Integer result(*this);
02953 result.Negate();
02954 return result;
02955 }
02956
02957 Integer Integer::AbsoluteValue() const
02958 {
02959 Integer result(*this);
02960 result.sign = POSITIVE;
02961 return result;
02962 }
02963
02964 void Integer::swap(Integer &a)
02965 {
02966 reg.swap(a.reg);
02967 std::swap(sign, a.sign);
02968 }
02969
02970 Integer::Integer(word value, size_t length)
02971 : reg(RoundupSize(length)), sign(POSITIVE)
02972 {
02973 reg[0] = value;
02974 SetWords(reg+1, 0, reg.size()-1);
02975 }
02976
02977 template <class T>
02978 static Integer StringToInteger(const T *str)
02979 {
02980 int radix;
02981
02982
02983 unsigned int length;
02984 for (length = 0; str[length] != 0; length++) {}
02985
02986 Integer v;
02987
02988 if (length == 0)
02989 return v;
02990
02991 switch (str[length-1])
02992 {
02993 case 'h':
02994 case 'H':
02995 radix=16;
02996 break;
02997 case 'o':
02998 case 'O':
02999 radix=8;
03000 break;
03001 case 'b':
03002 case 'B':
03003 radix=2;
03004 break;
03005 default:
03006 radix=10;
03007 }
03008
03009 if (length > 2 && str[0] == '0' && str[1] == 'x')
03010 radix = 16;
03011
03012 for (unsigned i=0; i<length; i++)
03013 {
03014 int digit;
03015
03016 if (str[i] >= '0' && str[i] <= '9')
03017 digit = str[i] - '0';
03018 else if (str[i] >= 'A' && str[i] <= 'F')
03019 digit = str[i] - 'A' + 10;
03020 else if (str[i] >= 'a' && str[i] <= 'f')
03021 digit = str[i] - 'a' + 10;
03022 else
03023 digit = radix;
03024
03025 if (digit < radix)
03026 {
03027 v *= radix;
03028 v += digit;
03029 }
03030 }
03031
03032 if (str[0] == '-')
03033 v.Negate();
03034
03035 return v;
03036 }
03037
03038 Integer::Integer(const char *str)
03039 : reg(2), sign(POSITIVE)
03040 {
03041 *this = StringToInteger(str);
03042 }
03043
03044 Integer::Integer(const wchar_t *str)
03045 : reg(2), sign(POSITIVE)
03046 {
03047 *this = StringToInteger(str);
03048 }
03049
03050 unsigned int Integer::WordCount() const
03051 {
03052 return (unsigned int)CountWords(reg, reg.size());
03053 }
03054
03055 unsigned int Integer::ByteCount() const
03056 {
03057 unsigned wordCount = WordCount();
03058 if (wordCount)
03059 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03060 else
03061 return 0;
03062 }
03063
03064 unsigned int Integer::BitCount() const
03065 {
03066 unsigned wordCount = WordCount();
03067 if (wordCount)
03068 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03069 else
03070 return 0;
03071 }
03072
03073 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03074 {
03075 StringStore store(input, inputLen);
03076 Decode(store, inputLen, s);
03077 }
03078
03079 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03080 {
03081 assert(bt.MaxRetrievable() >= inputLen);
03082
03083 byte b;
03084 bt.Peek(b);
03085 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03086
03087 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03088 {
03089 bt.Skip(1);
03090 inputLen--;
03091 bt.Peek(b);
03092 }
03093
03094 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03095
03096 for (size_t i=inputLen; i > 0; i--)
03097 {
03098 bt.Get(b);
03099 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03100 }
03101
03102 if (sign == NEGATIVE)
03103 {
03104 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03105 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03106 TwosComplement(reg, reg.size());
03107 }
03108 }
03109
03110 size_t Integer::MinEncodedSize(Signedness signedness) const
03111 {
03112 unsigned int outputLen = STDMAX(1U, ByteCount());
03113 if (signedness == UNSIGNED)
03114 return outputLen;
03115 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03116 outputLen++;
03117 if (IsNegative() && *this < -Power2(outputLen*8-1))
03118 outputLen++;
03119 return outputLen;
03120 }
03121
03122 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03123 {
03124 ArraySink sink(output, outputLen);
03125 Encode(sink, outputLen, signedness);
03126 }
03127
03128 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03129 {
03130 if (signedness == UNSIGNED || NotNegative())
03131 {
03132 for (size_t i=outputLen; i > 0; i--)
03133 bt.Put(GetByte(i-1));
03134 }
03135 else
03136 {
03137
03138 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
03139 temp.Encode(bt, outputLen, UNSIGNED);
03140 }
03141 }
03142
03143 void Integer::DEREncode(BufferedTransformation &bt) const
03144 {
03145 DERGeneralEncoder enc(bt, INTEGER);
03146 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03147 enc.MessageEnd();
03148 }
03149
03150 void Integer::BERDecode(const byte *input, size_t len)
03151 {
03152 StringStore store(input, len);
03153 BERDecode(store);
03154 }
03155
03156 void Integer::BERDecode(BufferedTransformation &bt)
03157 {
03158 BERGeneralDecoder dec(bt, INTEGER);
03159 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03160 BERDecodeError();
03161 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03162 dec.MessageEnd();
03163 }
03164
03165 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03166 {
03167 DERGeneralEncoder enc(bt, OCTET_STRING);
03168 Encode(enc, length);
03169 enc.MessageEnd();
03170 }
03171
03172 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03173 {
03174 BERGeneralDecoder dec(bt, OCTET_STRING);
03175 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03176 BERDecodeError();
03177 Decode(dec, length);
03178 dec.MessageEnd();
03179 }
03180
03181 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03182 {
03183 ArraySink sink(output, len);
03184 return OpenPGPEncode(sink);
03185 }
03186
03187 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03188 {
03189 word16 bitCount = BitCount();
03190 bt.PutWord16(bitCount);
03191 size_t byteCount = BitsToBytes(bitCount);
03192 Encode(bt, byteCount);
03193 return 2 + byteCount;
03194 }
03195
03196 void Integer::OpenPGPDecode(const byte *input, size_t len)
03197 {
03198 StringStore store(input, len);
03199 OpenPGPDecode(store);
03200 }
03201
03202 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03203 {
03204 word16 bitCount;
03205 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03206 throw OpenPGPDecodeErr();
03207 Decode(bt, BitsToBytes(bitCount));
03208 }
03209
03210 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03211 {
03212 const size_t nbytes = nbits/8 + 1;
03213 SecByteBlock buf(nbytes);
03214 rng.GenerateBlock(buf, nbytes);
03215 if (nbytes)
03216 buf[0] = (byte)Crop(buf[0], nbits % 8);
03217 Decode(buf, nbytes, UNSIGNED);
03218 }
03219
03220 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03221 {
03222 if (min > max)
03223 throw InvalidArgument("Integer: Min must be no greater than Max");
03224
03225 Integer range = max - min;
03226 const unsigned int nbits = range.BitCount();
03227
03228 do
03229 {
03230 Randomize(rng, nbits);
03231 }
03232 while (*this > range);
03233
03234 *this += min;
03235 }
03236
03237 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03238 {
03239 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03240 }
03241
03242 class KDF2_RNG : public RandomNumberGenerator
03243 {
03244 public:
03245 KDF2_RNG(const byte *seed, size_t seedSize)
03246 : m_counter(0), m_counterAndSeed(seedSize + 4)
03247 {
03248 memcpy(m_counterAndSeed + 4, seed, seedSize);
03249 }
03250
03251 void GenerateBlock(byte *output, size_t size)
03252 {
03253 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03254 ++m_counter;
03255 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03256 }
03257
03258 private:
03259 word32 m_counter;
03260 SecByteBlock m_counterAndSeed;
03261 };
03262
03263 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03264 {
03265 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03266 Integer max;
03267 if (!params.GetValue("Max", max))
03268 {
03269 int bitLength;
03270 if (params.GetIntValue("BitLength", bitLength))
03271 max = Integer::Power2(bitLength);
03272 else
03273 throw InvalidArgument("Integer: missing Max argument");
03274 }
03275 if (min > max)
03276 throw InvalidArgument("Integer: Min must be no greater than Max");
03277
03278 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03279 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03280
03281 if (equiv.IsNegative() || equiv >= mod)
03282 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03283
03284 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03285
03286 member_ptr<KDF2_RNG> kdf2Rng;
03287 ConstByteArrayParameter seed;
03288 if (params.GetValue(Name::Seed(), seed))
03289 {
03290 ByteQueue bq;
03291 DERSequenceEncoder seq(bq);
03292 min.DEREncode(seq);
03293 max.DEREncode(seq);
03294 equiv.DEREncode(seq);
03295 mod.DEREncode(seq);
03296 DEREncodeUnsigned(seq, rnType);
03297 DEREncodeOctetString(seq, seed.begin(), seed.size());
03298 seq.MessageEnd();
03299
03300 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03301 bq.Get(finalSeed, finalSeed.size());
03302 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03303 }
03304 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03305
03306 switch (rnType)
03307 {
03308 case ANY:
03309 if (mod == One())
03310 Randomize(rng, min, max);
03311 else
03312 {
03313 Integer min1 = min + (equiv-min)%mod;
03314 if (max < min1)
03315 return false;
03316 Randomize(rng, Zero(), (max - min1) / mod);
03317 *this *= mod;
03318 *this += min1;
03319 }
03320 return true;
03321
03322 case PRIME:
03323 {
03324 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03325
03326 int i;
03327 i = 0;
03328 while (1)
03329 {
03330 if (++i==16)
03331 {
03332
03333 Integer first = min;
03334 if (FirstPrime(first, max, equiv, mod, pSelector))
03335 {
03336
03337 *this = first;
03338 if (!FirstPrime(first, max, equiv, mod, pSelector))
03339 return true;
03340 }
03341 else
03342 return false;
03343 }
03344
03345 Randomize(rng, min, max);
03346 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03347 return true;
03348 }
03349 }
03350
03351 default:
03352 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03353 }
03354 }
03355
03356 std::istream& operator>>(std::istream& in, Integer &a)
03357 {
03358 char c;
03359 unsigned int length = 0;
03360 SecBlock<char> str(length + 16);
03361
03362 std::ws(in);
03363
03364 do
03365 {
03366 in.read(&c, 1);
03367 str[length++] = c;
03368 if (length >= str.size())
03369 str.Grow(length + 16);
03370 }
03371 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03372
03373 if (in.gcount())
03374 in.putback(c);
03375 str[length-1] = '\0';
03376 a = Integer(str);
03377
03378 return in;
03379 }
03380
03381 std::ostream& operator<<(std::ostream& out, const Integer &a)
03382 {
03383
03384 long f = out.flags() & std::ios::basefield;
03385 int base, block;
03386 char suffix;
03387 switch(f)
03388 {
03389 case std::ios::oct :
03390 base = 8;
03391 block = 8;
03392 suffix = 'o';
03393 break;
03394 case std::ios::hex :
03395 base = 16;
03396 block = 4;
03397 suffix = 'h';
03398 break;
03399 default :
03400 base = 10;
03401 block = 3;
03402 suffix = '.';
03403 }
03404
03405 Integer temp1=a, temp2;
03406
03407 if (a.IsNegative())
03408 {
03409 out << '-';
03410 temp1.Negate();
03411 }
03412
03413 if (!a)
03414 out << '0';
03415
03416 static const char upper[]="0123456789ABCDEF";
03417 static const char lower[]="0123456789abcdef";
03418
03419 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
03420 unsigned i=0;
03421 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03422
03423 while (!!temp1)
03424 {
03425 word digit;
03426 Integer::Divide(digit, temp2, temp1, base);
03427 s[i++]=vec[digit];
03428 temp1.swap(temp2);
03429 }
03430
03431 while (i--)
03432 {
03433 out << s[i];
03434
03435
03436 }
03437 return out << suffix;
03438 }
03439
03440 Integer& Integer::operator++()
03441 {
03442 if (NotNegative())
03443 {
03444 if (Increment(reg, reg.size()))
03445 {
03446 reg.CleanGrow(2*reg.size());
03447 reg[reg.size()/2]=1;
03448 }
03449 }
03450 else
03451 {
03452 word borrow = Decrement(reg, reg.size());
03453 assert(!borrow);
03454 if (WordCount()==0)
03455 *this = Zero();
03456 }
03457 return *this;
03458 }
03459
03460 Integer& Integer::operator--()
03461 {
03462 if (IsNegative())
03463 {
03464 if (Increment(reg, reg.size()))
03465 {
03466 reg.CleanGrow(2*reg.size());
03467 reg[reg.size()/2]=1;
03468 }
03469 }
03470 else
03471 {
03472 if (Decrement(reg, reg.size()))
03473 *this = -One();
03474 }
03475 return *this;
03476 }
03477
03478 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03479 {
03480 int carry;
03481 if (a.reg.size() == b.reg.size())
03482 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03483 else if (a.reg.size() > b.reg.size())
03484 {
03485 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03486 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03487 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03488 }
03489 else
03490 {
03491 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03492 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03493 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03494 }
03495
03496 if (carry)
03497 {
03498 sum.reg.CleanGrow(2*sum.reg.size());
03499 sum.reg[sum.reg.size()/2] = 1;
03500 }
03501 sum.sign = Integer::POSITIVE;
03502 }
03503
03504 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03505 {
03506 unsigned aSize = a.WordCount();
03507 aSize += aSize%2;
03508 unsigned bSize = b.WordCount();
03509 bSize += bSize%2;
03510
03511 if (aSize == bSize)
03512 {
03513 if (Compare(a.reg, b.reg, aSize) >= 0)
03514 {
03515 Subtract(diff.reg, a.reg, b.reg, aSize);
03516 diff.sign = Integer::POSITIVE;
03517 }
03518 else
03519 {
03520 Subtract(diff.reg, b.reg, a.reg, aSize);
03521 diff.sign = Integer::NEGATIVE;
03522 }
03523 }
03524 else if (aSize > bSize)
03525 {
03526 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03527 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03528 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03529 assert(!borrow);
03530 diff.sign = Integer::POSITIVE;
03531 }
03532 else
03533 {
03534 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03535 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03536 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03537 assert(!borrow);
03538 diff.sign = Integer::NEGATIVE;
03539 }
03540 }
03541
03542
03543 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03544 {
03545 return a < b ? b : a;
03546 }
03547
03548 Integer Integer::Plus(const Integer& b) const
03549 {
03550 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03551 if (NotNegative())
03552 {
03553 if (b.NotNegative())
03554 PositiveAdd(sum, *this, b);
03555 else
03556 PositiveSubtract(sum, *this, b);
03557 }
03558 else
03559 {
03560 if (b.NotNegative())
03561 PositiveSubtract(sum, b, *this);
03562 else
03563 {
03564 PositiveAdd(sum, *this, b);
03565 sum.sign = Integer::NEGATIVE;
03566 }
03567 }
03568 return sum;
03569 }
03570
03571 Integer& Integer::operator+=(const Integer& t)
03572 {
03573 reg.CleanGrow(t.reg.size());
03574 if (NotNegative())
03575 {
03576 if (t.NotNegative())
03577 PositiveAdd(*this, *this, t);
03578 else
03579 PositiveSubtract(*this, *this, t);
03580 }
03581 else
03582 {
03583 if (t.NotNegative())
03584 PositiveSubtract(*this, t, *this);
03585 else
03586 {
03587 PositiveAdd(*this, *this, t);
03588 sign = Integer::NEGATIVE;
03589 }
03590 }
03591 return *this;
03592 }
03593
03594 Integer Integer::Minus(const Integer& b) const
03595 {
03596 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03597 if (NotNegative())
03598 {
03599 if (b.NotNegative())
03600 PositiveSubtract(diff, *this, b);
03601 else
03602 PositiveAdd(diff, *this, b);
03603 }
03604 else
03605 {
03606 if (b.NotNegative())
03607 {
03608 PositiveAdd(diff, *this, b);
03609 diff.sign = Integer::NEGATIVE;
03610 }
03611 else
03612 PositiveSubtract(diff, b, *this);
03613 }
03614 return diff;
03615 }
03616
03617 Integer& Integer::operator-=(const Integer& t)
03618 {
03619 reg.CleanGrow(t.reg.size());
03620 if (NotNegative())
03621 {
03622 if (t.NotNegative())
03623 PositiveSubtract(*this, *this, t);
03624 else
03625 PositiveAdd(*this, *this, t);
03626 }
03627 else
03628 {
03629 if (t.NotNegative())
03630 {
03631 PositiveAdd(*this, *this, t);
03632 sign = Integer::NEGATIVE;
03633 }
03634 else
03635 PositiveSubtract(*this, t, *this);
03636 }
03637 return *this;
03638 }
03639
03640 Integer& Integer::operator<<=(size_t n)
03641 {
03642 const size_t wordCount = WordCount();
03643 const size_t shiftWords = n / WORD_BITS;
03644 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03645
03646 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03647 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03648 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03649 return *this;
03650 }
03651
03652 Integer& Integer::operator>>=(size_t n)
03653 {
03654 const size_t wordCount = WordCount();
03655 const size_t shiftWords = n / WORD_BITS;
03656 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03657
03658 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03659 if (wordCount > shiftWords)
03660 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03661 if (IsNegative() && WordCount()==0)
03662 *this = Zero();
03663 return *this;
03664 }
03665
03666 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03667 {
03668 size_t aSize = RoundupSize(a.WordCount());
03669 size_t bSize = RoundupSize(b.WordCount());
03670
03671 product.reg.CleanNew(RoundupSize(aSize+bSize));
03672 product.sign = Integer::POSITIVE;
03673
03674 IntegerSecBlock workspace(aSize + bSize);
03675 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03676 }
03677
03678 void Multiply(Integer &product, const Integer &a, const Integer &b)
03679 {
03680 PositiveMultiply(product, a, b);
03681
03682 if (a.NotNegative() != b.NotNegative())
03683 product.Negate();
03684 }
03685
03686 Integer Integer::Times(const Integer &b) const
03687 {
03688 Integer product;
03689 Multiply(product, *this, b);
03690 return product;
03691 }
03692
03693
03694
03695
03696
03697
03698
03699
03700
03701
03702
03703
03704
03705
03706
03707
03708
03709
03710
03711
03712
03713
03714
03715 void PositiveDivide(Integer &remainder, Integer "ient,
03716 const Integer &a, const Integer &b)
03717 {
03718 unsigned aSize = a.WordCount();
03719 unsigned bSize = b.WordCount();
03720
03721 if (!bSize)
03722 throw Integer::DivideByZero();
03723
03724 if (aSize < bSize)
03725 {
03726 remainder = a;
03727 remainder.sign = Integer::POSITIVE;
03728 quotient = Integer::Zero();
03729 return;
03730 }
03731
03732 aSize += aSize%2;
03733 bSize += bSize%2;
03734
03735 remainder.reg.CleanNew(RoundupSize(bSize));
03736 remainder.sign = Integer::POSITIVE;
03737 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03738 quotient.sign = Integer::POSITIVE;
03739
03740 IntegerSecBlock T(aSize+3*(bSize+2));
03741 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03742 }
03743
03744 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03745 {
03746 PositiveDivide(remainder, quotient, dividend, divisor);
03747
03748 if (dividend.IsNegative())
03749 {
03750 quotient.Negate();
03751 if (remainder.NotZero())
03752 {
03753 --quotient;
03754 remainder = divisor.AbsoluteValue() - remainder;
03755 }
03756 }
03757
03758 if (divisor.IsNegative())
03759 quotient.Negate();
03760 }
03761
03762 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03763 {
03764 q = a;
03765 q >>= n;
03766
03767 const size_t wordCount = BitsToWords(n);
03768 if (wordCount <= a.WordCount())
03769 {
03770 r.reg.resize(RoundupSize(wordCount));
03771 CopyWords(r.reg, a.reg, wordCount);
03772 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03773 if (n % WORD_BITS != 0)
03774 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
03775 }
03776 else
03777 {
03778 r.reg.resize(RoundupSize(a.WordCount()));
03779 CopyWords(r.reg, a.reg, r.reg.size());
03780 }
03781 r.sign = POSITIVE;
03782
03783 if (a.IsNegative() && r.NotZero())
03784 {
03785 --q;
03786 r = Power2(n) - r;
03787 }
03788 }
03789
03790 Integer Integer::DividedBy(const Integer &b) const
03791 {
03792 Integer remainder, quotient;
03793 Integer::Divide(remainder, quotient, *this, b);
03794 return quotient;
03795 }
03796
03797 Integer Integer::Modulo(const Integer &b) const
03798 {
03799 Integer remainder, quotient;
03800 Integer::Divide(remainder, quotient, *this, b);
03801 return remainder;
03802 }
03803
03804 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03805 {
03806 if (!divisor)
03807 throw Integer::DivideByZero();
03808
03809 assert(divisor);
03810
03811 if ((divisor & (divisor-1)) == 0)
03812 {
03813 quotient = dividend >> (BitPrecision(divisor)-1);
03814 remainder = dividend.reg[0] & (divisor-1);
03815 return;
03816 }
03817
03818 unsigned int i = dividend.WordCount();
03819 quotient.reg.CleanNew(RoundupSize(i));
03820 remainder = 0;
03821 while (i--)
03822 {
03823 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03824 remainder = DWord(dividend.reg[i], remainder) % divisor;
03825 }
03826
03827 if (dividend.NotNegative())
03828 quotient.sign = POSITIVE;
03829 else
03830 {
03831 quotient.sign = NEGATIVE;
03832 if (remainder)
03833 {
03834 --quotient;
03835 remainder = divisor - remainder;
03836 }
03837 }
03838 }
03839
03840 Integer Integer::DividedBy(word b) const
03841 {
03842 word remainder;
03843 Integer quotient;
03844 Integer::Divide(remainder, quotient, *this, b);
03845 return quotient;
03846 }
03847
03848 word Integer::Modulo(word divisor) const
03849 {
03850 if (!divisor)
03851 throw Integer::DivideByZero();
03852
03853 assert(divisor);
03854
03855 word remainder;
03856
03857 if ((divisor & (divisor-1)) == 0)
03858 remainder = reg[0] & (divisor-1);
03859 else
03860 {
03861 unsigned int i = WordCount();
03862
03863 if (divisor <= 5)
03864 {
03865 DWord sum(0, 0);
03866 while (i--)
03867 sum += reg[i];
03868 remainder = sum % divisor;
03869 }
03870 else
03871 {
03872 remainder = 0;
03873 while (i--)
03874 remainder = DWord(reg[i], remainder) % divisor;
03875 }
03876 }
03877
03878 if (IsNegative() && remainder)
03879 remainder = divisor - remainder;
03880
03881 return remainder;
03882 }
03883
03884 void Integer::Negate()
03885 {
03886 if (!!(*this))
03887 sign = Sign(1-sign);
03888 }
03889
03890 int Integer::PositiveCompare(const Integer& t) const
03891 {
03892 unsigned size = WordCount(), tSize = t.WordCount();
03893
03894 if (size == tSize)
03895 return CryptoPP::Compare(reg, t.reg, size);
03896 else
03897 return size > tSize ? 1 : -1;
03898 }
03899
03900 int Integer::Compare(const Integer& t) const
03901 {
03902 if (NotNegative())
03903 {
03904 if (t.NotNegative())
03905 return PositiveCompare(t);
03906 else
03907 return 1;
03908 }
03909 else
03910 {
03911 if (t.NotNegative())
03912 return -1;
03913 else
03914 return -PositiveCompare(t);
03915 }
03916 }
03917
03918 Integer Integer::SquareRoot() const
03919 {
03920 if (!IsPositive())
03921 return Zero();
03922
03923
03924 Integer x, y = Power2((BitCount()+1)/2);
03925 assert(y*y >= *this);
03926
03927 do
03928 {
03929 x = y;
03930 y = (x + *this/x) >> 1;
03931 } while (y<x);
03932
03933 return x;
03934 }
03935
03936 bool Integer::IsSquare() const
03937 {
03938 Integer r = SquareRoot();
03939 return *this == r.Squared();
03940 }
03941
03942 bool Integer::IsUnit() const
03943 {
03944 return (WordCount() == 1) && (reg[0] == 1);
03945 }
03946
03947 Integer Integer::MultiplicativeInverse() const
03948 {
03949 return IsUnit() ? *this : Zero();
03950 }
03951
03952 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03953 {
03954 return x*y%m;
03955 }
03956
03957 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03958 {
03959 ModularArithmetic mr(m);
03960 return mr.Exponentiate(x, e);
03961 }
03962
03963 Integer Integer::Gcd(const Integer &a, const Integer &b)
03964 {
03965 return EuclideanDomainOf<Integer>().Gcd(a, b);
03966 }
03967
03968 Integer Integer::InverseMod(const Integer &m) const
03969 {
03970 assert(m.NotNegative());
03971
03972 if (IsNegative())
03973 return Modulo(m).InverseMod(m);
03974
03975 if (m.IsEven())
03976 {
03977 if (!m || IsEven())
03978 return Zero();
03979 if (*this == One())
03980 return One();
03981
03982 Integer u = m.Modulo(*this).InverseMod(*this);
03983 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03984 }
03985
03986 SecBlock<word> T(m.reg.size() * 4);
03987 Integer r((word)0, m.reg.size());
03988 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03989 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03990 return r;
03991 }
03992
03993 word Integer::InverseMod(word mod) const
03994 {
03995 word g0 = mod, g1 = *this % mod;
03996 word v0 = 0, v1 = 1;
03997 word y;
03998
03999 while (g1)
04000 {
04001 if (g1 == 1)
04002 return v1;
04003 y = g0 / g1;
04004 g0 = g0 % g1;
04005 v0 += y * v1;
04006
04007 if (!g0)
04008 break;
04009 if (g0 == 1)
04010 return mod-v0;
04011 y = g1 / g0;
04012 g1 = g1 % g0;
04013 v1 += y * v0;
04014 }
04015 return 0;
04016 }
04017
04018
04019
04020 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
04021 {
04022 BERSequenceDecoder seq(bt);
04023 OID oid(seq);
04024 if (oid != ASN1::prime_field())
04025 BERDecodeError();
04026 m_modulus.BERDecode(seq);
04027 seq.MessageEnd();
04028 m_result.reg.resize(m_modulus.reg.size());
04029 }
04030
04031 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04032 {
04033 DERSequenceEncoder seq(bt);
04034 ASN1::prime_field().DEREncode(seq);
04035 m_modulus.DEREncode(seq);
04036 seq.MessageEnd();
04037 }
04038
04039 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04040 {
04041 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04042 }
04043
04044 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04045 {
04046 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04047 }
04048
04049 const Integer& ModularArithmetic::Half(const Integer &a) const
04050 {
04051 if (a.reg.size()==m_modulus.reg.size())
04052 {
04053 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04054 return m_result;
04055 }
04056 else
04057 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04058 }
04059
04060 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04061 {
04062 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04063 {
04064 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04065 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04066 {
04067 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04068 }
04069 return m_result;
04070 }
04071 else
04072 {
04073 m_result1 = a+b;
04074 if (m_result1 >= m_modulus)
04075 m_result1 -= m_modulus;
04076 return m_result1;
04077 }
04078 }
04079
04080 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04081 {
04082 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04083 {
04084 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04085 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04086 {
04087 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04088 }
04089 }
04090 else
04091 {
04092 a+=b;
04093 if (a>=m_modulus)
04094 a-=m_modulus;
04095 }
04096
04097 return a;
04098 }
04099
04100 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04101 {
04102 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04103 {
04104 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04105 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04106 return m_result;
04107 }
04108 else
04109 {
04110 m_result1 = a-b;
04111 if (m_result1.IsNegative())
04112 m_result1 += m_modulus;
04113 return m_result1;
04114 }
04115 }
04116
04117 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04118 {
04119 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04120 {
04121 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04122 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04123 }
04124 else
04125 {
04126 a-=b;
04127 if (a.IsNegative())
04128 a+=m_modulus;
04129 }
04130
04131 return a;
04132 }
04133
04134 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04135 {
04136 if (!a)
04137 return a;
04138
04139 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04140 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04141 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04142
04143 return m_result;
04144 }
04145
04146 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04147 {
04148 if (m_modulus.IsOdd())
04149 {
04150 MontgomeryRepresentation dr(m_modulus);
04151 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04152 }
04153 else
04154 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04155 }
04156
04157 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04158 {
04159 if (m_modulus.IsOdd())
04160 {
04161 MontgomeryRepresentation dr(m_modulus);
04162 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04163 for (unsigned int i=0; i<exponentsCount; i++)
04164 results[i] = dr.ConvertOut(results[i]);
04165 }
04166 else
04167 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04168 }
04169
04170 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04171 : ModularArithmetic(m),
04172 m_u((word)0, m_modulus.reg.size()),
04173 m_workspace(5*m_modulus.reg.size())
04174 {
04175 if (!m_modulus.IsOdd())
04176 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04177
04178 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04179 }
04180
04181 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04182 {
04183 word *const T = m_workspace.begin();
04184 word *const R = m_result.reg.begin();
04185 const size_t N = m_modulus.reg.size();
04186 assert(a.reg.size()<=N && b.reg.size()<=N);
04187
04188 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04189 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04190 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04191 return m_result;
04192 }
04193
04194 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04195 {
04196 word *const T = m_workspace.begin();
04197 word *const R = m_result.reg.begin();
04198 const size_t N = m_modulus.reg.size();
04199 assert(a.reg.size()<=N);
04200
04201 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04202 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04203 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04204 return m_result;
04205 }
04206
04207 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04208 {
04209 word *const T = m_workspace.begin();
04210 word *const R = m_result.reg.begin();
04211 const size_t N = m_modulus.reg.size();
04212 assert(a.reg.size()<=N);
04213
04214 CopyWords(T, a.reg, a.reg.size());
04215 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04216 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04217 return m_result;
04218 }
04219
04220 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04221 {
04222
04223 word *const T = m_workspace.begin();
04224 word *const R = m_result.reg.begin();
04225 const size_t N = m_modulus.reg.size();
04226 assert(a.reg.size()<=N);
04227
04228 CopyWords(T, a.reg, a.reg.size());
04229 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04230 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04231 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04232
04233
04234
04235 if (k>N*WORD_BITS)
04236 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04237 else
04238 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04239
04240 return m_result;
04241 }
04242
04243 NAMESPACE_END
04244
04245 #endif