Crypto++
|
00001 // integer.cpp - written and placed in the public domain by Wei Dai 00002 // contains public domain code contributed by Alister Lee and Leonard Janke 00003 00004 #include "pch.h" 00005 00006 #ifndef CRYPTOPP_IMPORTS 00007 00008 #include "integer.h" 00009 #include "modarith.h" 00010 #include "nbtheory.h" 00011 #include "asn.h" 00012 #include "oids.h" 00013 #include "words.h" 00014 #include "algparam.h" 00015 #include "pubkey.h" // for P1363_KDF2 00016 #include "sha.h" 00017 #include "cpu.h" 00018 00019 #include <iostream> 00020 00021 #if _MSC_VER >= 1400 00022 #include <intrin.h> 00023 #endif 00024 00025 #ifdef __DECCXX 00026 #include <c_asm.h> 00027 #endif 00028 00029 #ifdef CRYPTOPP_MSVC6_NO_PP 00030 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.") 00031 #endif 00032 00033 #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86) 00034 00035 NAMESPACE_BEGIN(CryptoPP) 00036 00037 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) 00038 { 00039 if (valueType != typeid(Integer)) 00040 return false; 00041 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt); 00042 return true; 00043 } 00044 00045 inline static int Compare(const word *A, const word *B, size_t N) 00046 { 00047 while (N--) 00048 if (A[N] > B[N]) 00049 return 1; 00050 else if (A[N] < B[N]) 00051 return -1; 00052 00053 return 0; 00054 } 00055 00056 inline static int Increment(word *A, size_t N, word B=1) 00057 { 00058 assert(N); 00059 word t = A[0]; 00060 A[0] = t+B; 00061 if (A[0] >= t) 00062 return 0; 00063 for (unsigned i=1; i<N; i++) 00064 if (++A[i]) 00065 return 0; 00066 return 1; 00067 } 00068 00069 inline static int Decrement(word *A, size_t N, word B=1) 00070 { 00071 assert(N); 00072 word t = A[0]; 00073 A[0] = t-B; 00074 if (A[0] <= t) 00075 return 0; 00076 for (unsigned i=1; i<N; i++) 00077 if (A[i]--) 00078 return 0; 00079 return 1; 00080 } 00081 00082 static void TwosComplement(word *A, size_t N) 00083 { 00084 Decrement(A, N); 00085 for (unsigned i=0; i<N; i++) 00086 A[i] = ~A[i]; 00087 } 00088 00089 static word AtomicInverseModPower2(word A) 00090 { 00091 assert(A%2==1); 00092 00093 word R=A%8; 00094 00095 for (unsigned i=3; i<WORD_BITS; i*=2) 00096 R = R*(2-R*A); 00097 00098 assert(R*A==1); 00099 return R; 00100 } 00101 00102 // ******************************************************** 00103 00104 #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE)) 00105 #define Declare2Words(x) word x##0, x##1; 00106 #define AssignWord(a, b) a##0 = b; a##1 = 0; 00107 #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c); 00108 #define LowWord(a) a##0 00109 #define HighWord(a) a##1 00110 #ifdef _MSC_VER 00111 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1); 00112 #ifndef __INTEL_COMPILER 00113 #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2; 00114 #endif 00115 #elif defined(__DECCXX) 00116 #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b); 00117 #elif defined(__x86_64__) 00118 #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100 00119 // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works 00120 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc"); 00121 #else 00122 #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); 00123 #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc"); 00124 #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc"); 00125 #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc"); 00126 #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc"); 00127 #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc"); 00128 #endif 00129 #endif 00130 #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b) 00131 #ifndef Double3Words 00132 #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2; 00133 #endif 00134 #ifndef Acc2WordsBy2 00135 #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1; 00136 #endif 00137 #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);} 00138 #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);} 00139 #define GetCarry(u) u##1 00140 #define GetBorrow(u) u##1 00141 #else 00142 #define Declare2Words(x) dword x; 00143 #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) 00144 #define MultiplyWords(p, a, b) p = __emulu(a, b); 00145 #else 00146 #define MultiplyWords(p, a, b) p = (dword)a*b; 00147 #endif 00148 #define AssignWord(a, b) a = b; 00149 #define Add2WordsBy1(a, b, c) a = b + c; 00150 #define Acc2WordsBy2(a, b) a += b; 00151 #define LowWord(a) word(a) 00152 #define HighWord(a) word(a>>WORD_BITS) 00153 #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2; 00154 #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u); 00155 #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u); 00156 #define GetCarry(u) HighWord(u) 00157 #define GetBorrow(u) word(u>>(WORD_BITS*2-1)) 00158 #endif 00159 #ifndef MulAcc 00160 #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p)); 00161 #endif 00162 #ifndef Acc2WordsBy1 00163 #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b) 00164 #endif 00165 #ifndef Acc3WordsBy2 00166 #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e)); 00167 #endif 00168 00169 class DWord 00170 { 00171 public: 00172 DWord() {} 00173 00174 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00175 explicit DWord(word low) 00176 { 00177 m_whole = low; 00178 } 00179 #else 00180 explicit DWord(word low) 00181 { 00182 m_halfs.low = low; 00183 m_halfs.high = 0; 00184 } 00185 #endif 00186 00187 DWord(word low, word high) 00188 { 00189 m_halfs.low = low; 00190 m_halfs.high = high; 00191 } 00192 00193 static DWord Multiply(word a, word b) 00194 { 00195 DWord r; 00196 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00197 r.m_whole = (dword)a * b; 00198 #elif defined(MultiplyWordsLoHi) 00199 MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b); 00200 #endif 00201 return r; 00202 } 00203 00204 static DWord MultiplyAndAdd(word a, word b, word c) 00205 { 00206 DWord r = Multiply(a, b); 00207 return r += c; 00208 } 00209 00210 DWord & operator+=(word a) 00211 { 00212 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00213 m_whole = m_whole + a; 00214 #else 00215 m_halfs.low += a; 00216 m_halfs.high += (m_halfs.low < a); 00217 #endif 00218 return *this; 00219 } 00220 00221 DWord operator+(word a) 00222 { 00223 DWord r; 00224 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00225 r.m_whole = m_whole + a; 00226 #else 00227 r.m_halfs.low = m_halfs.low + a; 00228 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); 00229 #endif 00230 return r; 00231 } 00232 00233 DWord operator-(DWord a) 00234 { 00235 DWord r; 00236 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00237 r.m_whole = m_whole - a.m_whole; 00238 #else 00239 r.m_halfs.low = m_halfs.low - a.m_halfs.low; 00240 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); 00241 #endif 00242 return r; 00243 } 00244 00245 DWord operator-(word a) 00246 { 00247 DWord r; 00248 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00249 r.m_whole = m_whole - a; 00250 #else 00251 r.m_halfs.low = m_halfs.low - a; 00252 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); 00253 #endif 00254 return r; 00255 } 00256 00257 // returns quotient, which must fit in a word 00258 word operator/(word divisor); 00259 00260 word operator%(word a); 00261 00262 bool operator!() const 00263 { 00264 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00265 return !m_whole; 00266 #else 00267 return !m_halfs.high && !m_halfs.low; 00268 #endif 00269 } 00270 00271 word GetLowHalf() const {return m_halfs.low;} 00272 word GetHighHalf() const {return m_halfs.high;} 00273 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;} 00274 00275 private: 00276 union 00277 { 00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00279 dword m_whole; 00280 #endif 00281 struct 00282 { 00283 #ifdef IS_LITTLE_ENDIAN 00284 word low; 00285 word high; 00286 #else 00287 word high; 00288 word low; 00289 #endif 00290 } m_halfs; 00291 }; 00292 }; 00293 00294 class Word 00295 { 00296 public: 00297 Word() {} 00298 00299 Word(word value) 00300 { 00301 m_whole = value; 00302 } 00303 00304 Word(hword low, hword high) 00305 { 00306 m_whole = low | (word(high) << (WORD_BITS/2)); 00307 } 00308 00309 static Word Multiply(hword a, hword b) 00310 { 00311 Word r; 00312 r.m_whole = (word)a * b; 00313 return r; 00314 } 00315 00316 Word operator-(Word a) 00317 { 00318 Word r; 00319 r.m_whole = m_whole - a.m_whole; 00320 return r; 00321 } 00322 00323 Word operator-(hword a) 00324 { 00325 Word r; 00326 r.m_whole = m_whole - a; 00327 return r; 00328 } 00329 00330 // returns quotient, which must fit in a word 00331 hword operator/(hword divisor) 00332 { 00333 return hword(m_whole / divisor); 00334 } 00335 00336 bool operator!() const 00337 { 00338 return !m_whole; 00339 } 00340 00341 word GetWhole() const {return m_whole;} 00342 hword GetLowHalf() const {return hword(m_whole);} 00343 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} 00344 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} 00345 00346 private: 00347 word m_whole; 00348 }; 00349 00350 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A 00351 template <class S, class D> 00352 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) 00353 { 00354 // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S 00355 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); 00356 00357 // estimate the quotient: do a 2 S by 1 S divide 00358 S Q; 00359 if (S(B1+1) == 0) 00360 Q = A[2]; 00361 else if (B1 > 0) 00362 Q = D(A[1], A[2]) / S(B1+1); 00363 else 00364 Q = D(A[0], A[1]) / B0; 00365 00366 // now subtract Q*B from A 00367 D p = D::Multiply(B0, Q); 00368 D u = (D) A[0] - p.GetLowHalf(); 00369 A[0] = u.GetLowHalf(); 00370 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); 00371 A[1] = u.GetLowHalf(); 00372 A[2] += u.GetHighHalf(); 00373 00374 // Q <= actual quotient, so fix it 00375 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) 00376 { 00377 u = (D) A[0] - B0; 00378 A[0] = u.GetLowHalf(); 00379 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); 00380 A[1] = u.GetLowHalf(); 00381 A[2] += u.GetHighHalf(); 00382 Q++; 00383 assert(Q); // shouldn't overflow 00384 } 00385 00386 return Q; 00387 } 00388 00389 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 00390 template <class S, class D> 00391 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) 00392 { 00393 if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) 00394 return D(Ah.GetLowHalf(), Ah.GetHighHalf()); 00395 else 00396 { 00397 S Q[2]; 00398 T[0] = Al.GetLowHalf(); 00399 T[1] = Al.GetHighHalf(); 00400 T[2] = Ah.GetLowHalf(); 00401 T[3] = Ah.GetHighHalf(); 00402 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); 00403 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); 00404 return D(Q[0], Q[1]); 00405 } 00406 } 00407 00408 // returns quotient, which must fit in a word 00409 inline word DWord::operator/(word a) 00410 { 00411 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00412 return word(m_whole / a); 00413 #else 00414 hword r[4]; 00415 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); 00416 #endif 00417 } 00418 00419 inline word DWord::operator%(word a) 00420 { 00421 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE 00422 return word(m_whole % a); 00423 #else 00424 if (a < (word(1) << (WORD_BITS/2))) 00425 { 00426 hword h = hword(a); 00427 word r = m_halfs.high % h; 00428 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; 00429 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); 00430 } 00431 else 00432 { 00433 hword r[4]; 00434 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); 00435 return Word(r[0], r[1]).GetWhole(); 00436 } 00437 #endif 00438 } 00439 00440 // ******************************************************** 00441 00442 // use some tricks to share assembly code between MSVC and GCC 00443 #if defined(__GNUC__) 00444 #define AddPrologue \ 00445 int result; \ 00446 __asm__ __volatile__ \ 00447 ( \ 00448 ".intel_syntax noprefix;" 00449 #define AddEpilogue \ 00450 ".att_syntax prefix;" \ 00451 : "=a" (result)\ 00452 : "d" (C), "a" (A), "D" (B), "c" (N) \ 00453 : "%esi", "memory", "cc" \ 00454 );\ 00455 return result; 00456 #define MulPrologue \ 00457 __asm__ __volatile__ \ 00458 ( \ 00459 ".intel_syntax noprefix;" \ 00460 AS1( push ebx) \ 00461 AS2( mov ebx, edx) 00462 #define MulEpilogue \ 00463 AS1( pop ebx) \ 00464 ".att_syntax prefix;" \ 00465 : \ 00466 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \ 00467 : "%esi", "memory", "cc" \ 00468 ); 00469 #define SquPrologue MulPrologue 00470 #define SquEpilogue \ 00471 AS1( pop ebx) \ 00472 ".att_syntax prefix;" \ 00473 : \ 00474 : "d" (s_maskLow16), "c" (C), "a" (A) \ 00475 : "%esi", "%edi", "memory", "cc" \ 00476 ); 00477 #define TopPrologue MulPrologue 00478 #define TopEpilogue \ 00479 AS1( pop ebx) \ 00480 ".att_syntax prefix;" \ 00481 : \ 00482 : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \ 00483 : "memory", "cc" \ 00484 ); 00485 #else 00486 #define AddPrologue \ 00487 __asm push edi \ 00488 __asm push esi \ 00489 __asm mov eax, [esp+12] \ 00490 __asm mov edi, [esp+16] 00491 #define AddEpilogue \ 00492 __asm pop esi \ 00493 __asm pop edi \ 00494 __asm ret 8 00495 #if _MSC_VER < 1300 00496 #define SaveEBX __asm push ebx 00497 #define RestoreEBX __asm pop ebx 00498 #else 00499 #define SaveEBX 00500 #define RestoreEBX 00501 #endif 00502 #define SquPrologue \ 00503 AS2( mov eax, A) \ 00504 AS2( mov ecx, C) \ 00505 SaveEBX \ 00506 AS2( lea ebx, s_maskLow16) 00507 #define MulPrologue \ 00508 AS2( mov eax, A) \ 00509 AS2( mov edi, B) \ 00510 AS2( mov ecx, C) \ 00511 SaveEBX \ 00512 AS2( lea ebx, s_maskLow16) 00513 #define TopPrologue \ 00514 AS2( mov eax, A) \ 00515 AS2( mov edi, B) \ 00516 AS2( mov ecx, C) \ 00517 AS2( mov esi, L) \ 00518 SaveEBX \ 00519 AS2( lea ebx, s_maskLow16) 00520 #define SquEpilogue RestoreEBX 00521 #define MulEpilogue RestoreEBX 00522 #define TopEpilogue RestoreEBX 00523 #endif 00524 00525 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 00526 extern "C" { 00527 int Baseline_Add(size_t N, word *C, const word *A, const word *B); 00528 int Baseline_Sub(size_t N, word *C, const word *A, const word *B); 00529 } 00530 #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE) 00531 int Baseline_Add(size_t N, word *C, const word *A, const word *B) 00532 { 00533 word result; 00534 __asm__ __volatile__ 00535 ( 00536 ".intel_syntax;" 00537 AS1( neg %1) 00538 ASJ( jz, 1, f) 00539 AS2( mov %0,[%3+8*%1]) 00540 AS2( add %0,[%4+8*%1]) 00541 AS2( mov [%2+8*%1],%0) 00542 ASL(0) 00543 AS2( mov %0,[%3+8*%1+8]) 00544 AS2( adc %0,[%4+8*%1+8]) 00545 AS2( mov [%2+8*%1+8],%0) 00546 AS2( lea %1,[%1+2]) 00547 ASJ( jrcxz, 1, f) 00548 AS2( mov %0,[%3+8*%1]) 00549 AS2( adc %0,[%4+8*%1]) 00550 AS2( mov [%2+8*%1],%0) 00551 ASJ( jmp, 0, b) 00552 ASL(1) 00553 AS2( mov %0, 0) 00554 AS2( adc %0, %0) 00555 ".att_syntax;" 00556 : "=&r" (result), "+c" (N) 00557 : "r" (C+N), "r" (A+N), "r" (B+N) 00558 : "memory", "cc" 00559 ); 00560 return (int)result; 00561 } 00562 00563 int Baseline_Sub(size_t N, word *C, const word *A, const word *B) 00564 { 00565 word result; 00566 __asm__ __volatile__ 00567 ( 00568 ".intel_syntax;" 00569 AS1( neg %1) 00570 ASJ( jz, 1, f) 00571 AS2( mov %0,[%3+8*%1]) 00572 AS2( sub %0,[%4+8*%1]) 00573 AS2( mov [%2+8*%1],%0) 00574 ASL(0) 00575 AS2( mov %0,[%3+8*%1+8]) 00576 AS2( sbb %0,[%4+8*%1+8]) 00577 AS2( mov [%2+8*%1+8],%0) 00578 AS2( lea %1,[%1+2]) 00579 ASJ( jrcxz, 1, f) 00580 AS2( mov %0,[%3+8*%1]) 00581 AS2( sbb %0,[%4+8*%1]) 00582 AS2( mov [%2+8*%1],%0) 00583 ASJ( jmp, 0, b) 00584 ASL(1) 00585 AS2( mov %0, 0) 00586 AS2( adc %0, %0) 00587 ".att_syntax;" 00588 : "=&r" (result), "+c" (N) 00589 : "r" (C+N), "r" (A+N), "r" (B+N) 00590 : "memory", "cc" 00591 ); 00592 return (int)result; 00593 } 00594 #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86 00595 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) 00596 { 00597 AddPrologue 00598 00599 // now: eax = A, edi = B, edx = C, ecx = N 00600 AS2( lea eax, [eax+4*ecx]) 00601 AS2( lea edi, [edi+4*ecx]) 00602 AS2( lea edx, [edx+4*ecx]) 00603 00604 AS1( neg ecx) // ecx is negative index 00605 AS2( test ecx, 2) // this clears carry flag 00606 ASJ( jz, 0, f) 00607 AS2( sub ecx, 2) 00608 ASJ( jmp, 1, f) 00609 00610 ASL(0) 00611 ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero 00612 AS2( mov esi,[eax+4*ecx]) 00613 AS2( adc esi,[edi+4*ecx]) 00614 AS2( mov [edx+4*ecx],esi) 00615 AS2( mov esi,[eax+4*ecx+4]) 00616 AS2( adc esi,[edi+4*ecx+4]) 00617 AS2( mov [edx+4*ecx+4],esi) 00618 ASL(1) 00619 AS2( mov esi,[eax+4*ecx+8]) 00620 AS2( adc esi,[edi+4*ecx+8]) 00621 AS2( mov [edx+4*ecx+8],esi) 00622 AS2( mov esi,[eax+4*ecx+12]) 00623 AS2( adc esi,[edi+4*ecx+12]) 00624 AS2( mov [edx+4*ecx+12],esi) 00625 00626 AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 00627 ASJ( jmp, 0, b) 00628 00629 ASL(2) 00630 AS2( mov eax, 0) 00631 AS1( setc al) // store carry into eax (return result register) 00632 00633 AddEpilogue 00634 } 00635 00636 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) 00637 { 00638 AddPrologue 00639 00640 // now: eax = A, edi = B, edx = C, ecx = N 00641 AS2( lea eax, [eax+4*ecx]) 00642 AS2( lea edi, [edi+4*ecx]) 00643 AS2( lea edx, [edx+4*ecx]) 00644 00645 AS1( neg ecx) // ecx is negative index 00646 AS2( test ecx, 2) // this clears carry flag 00647 ASJ( jz, 0, f) 00648 AS2( sub ecx, 2) 00649 ASJ( jmp, 1, f) 00650 00651 ASL(0) 00652 ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero 00653 AS2( mov esi,[eax+4*ecx]) 00654 AS2( sbb esi,[edi+4*ecx]) 00655 AS2( mov [edx+4*ecx],esi) 00656 AS2( mov esi,[eax+4*ecx+4]) 00657 AS2( sbb esi,[edi+4*ecx+4]) 00658 AS2( mov [edx+4*ecx+4],esi) 00659 ASL(1) 00660 AS2( mov esi,[eax+4*ecx+8]) 00661 AS2( sbb esi,[edi+4*ecx+8]) 00662 AS2( mov [edx+4*ecx+8],esi) 00663 AS2( mov esi,[eax+4*ecx+12]) 00664 AS2( sbb esi,[edi+4*ecx+12]) 00665 AS2( mov [edx+4*ecx+12],esi) 00666 00667 AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2 00668 ASJ( jmp, 0, b) 00669 00670 ASL(2) 00671 AS2( mov eax, 0) 00672 AS1( setc al) // store carry into eax (return result register) 00673 00674 AddEpilogue 00675 } 00676 00677 #if CRYPTOPP_INTEGER_SSE2 00678 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) 00679 { 00680 AddPrologue 00681 00682 // now: eax = A, edi = B, edx = C, ecx = N 00683 AS2( lea eax, [eax+4*ecx]) 00684 AS2( lea edi, [edi+4*ecx]) 00685 AS2( lea edx, [edx+4*ecx]) 00686 00687 AS1( neg ecx) // ecx is negative index 00688 AS2( pxor mm2, mm2) 00689 ASJ( jz, 2, f) 00690 AS2( test ecx, 2) // this clears carry flag 00691 ASJ( jz, 0, f) 00692 AS2( sub ecx, 2) 00693 ASJ( jmp, 1, f) 00694 00695 ASL(0) 00696 AS2( movd mm0, DWORD PTR [eax+4*ecx]) 00697 AS2( movd mm1, DWORD PTR [edi+4*ecx]) 00698 AS2( paddq mm0, mm1) 00699 AS2( paddq mm2, mm0) 00700 AS2( movd DWORD PTR [edx+4*ecx], mm2) 00701 AS2( psrlq mm2, 32) 00702 00703 AS2( movd mm0, DWORD PTR [eax+4*ecx+4]) 00704 AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) 00705 AS2( paddq mm0, mm1) 00706 AS2( paddq mm2, mm0) 00707 AS2( movd DWORD PTR [edx+4*ecx+4], mm2) 00708 AS2( psrlq mm2, 32) 00709 00710 ASL(1) 00711 AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) 00712 AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) 00713 AS2( paddq mm0, mm1) 00714 AS2( paddq mm2, mm0) 00715 AS2( movd DWORD PTR [edx+4*ecx+8], mm2) 00716 AS2( psrlq mm2, 32) 00717 00718 AS2( movd mm0, DWORD PTR [eax+4*ecx+12]) 00719 AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) 00720 AS2( paddq mm0, mm1) 00721 AS2( paddq mm2, mm0) 00722 AS2( movd DWORD PTR [edx+4*ecx+12], mm2) 00723 AS2( psrlq mm2, 32) 00724 00725 AS2( add ecx, 4) 00726 ASJ( jnz, 0, b) 00727 00728 ASL(2) 00729 AS2( movd eax, mm2) 00730 AS1( emms) 00731 00732 AddEpilogue 00733 } 00734 CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) 00735 { 00736 AddPrologue 00737 00738 // now: eax = A, edi = B, edx = C, ecx = N 00739 AS2( lea eax, [eax+4*ecx]) 00740 AS2( lea edi, [edi+4*ecx]) 00741 AS2( lea edx, [edx+4*ecx]) 00742 00743 AS1( neg ecx) // ecx is negative index 00744 AS2( pxor mm2, mm2) 00745 ASJ( jz, 2, f) 00746 AS2( test ecx, 2) // this clears carry flag 00747 ASJ( jz, 0, f) 00748 AS2( sub ecx, 2) 00749 ASJ( jmp, 1, f) 00750 00751 ASL(0) 00752 AS2( movd mm0, DWORD PTR [eax+4*ecx]) 00753 AS2( movd mm1, DWORD PTR [edi+4*ecx]) 00754 AS2( psubq mm0, mm1) 00755 AS2( psubq mm0, mm2) 00756 AS2( movd DWORD PTR [edx+4*ecx], mm0) 00757 AS2( psrlq mm0, 63) 00758 00759 AS2( movd mm2, DWORD PTR [eax+4*ecx+4]) 00760 AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) 00761 AS2( psubq mm2, mm1) 00762 AS2( psubq mm2, mm0) 00763 AS2( movd DWORD PTR [edx+4*ecx+4], mm2) 00764 AS2( psrlq mm2, 63) 00765 00766 ASL(1) 00767 AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) 00768 AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) 00769 AS2( psubq mm0, mm1) 00770 AS2( psubq mm0, mm2) 00771 AS2( movd DWORD PTR [edx+4*ecx+8], mm0) 00772 AS2( psrlq mm0, 63) 00773 00774 AS2( movd mm2, DWORD PTR [eax+4*ecx+12]) 00775 AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) 00776 AS2( psubq mm2, mm1) 00777 AS2( psubq mm2, mm0) 00778 AS2( movd DWORD PTR [edx+4*ecx+12], mm2) 00779 AS2( psrlq mm2, 63) 00780 00781 AS2( add ecx, 4) 00782 ASJ( jnz, 0, b) 00783 00784 ASL(2) 00785 AS2( movd eax, mm2) 00786 AS1( emms) 00787 00788 AddEpilogue 00789 } 00790 #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 00791 #else 00792 int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) 00793 { 00794 assert (N%2 == 0); 00795 00796 Declare2Words(u); 00797 AssignWord(u, 0); 00798 for (size_t i=0; i<N; i+=2) 00799 { 00800 AddWithCarry(u, A[i], B[i]); 00801 C[i] = LowWord(u); 00802 AddWithCarry(u, A[i+1], B[i+1]); 00803 C[i+1] = LowWord(u); 00804 } 00805 return int(GetCarry(u)); 00806 } 00807 00808 int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) 00809 { 00810 assert (N%2 == 0); 00811 00812 Declare2Words(u); 00813 AssignWord(u, 0); 00814 for (size_t i=0; i<N; i+=2) 00815 { 00816 SubtractWithBorrow(u, A[i], B[i]); 00817 C[i] = LowWord(u); 00818 SubtractWithBorrow(u, A[i+1], B[i+1]); 00819 C[i+1] = LowWord(u); 00820 } 00821 return int(GetBorrow(u)); 00822 } 00823 #endif 00824 00825 static word LinearMultiply(word *C, const word *A, word B, size_t N) 00826 { 00827 word carry=0; 00828 for(unsigned i=0; i<N; i++) 00829 { 00830 Declare2Words(p); 00831 MultiplyWords(p, A[i], B); 00832 Acc2WordsBy1(p, carry); 00833 C[i] = LowWord(p); 00834 carry = HighWord(p); 00835 } 00836 return carry; 00837 } 00838 00839 #ifndef CRYPTOPP_DOXYGEN_PROCESSING 00840 00841 #define Mul_2 \ 00842 Mul_Begin(2) \ 00843 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00844 Mul_End(1, 1) 00845 00846 #define Mul_4 \ 00847 Mul_Begin(4) \ 00848 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00849 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 00850 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 00851 Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ 00852 Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ 00853 Mul_End(5, 3) 00854 00855 #define Mul_8 \ 00856 Mul_Begin(8) \ 00857 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00858 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 00859 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 00860 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 00861 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 00862 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 00863 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 00864 Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ 00865 Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ 00866 Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ 00867 Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ 00868 Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ 00869 Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ 00870 Mul_End(13, 7) 00871 00872 #define Mul_16 \ 00873 Mul_Begin(16) \ 00874 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00875 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 00876 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 00877 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 00878 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 00879 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 00880 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 00881 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ 00882 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ 00883 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ 00884 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ 00885 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ 00886 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ 00887 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ 00888 Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ 00889 Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ 00890 Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ 00891 Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ 00892 Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ 00893 Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ 00894 Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ 00895 Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ 00896 Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ 00897 Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ 00898 Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ 00899 Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ 00900 Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ 00901 Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ 00902 Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ 00903 Mul_End(29, 15) 00904 00905 #define Squ_2 \ 00906 Squ_Begin(2) \ 00907 Squ_End(2) 00908 00909 #define Squ_4 \ 00910 Squ_Begin(4) \ 00911 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 00912 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 00913 Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \ 00914 Squ_SaveAcc(4, 2, 3) Squ_NonDiag \ 00915 Squ_End(4) 00916 00917 #define Squ_8 \ 00918 Squ_Begin(8) \ 00919 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 00920 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 00921 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ 00922 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ 00923 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ 00924 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ 00925 Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ 00926 Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ 00927 Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ 00928 Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \ 00929 Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \ 00930 Squ_SaveAcc(12, 6, 7) Squ_NonDiag \ 00931 Squ_End(8) 00932 00933 #define Squ_16 \ 00934 Squ_Begin(16) \ 00935 Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ 00936 Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ 00937 Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ 00938 Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ 00939 Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ 00940 Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ 00941 Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ 00942 Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ 00943 Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ 00944 Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \ 00945 Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \ 00946 Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \ 00947 Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ 00948 Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ 00949 Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ 00950 Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ 00951 Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ 00952 Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ 00953 Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ 00954 Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ 00955 Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ 00956 Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ 00957 Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ 00958 Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ 00959 Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ 00960 Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ 00961 Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ 00962 Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ 00963 Squ_End(16) 00964 00965 #define Bot_2 \ 00966 Mul_Begin(2) \ 00967 Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ 00968 Bot_End(2) 00969 00970 #define Bot_4 \ 00971 Mul_Begin(4) \ 00972 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00973 Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \ 00974 Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \ 00975 Bot_End(4) 00976 00977 #define Bot_8 \ 00978 Mul_Begin(8) \ 00979 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00980 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 00981 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 00982 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 00983 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 00984 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 00985 Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ 00986 Bot_End(8) 00987 00988 #define Bot_16 \ 00989 Mul_Begin(16) \ 00990 Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ 00991 Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ 00992 Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 00993 Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ 00994 Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ 00995 Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ 00996 Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 00997 Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ 00998 Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ 00999 Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ 01000 Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ 01001 Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ 01002 Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ 01003 Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ 01004 Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ 01005 Bot_End(16) 01006 01007 #endif 01008 01009 #if 0 01010 #define Mul_Begin(n) \ 01011 Declare2Words(p) \ 01012 Declare2Words(c) \ 01013 Declare2Words(d) \ 01014 MultiplyWords(p, A[0], B[0]) \ 01015 AssignWord(c, LowWord(p)) \ 01016 AssignWord(d, HighWord(p)) 01017 01018 #define Mul_Acc(i, j) \ 01019 MultiplyWords(p, A[i], B[j]) \ 01020 Acc2WordsBy1(c, LowWord(p)) \ 01021 Acc2WordsBy1(d, HighWord(p)) 01022 01023 #define Mul_SaveAcc(k, i, j) \ 01024 R[k] = LowWord(c); \ 01025 Add2WordsBy1(c, d, HighWord(c)) \ 01026 MultiplyWords(p, A[i], B[j]) \ 01027 AssignWord(d, HighWord(p)) \ 01028 Acc2WordsBy1(c, LowWord(p)) 01029 01030 #define Mul_End(n) \ 01031 R[2*n-3] = LowWord(c); \ 01032 Acc2WordsBy1(d, HighWord(c)) \ 01033 MultiplyWords(p, A[n-1], B[n-1])\ 01034 Acc2WordsBy2(d, p) \ 01035 R[2*n-2] = LowWord(d); \ 01036 R[2*n-1] = HighWord(d); 01037 01038 #define Bot_SaveAcc(k, i, j) \ 01039 R[k] = LowWord(c); \ 01040 word e = LowWord(d) + HighWord(c); \ 01041 e += A[i] * B[j]; 01042 01043 #define Bot_Acc(i, j) \ 01044 e += A[i] * B[j]; 01045 01046 #define Bot_End(n) \ 01047 R[n-1] = e; 01048 #else 01049 #define Mul_Begin(n) \ 01050 Declare2Words(p) \ 01051 word c; \ 01052 Declare2Words(d) \ 01053 MultiplyWords(p, A[0], B[0]) \ 01054 c = LowWord(p); \ 01055 AssignWord(d, HighWord(p)) 01056 01057 #define Mul_Acc(i, j) \ 01058 MulAcc(c, d, A[i], B[j]) 01059 01060 #define Mul_SaveAcc(k, i, j) \ 01061 R[k] = c; \ 01062 c = LowWord(d); \ 01063 AssignWord(d, HighWord(d)) \ 01064 MulAcc(c, d, A[i], B[j]) 01065 01066 #define Mul_End(k, i) \ 01067 R[k] = c; \ 01068 MultiplyWords(p, A[i], B[i]) \ 01069 Acc2WordsBy2(p, d) \ 01070 R[k+1] = LowWord(p); \ 01071 R[k+2] = HighWord(p); 01072 01073 #define Bot_SaveAcc(k, i, j) \ 01074 R[k] = c; \ 01075 c = LowWord(d); \ 01076 c += A[i] * B[j]; 01077 01078 #define Bot_Acc(i, j) \ 01079 c += A[i] * B[j]; 01080 01081 #define Bot_End(n) \ 01082 R[n-1] = c; 01083 #endif 01084 01085 #define Squ_Begin(n) \ 01086 Declare2Words(p) \ 01087 word c; \ 01088 Declare2Words(d) \ 01089 Declare2Words(e) \ 01090 MultiplyWords(p, A[0], A[0]) \ 01091 R[0] = LowWord(p); \ 01092 AssignWord(e, HighWord(p)) \ 01093 MultiplyWords(p, A[0], A[1]) \ 01094 c = LowWord(p); \ 01095 AssignWord(d, HighWord(p)) \ 01096 Squ_NonDiag \ 01097 01098 #define Squ_NonDiag \ 01099 Double3Words(c, d) 01100 01101 #define Squ_SaveAcc(k, i, j) \ 01102 Acc3WordsBy2(c, d, e) \ 01103 R[k] = c; \ 01104 MultiplyWords(p, A[i], A[j]) \ 01105 c = LowWord(p); \ 01106 AssignWord(d, HighWord(p)) \ 01107 01108 #define Squ_Acc(i, j) \ 01109 MulAcc(c, d, A[i], A[j]) 01110 01111 #define Squ_Diag(i) \ 01112 Squ_NonDiag \ 01113 MulAcc(c, d, A[i], A[i]) 01114 01115 #define Squ_End(n) \ 01116 Acc3WordsBy2(c, d, e) \ 01117 R[2*n-3] = c; \ 01118 MultiplyWords(p, A[n-1], A[n-1])\ 01119 Acc2WordsBy2(p, e) \ 01120 R[2*n-2] = LowWord(p); \ 01121 R[2*n-1] = HighWord(p); 01122 01123 void Baseline_Multiply2(word *R, const word *A, const word *B) 01124 { 01125 Mul_2 01126 } 01127 01128 void Baseline_Multiply4(word *R, const word *A, const word *B) 01129 { 01130 Mul_4 01131 } 01132 01133 void Baseline_Multiply8(word *R, const word *A, const word *B) 01134 { 01135 Mul_8 01136 } 01137 01138 void Baseline_Square2(word *R, const word *A) 01139 { 01140 Squ_2 01141 } 01142 01143 void Baseline_Square4(word *R, const word *A) 01144 { 01145 Squ_4 01146 } 01147 01148 void Baseline_Square8(word *R, const word *A) 01149 { 01150 Squ_8 01151 } 01152 01153 void Baseline_MultiplyBottom2(word *R, const word *A, const word *B) 01154 { 01155 Bot_2 01156 } 01157 01158 void Baseline_MultiplyBottom4(word *R, const word *A, const word *B) 01159 { 01160 Bot_4 01161 } 01162 01163 void Baseline_MultiplyBottom8(word *R, const word *A, const word *B) 01164 { 01165 Bot_8 01166 } 01167 01168 #define Top_Begin(n) \ 01169 Declare2Words(p) \ 01170 word c; \ 01171 Declare2Words(d) \ 01172 MultiplyWords(p, A[0], B[n-2]);\ 01173 AssignWord(d, HighWord(p)); 01174 01175 #define Top_Acc(i, j) \ 01176 MultiplyWords(p, A[i], B[j]);\ 01177 Acc2WordsBy1(d, HighWord(p)); 01178 01179 #define Top_SaveAcc0(i, j) \ 01180 c = LowWord(d); \ 01181 AssignWord(d, HighWord(d)) \ 01182 MulAcc(c, d, A[i], B[j]) 01183 01184 #define Top_SaveAcc1(i, j) \ 01185 c = L<c; \ 01186 Acc2WordsBy1(d, c); \ 01187 c = LowWord(d); \ 01188 AssignWord(d, HighWord(d)) \ 01189 MulAcc(c, d, A[i], B[j]) 01190 01191 void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L) 01192 { 01193 word T[4]; 01194 Baseline_Multiply2(T, A, B); 01195 R[0] = T[2]; 01196 R[1] = T[3]; 01197 } 01198 01199 void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L) 01200 { 01201 Top_Begin(4) 01202 Top_Acc(1, 1) Top_Acc(2, 0) \ 01203 Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ 01204 Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ 01205 Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \ 01206 Mul_End(1, 3) 01207 } 01208 01209 void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L) 01210 { 01211 Top_Begin(8) 01212 Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \ 01213 Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ 01214 Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ 01215 Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ 01216 Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ 01217 Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ 01218 Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ 01219 Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \ 01220 Mul_End(5, 7) 01221 } 01222 01223 #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available 01224 void Baseline_Multiply16(word *R, const word *A, const word *B) 01225 { 01226 Mul_16 01227 } 01228 01229 void Baseline_Square16(word *R, const word *A) 01230 { 01231 Squ_16 01232 } 01233 01234 void Baseline_MultiplyBottom16(word *R, const word *A, const word *B) 01235 { 01236 Bot_16 01237 } 01238 01239 void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L) 01240 { 01241 Top_Begin(16) 01242 Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \ 01243 Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ 01244 Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ 01245 Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ 01246 Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ 01247 Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ 01248 Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ 01249 Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ 01250 Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ 01251 Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ 01252 Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ 01253 Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ 01254 Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ 01255 Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ 01256 Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ 01257 Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \ 01258 Mul_End(13, 15) 01259 } 01260 #endif 01261 01262 // ******************************************************** 01263 01264 #if CRYPTOPP_INTEGER_SSE2 01265 01266 CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff}; 01267 01268 #undef Mul_Begin 01269 #undef Mul_Acc 01270 #undef Top_Begin 01271 #undef Top_Acc 01272 #undef Squ_Acc 01273 #undef Squ_NonDiag 01274 #undef Squ_Diag 01275 #undef Squ_SaveAcc 01276 #undef Squ_Begin 01277 #undef Mul_SaveAcc 01278 #undef Bot_Acc 01279 #undef Bot_SaveAcc 01280 #undef Bot_End 01281 #undef Squ_End 01282 #undef Mul_End 01283 01284 #define SSE2_FinalSave(k) \ 01285 AS2( psllq xmm5, 16) \ 01286 AS2( paddq xmm4, xmm5) \ 01287 AS2( movq QWORD PTR [ecx+8*(k)], xmm4) 01288 01289 #define SSE2_SaveShift(k) \ 01290 AS2( movq xmm0, xmm6) \ 01291 AS2( punpckhqdq xmm6, xmm0) \ 01292 AS2( movq xmm1, xmm7) \ 01293 AS2( punpckhqdq xmm7, xmm1) \ 01294 AS2( paddd xmm6, xmm0) \ 01295 AS2( pslldq xmm6, 4) \ 01296 AS2( paddd xmm7, xmm1) \ 01297 AS2( paddd xmm4, xmm6) \ 01298 AS2( pslldq xmm7, 4) \ 01299 AS2( movq xmm6, xmm4) \ 01300 AS2( paddd xmm5, xmm7) \ 01301 AS2( movq xmm7, xmm5) \ 01302 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ 01303 AS2( psrlq xmm6, 16) \ 01304 AS2( paddq xmm6, xmm7) \ 01305 AS2( punpckhqdq xmm4, xmm0) \ 01306 AS2( punpckhqdq xmm5, xmm0) \ 01307 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \ 01308 AS2( psrlq xmm6, 3*16) \ 01309 AS2( paddd xmm4, xmm6) \ 01310 01311 #define Squ_SSE2_SaveShift(k) \ 01312 AS2( movq xmm0, xmm6) \ 01313 AS2( punpckhqdq xmm6, xmm0) \ 01314 AS2( movq xmm1, xmm7) \ 01315 AS2( punpckhqdq xmm7, xmm1) \ 01316 AS2( paddd xmm6, xmm0) \ 01317 AS2( pslldq xmm6, 4) \ 01318 AS2( paddd xmm7, xmm1) \ 01319 AS2( paddd xmm4, xmm6) \ 01320 AS2( pslldq xmm7, 4) \ 01321 AS2( movhlps xmm6, xmm4) \ 01322 AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ 01323 AS2( paddd xmm5, xmm7) \ 01324 AS2( movhps QWORD PTR [esp+12], xmm5)\ 01325 AS2( psrlq xmm4, 16) \ 01326 AS2( paddq xmm4, xmm5) \ 01327 AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \ 01328 AS2( psrlq xmm4, 3*16) \ 01329 AS2( paddd xmm4, xmm6) \ 01330 AS2( movq QWORD PTR [esp+4], xmm4)\ 01331 01332 #define SSE2_FirstMultiply(i) \ 01333 AS2( movdqa xmm7, [esi+(i)*16])\ 01334 AS2( movdqa xmm5, [edi-(i)*16])\ 01335 AS2( pmuludq xmm5, xmm7) \ 01336 AS2( movdqa xmm4, [ebx])\ 01337 AS2( movdqa xmm6, xmm4) \ 01338 AS2( pand xmm4, xmm5) \ 01339 AS2( psrld xmm5, 16) \ 01340 AS2( pmuludq xmm7, [edx-(i)*16])\ 01341 AS2( pand xmm6, xmm7) \ 01342 AS2( psrld xmm7, 16) 01343 01344 #define Squ_Begin(n) \ 01345 SquPrologue \ 01346 AS2( mov esi, esp)\ 01347 AS2( and esp, 0xfffffff0)\ 01348 AS2( lea edi, [esp-32*n])\ 01349 AS2( sub esp, 32*n+16)\ 01350 AS1( push esi)\ 01351 AS2( mov esi, edi) \ 01352 AS2( xor edx, edx) \ 01353 ASL(1) \ 01354 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ 01355 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ 01356 AS2( movdqa [edi+2*edx], xmm0) \ 01357 AS2( psrlq xmm0, 32) \ 01358 AS2( movdqa [edi+2*edx+16], xmm0) \ 01359 AS2( movdqa [edi+16*n+2*edx], xmm1) \ 01360 AS2( psrlq xmm1, 32) \ 01361 AS2( movdqa [edi+16*n+2*edx+16], xmm1) \ 01362 AS2( add edx, 16) \ 01363 AS2( cmp edx, 8*(n)) \ 01364 ASJ( jne, 1, b) \ 01365 AS2( lea edx, [edi+16*n])\ 01366 SSE2_FirstMultiply(0) \ 01367 01368 #define Squ_Acc(i) \ 01369 ASL(LSqu##i) \ 01370 AS2( movdqa xmm1, [esi+(i)*16]) \ 01371 AS2( movdqa xmm0, [edi-(i)*16]) \ 01372 AS2( movdqa xmm2, [ebx]) \ 01373 AS2( pmuludq xmm0, xmm1) \ 01374 AS2( pmuludq xmm1, [edx-(i)*16]) \ 01375 AS2( movdqa xmm3, xmm2) \ 01376 AS2( pand xmm2, xmm0) \ 01377 AS2( psrld xmm0, 16) \ 01378 AS2( paddd xmm4, xmm2) \ 01379 AS2( paddd xmm5, xmm0) \ 01380 AS2( pand xmm3, xmm1) \ 01381 AS2( psrld xmm1, 16) \ 01382 AS2( paddd xmm6, xmm3) \ 01383 AS2( paddd xmm7, xmm1) \ 01384 01385 #define Squ_Acc1(i) 01386 #define Squ_Acc2(i) ASC(call, LSqu##i) 01387 #define Squ_Acc3(i) Squ_Acc2(i) 01388 #define Squ_Acc4(i) Squ_Acc2(i) 01389 #define Squ_Acc5(i) Squ_Acc2(i) 01390 #define Squ_Acc6(i) Squ_Acc2(i) 01391 #define Squ_Acc7(i) Squ_Acc2(i) 01392 #define Squ_Acc8(i) Squ_Acc2(i) 01393 01394 #define SSE2_End(E, n) \ 01395 SSE2_SaveShift(2*(n)-3) \ 01396 AS2( movdqa xmm7, [esi+16]) \ 01397 AS2( movdqa xmm0, [edi]) \ 01398 AS2( pmuludq xmm0, xmm7) \ 01399 AS2( movdqa xmm2, [ebx]) \ 01400 AS2( pmuludq xmm7, [edx]) \ 01401 AS2( movdqa xmm6, xmm2) \ 01402 AS2( pand xmm2, xmm0) \ 01403 AS2( psrld xmm0, 16) \ 01404 AS2( paddd xmm4, xmm2) \ 01405 AS2( paddd xmm5, xmm0) \ 01406 AS2( pand xmm6, xmm7) \ 01407 AS2( psrld xmm7, 16) \ 01408 SSE2_SaveShift(2*(n)-2) \ 01409 SSE2_FinalSave(2*(n)-1) \ 01410 AS1( pop esp)\ 01411 E 01412 01413 #define Squ_End(n) SSE2_End(SquEpilogue, n) 01414 #define Mul_End(n) SSE2_End(MulEpilogue, n) 01415 #define Top_End(n) SSE2_End(TopEpilogue, n) 01416 01417 #define Squ_Column1(k, i) \ 01418 Squ_SSE2_SaveShift(k) \ 01419 AS2( add esi, 16) \ 01420 SSE2_FirstMultiply(1)\ 01421 Squ_Acc##i(i) \ 01422 AS2( paddd xmm4, xmm4) \ 01423 AS2( paddd xmm5, xmm5) \ 01424 AS2( movdqa xmm3, [esi]) \ 01425 AS2( movq xmm1, QWORD PTR [esi+8]) \ 01426 AS2( pmuludq xmm1, xmm3) \ 01427 AS2( pmuludq xmm3, xmm3) \ 01428 AS2( movdqa xmm0, [ebx])\ 01429 AS2( movdqa xmm2, xmm0) \ 01430 AS2( pand xmm0, xmm1) \ 01431 AS2( psrld xmm1, 16) \ 01432 AS2( paddd xmm6, xmm0) \ 01433 AS2( paddd xmm7, xmm1) \ 01434 AS2( pand xmm2, xmm3) \ 01435 AS2( psrld xmm3, 16) \ 01436 AS2( paddd xmm6, xmm6) \ 01437 AS2( paddd xmm7, xmm7) \ 01438 AS2( paddd xmm4, xmm2) \ 01439 AS2( paddd xmm5, xmm3) \ 01440 AS2( movq xmm0, QWORD PTR [esp+4])\ 01441 AS2( movq xmm1, QWORD PTR [esp+12])\ 01442 AS2( paddd xmm4, xmm0)\ 01443 AS2( paddd xmm5, xmm1)\ 01444 01445 #define Squ_Column0(k, i) \ 01446 Squ_SSE2_SaveShift(k) \ 01447 AS2( add edi, 16) \ 01448 AS2( add edx, 16) \ 01449 SSE2_FirstMultiply(1)\ 01450 Squ_Acc##i(i) \ 01451 AS2( paddd xmm6, xmm6) \ 01452 AS2( paddd xmm7, xmm7) \ 01453 AS2( paddd xmm4, xmm4) \ 01454 AS2( paddd xmm5, xmm5) \ 01455 AS2( movq xmm0, QWORD PTR [esp+4])\ 01456 AS2( movq xmm1, QWORD PTR [esp+12])\ 01457 AS2( paddd xmm4, xmm0)\ 01458 AS2( paddd xmm5, xmm1)\ 01459 01460 #define SSE2_MulAdd45 \ 01461 AS2( movdqa xmm7, [esi]) \ 01462 AS2( movdqa xmm0, [edi]) \ 01463 AS2( pmuludq xmm0, xmm7) \ 01464 AS2( movdqa xmm2, [ebx]) \ 01465 AS2( pmuludq xmm7, [edx]) \ 01466 AS2( movdqa xmm6, xmm2) \ 01467 AS2( pand xmm2, xmm0) \ 01468 AS2( psrld xmm0, 16) \ 01469 AS2( paddd xmm4, xmm2) \ 01470 AS2( paddd xmm5, xmm0) \ 01471 AS2( pand xmm6, xmm7) \ 01472 AS2( psrld xmm7, 16) 01473 01474 #define Mul_Begin(n) \ 01475 MulPrologue \ 01476 AS2( mov esi, esp)\ 01477 AS2( and esp, 0xfffffff0)\ 01478 AS2( sub esp, 48*n+16)\ 01479 AS1( push esi)\ 01480 AS2( xor edx, edx) \ 01481 ASL(1) \ 01482 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ 01483 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ 01484 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ 01485 AS2( movdqa [esp+20+2*edx], xmm0) \ 01486 AS2( psrlq xmm0, 32) \ 01487 AS2( movdqa [esp+20+2*edx+16], xmm0) \ 01488 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ 01489 AS2( psrlq xmm1, 32) \ 01490 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ 01491 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ 01492 AS2( psrlq xmm2, 32) \ 01493 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ 01494 AS2( add edx, 16) \ 01495 AS2( cmp edx, 8*(n)) \ 01496 ASJ( jne, 1, b) \ 01497 AS2( lea edi, [esp+20])\ 01498 AS2( lea edx, [esp+20+16*n])\ 01499 AS2( lea esi, [esp+20+32*n])\ 01500 SSE2_FirstMultiply(0) \ 01501 01502 #define Mul_Acc(i) \ 01503 ASL(LMul##i) \ 01504 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ 01505 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ 01506 AS2( movdqa xmm2, [ebx]) \ 01507 AS2( pmuludq xmm0, xmm1) \ 01508 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ 01509 AS2( movdqa xmm3, xmm2) \ 01510 AS2( pand xmm2, xmm0) \ 01511 AS2( psrld xmm0, 16) \ 01512 AS2( paddd xmm4, xmm2) \ 01513 AS2( paddd xmm5, xmm0) \ 01514 AS2( pand xmm3, xmm1) \ 01515 AS2( psrld xmm1, 16) \ 01516 AS2( paddd xmm6, xmm3) \ 01517 AS2( paddd xmm7, xmm1) \ 01518 01519 #define Mul_Acc1(i) 01520 #define Mul_Acc2(i) ASC(call, LMul##i) 01521 #define Mul_Acc3(i) Mul_Acc2(i) 01522 #define Mul_Acc4(i) Mul_Acc2(i) 01523 #define Mul_Acc5(i) Mul_Acc2(i) 01524 #define Mul_Acc6(i) Mul_Acc2(i) 01525 #define Mul_Acc7(i) Mul_Acc2(i) 01526 #define Mul_Acc8(i) Mul_Acc2(i) 01527 #define Mul_Acc9(i) Mul_Acc2(i) 01528 #define Mul_Acc10(i) Mul_Acc2(i) 01529 #define Mul_Acc11(i) Mul_Acc2(i) 01530 #define Mul_Acc12(i) Mul_Acc2(i) 01531 #define Mul_Acc13(i) Mul_Acc2(i) 01532 #define Mul_Acc14(i) Mul_Acc2(i) 01533 #define Mul_Acc15(i) Mul_Acc2(i) 01534 #define Mul_Acc16(i) Mul_Acc2(i) 01535 01536 #define Mul_Column1(k, i) \ 01537 SSE2_SaveShift(k) \ 01538 AS2( add esi, 16) \ 01539 SSE2_MulAdd45\ 01540 Mul_Acc##i(i) \ 01541 01542 #define Mul_Column0(k, i) \ 01543 SSE2_SaveShift(k) \ 01544 AS2( add edi, 16) \ 01545 AS2( add edx, 16) \ 01546 SSE2_MulAdd45\ 01547 Mul_Acc##i(i) \ 01548 01549 #define Bot_Acc(i) \ 01550 AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ 01551 AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ 01552 AS2( pmuludq xmm0, xmm1) \ 01553 AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ 01554 AS2( paddq xmm4, xmm0) \ 01555 AS2( paddd xmm6, xmm1) 01556 01557 #define Bot_SaveAcc(k) \ 01558 SSE2_SaveShift(k) \ 01559 AS2( add edi, 16) \ 01560 AS2( add edx, 16) \ 01561 AS2( movdqa xmm6, [esi]) \ 01562 AS2( movdqa xmm0, [edi]) \ 01563 AS2( pmuludq xmm0, xmm6) \ 01564 AS2( paddq xmm4, xmm0) \ 01565 AS2( psllq xmm5, 16) \ 01566 AS2( paddq xmm4, xmm5) \ 01567 AS2( pmuludq xmm6, [edx]) 01568 01569 #define Bot_End(n) \ 01570 AS2( movhlps xmm7, xmm6) \ 01571 AS2( paddd xmm6, xmm7) \ 01572 AS2( psllq xmm6, 32) \ 01573 AS2( paddd xmm4, xmm6) \ 01574 AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \ 01575 AS1( pop esp)\ 01576 MulEpilogue 01577 01578 #define Top_Begin(n) \ 01579 TopPrologue \ 01580 AS2( mov edx, esp)\ 01581 AS2( and esp, 0xfffffff0)\ 01582 AS2( sub esp, 48*n+16)\ 01583 AS1( push edx)\ 01584 AS2( xor edx, edx) \ 01585 ASL(1) \ 01586 ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ 01587 ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ 01588 ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ 01589 AS2( movdqa [esp+20+2*edx], xmm0) \ 01590 AS2( psrlq xmm0, 32) \ 01591 AS2( movdqa [esp+20+2*edx+16], xmm0) \ 01592 AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ 01593 AS2( psrlq xmm1, 32) \ 01594 AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ 01595 AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ 01596 AS2( psrlq xmm2, 32) \ 01597 AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ 01598 AS2( add edx, 16) \ 01599 AS2( cmp edx, 8*(n)) \ 01600 ASJ( jne, 1, b) \ 01601 AS2( mov eax, esi) \ 01602 AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\ 01603 AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\ 01604 AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\ 01605 AS2( pxor xmm4, xmm4)\ 01606 AS2( pxor xmm5, xmm5) 01607 01608 #define Top_Acc(i) \ 01609 AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \ 01610 AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ 01611 AS2( psrlq xmm0, 48) \ 01612 AS2( paddd xmm5, xmm0)\ 01613 01614 #define Top_Column0(i) \ 01615 AS2( psllq xmm5, 32) \ 01616 AS2( add edi, 16) \ 01617 AS2( add edx, 16) \ 01618 SSE2_MulAdd45\ 01619 Mul_Acc##i(i) \ 01620 01621 #define Top_Column1(i) \ 01622 SSE2_SaveShift(0) \ 01623 AS2( add esi, 16) \ 01624 SSE2_MulAdd45\ 01625 Mul_Acc##i(i) \ 01626 AS2( shr eax, 16) \ 01627 AS2( movd xmm0, eax)\ 01628 AS2( movd xmm1, [ecx+4])\ 01629 AS2( psrld xmm1, 16)\ 01630 AS2( pcmpgtd xmm1, xmm0)\ 01631 AS2( psrld xmm1, 31)\ 01632 AS2( paddd xmm4, xmm1)\ 01633 01634 void SSE2_Square4(word *C, const word *A) 01635 { 01636 Squ_Begin(2) 01637 Squ_Column0(0, 1) 01638 Squ_End(2) 01639 } 01640 01641 void SSE2_Square8(word *C, const word *A) 01642 { 01643 Squ_Begin(4) 01644 #ifndef __GNUC__ 01645 ASJ( jmp, 0, f) 01646 Squ_Acc(2) 01647 AS1( ret) ASL(0) 01648 #endif 01649 Squ_Column0(0, 1) 01650 Squ_Column1(1, 1) 01651 Squ_Column0(2, 2) 01652 Squ_Column1(3, 1) 01653 Squ_Column0(4, 1) 01654 Squ_End(4) 01655 } 01656 01657 void SSE2_Square16(word *C, const word *A) 01658 { 01659 Squ_Begin(8) 01660 #ifndef __GNUC__ 01661 ASJ( jmp, 0, f) 01662 Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) 01663 AS1( ret) ASL(0) 01664 #endif 01665 Squ_Column0(0, 1) 01666 Squ_Column1(1, 1) 01667 Squ_Column0(2, 2) 01668 Squ_Column1(3, 2) 01669 Squ_Column0(4, 3) 01670 Squ_Column1(5, 3) 01671 Squ_Column0(6, 4) 01672 Squ_Column1(7, 3) 01673 Squ_Column0(8, 3) 01674 Squ_Column1(9, 2) 01675 Squ_Column0(10, 2) 01676 Squ_Column1(11, 1) 01677 Squ_Column0(12, 1) 01678 Squ_End(8) 01679 } 01680 01681 void SSE2_Square32(word *C, const word *A) 01682 { 01683 Squ_Begin(16) 01684 ASJ( jmp, 0, f) 01685 Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) 01686 AS1( ret) ASL(0) 01687 Squ_Column0(0, 1) 01688 Squ_Column1(1, 1) 01689 Squ_Column0(2, 2) 01690 Squ_Column1(3, 2) 01691 Squ_Column0(4, 3) 01692 Squ_Column1(5, 3) 01693 Squ_Column0(6, 4) 01694 Squ_Column1(7, 4) 01695 Squ_Column0(8, 5) 01696 Squ_Column1(9, 5) 01697 Squ_Column0(10, 6) 01698 Squ_Column1(11, 6) 01699 Squ_Column0(12, 7) 01700 Squ_Column1(13, 7) 01701 Squ_Column0(14, 8) 01702 Squ_Column1(15, 7) 01703 Squ_Column0(16, 7) 01704 Squ_Column1(17, 6) 01705 Squ_Column0(18, 6) 01706 Squ_Column1(19, 5) 01707 Squ_Column0(20, 5) 01708 Squ_Column1(21, 4) 01709 Squ_Column0(22, 4) 01710 Squ_Column1(23, 3) 01711 Squ_Column0(24, 3) 01712 Squ_Column1(25, 2) 01713 Squ_Column0(26, 2) 01714 Squ_Column1(27, 1) 01715 Squ_Column0(28, 1) 01716 Squ_End(16) 01717 } 01718 01719 void SSE2_Multiply4(word *C, const word *A, const word *B) 01720 { 01721 Mul_Begin(2) 01722 #ifndef __GNUC__ 01723 ASJ( jmp, 0, f) 01724 Mul_Acc(2) 01725 AS1( ret) ASL(0) 01726 #endif 01727 Mul_Column0(0, 2) 01728 Mul_End(2) 01729 } 01730 01731 void SSE2_Multiply8(word *C, const word *A, const word *B) 01732 { 01733 Mul_Begin(4) 01734 #ifndef __GNUC__ 01735 ASJ( jmp, 0, f) 01736 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01737 AS1( ret) ASL(0) 01738 #endif 01739 Mul_Column0(0, 2) 01740 Mul_Column1(1, 3) 01741 Mul_Column0(2, 4) 01742 Mul_Column1(3, 3) 01743 Mul_Column0(4, 2) 01744 Mul_End(4) 01745 } 01746 01747 void SSE2_Multiply16(word *C, const word *A, const word *B) 01748 { 01749 Mul_Begin(8) 01750 #ifndef __GNUC__ 01751 ASJ( jmp, 0, f) 01752 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01753 AS1( ret) ASL(0) 01754 #endif 01755 Mul_Column0(0, 2) 01756 Mul_Column1(1, 3) 01757 Mul_Column0(2, 4) 01758 Mul_Column1(3, 5) 01759 Mul_Column0(4, 6) 01760 Mul_Column1(5, 7) 01761 Mul_Column0(6, 8) 01762 Mul_Column1(7, 7) 01763 Mul_Column0(8, 6) 01764 Mul_Column1(9, 5) 01765 Mul_Column0(10, 4) 01766 Mul_Column1(11, 3) 01767 Mul_Column0(12, 2) 01768 Mul_End(8) 01769 } 01770 01771 void SSE2_Multiply32(word *C, const word *A, const word *B) 01772 { 01773 Mul_Begin(16) 01774 ASJ( jmp, 0, f) 01775 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01776 AS1( ret) ASL(0) 01777 Mul_Column0(0, 2) 01778 Mul_Column1(1, 3) 01779 Mul_Column0(2, 4) 01780 Mul_Column1(3, 5) 01781 Mul_Column0(4, 6) 01782 Mul_Column1(5, 7) 01783 Mul_Column0(6, 8) 01784 Mul_Column1(7, 9) 01785 Mul_Column0(8, 10) 01786 Mul_Column1(9, 11) 01787 Mul_Column0(10, 12) 01788 Mul_Column1(11, 13) 01789 Mul_Column0(12, 14) 01790 Mul_Column1(13, 15) 01791 Mul_Column0(14, 16) 01792 Mul_Column1(15, 15) 01793 Mul_Column0(16, 14) 01794 Mul_Column1(17, 13) 01795 Mul_Column0(18, 12) 01796 Mul_Column1(19, 11) 01797 Mul_Column0(20, 10) 01798 Mul_Column1(21, 9) 01799 Mul_Column0(22, 8) 01800 Mul_Column1(23, 7) 01801 Mul_Column0(24, 6) 01802 Mul_Column1(25, 5) 01803 Mul_Column0(26, 4) 01804 Mul_Column1(27, 3) 01805 Mul_Column0(28, 2) 01806 Mul_End(16) 01807 } 01808 01809 void SSE2_MultiplyBottom4(word *C, const word *A, const word *B) 01810 { 01811 Mul_Begin(2) 01812 Bot_SaveAcc(0) Bot_Acc(2) 01813 Bot_End(2) 01814 } 01815 01816 void SSE2_MultiplyBottom8(word *C, const word *A, const word *B) 01817 { 01818 Mul_Begin(4) 01819 #ifndef __GNUC__ 01820 ASJ( jmp, 0, f) 01821 Mul_Acc(3) Mul_Acc(2) 01822 AS1( ret) ASL(0) 01823 #endif 01824 Mul_Column0(0, 2) 01825 Mul_Column1(1, 3) 01826 Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) 01827 Bot_End(4) 01828 } 01829 01830 void SSE2_MultiplyBottom16(word *C, const word *A, const word *B) 01831 { 01832 Mul_Begin(8) 01833 #ifndef __GNUC__ 01834 ASJ( jmp, 0, f) 01835 Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01836 AS1( ret) ASL(0) 01837 #endif 01838 Mul_Column0(0, 2) 01839 Mul_Column1(1, 3) 01840 Mul_Column0(2, 4) 01841 Mul_Column1(3, 5) 01842 Mul_Column0(4, 6) 01843 Mul_Column1(5, 7) 01844 Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) 01845 Bot_End(8) 01846 } 01847 01848 void SSE2_MultiplyBottom32(word *C, const word *A, const word *B) 01849 { 01850 Mul_Begin(16) 01851 #ifndef __GNUC__ 01852 ASJ( jmp, 0, f) 01853 Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01854 AS1( ret) ASL(0) 01855 #endif 01856 Mul_Column0(0, 2) 01857 Mul_Column1(1, 3) 01858 Mul_Column0(2, 4) 01859 Mul_Column1(3, 5) 01860 Mul_Column0(4, 6) 01861 Mul_Column1(5, 7) 01862 Mul_Column0(6, 8) 01863 Mul_Column1(7, 9) 01864 Mul_Column0(8, 10) 01865 Mul_Column1(9, 11) 01866 Mul_Column0(10, 12) 01867 Mul_Column1(11, 13) 01868 Mul_Column0(12, 14) 01869 Mul_Column1(13, 15) 01870 Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) 01871 Bot_End(16) 01872 } 01873 01874 void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L) 01875 { 01876 Top_Begin(4) 01877 Top_Acc(3) Top_Acc(2) Top_Acc(1) 01878 #ifndef __GNUC__ 01879 ASJ( jmp, 0, f) 01880 Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01881 AS1( ret) ASL(0) 01882 #endif 01883 Top_Column0(4) 01884 Top_Column1(3) 01885 Mul_Column0(0, 2) 01886 Top_End(2) 01887 } 01888 01889 void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L) 01890 { 01891 Top_Begin(8) 01892 Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) 01893 #ifndef __GNUC__ 01894 ASJ( jmp, 0, f) 01895 Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01896 AS1( ret) ASL(0) 01897 #endif 01898 Top_Column0(8) 01899 Top_Column1(7) 01900 Mul_Column0(0, 6) 01901 Mul_Column1(1, 5) 01902 Mul_Column0(2, 4) 01903 Mul_Column1(3, 3) 01904 Mul_Column0(4, 2) 01905 Top_End(4) 01906 } 01907 01908 void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L) 01909 { 01910 Top_Begin(16) 01911 Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) 01912 #ifndef __GNUC__ 01913 ASJ( jmp, 0, f) 01914 Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) 01915 AS1( ret) ASL(0) 01916 #endif 01917 Top_Column0(16) 01918 Top_Column1(15) 01919 Mul_Column0(0, 14) 01920 Mul_Column1(1, 13) 01921 Mul_Column0(2, 12) 01922 Mul_Column1(3, 11) 01923 Mul_Column0(4, 10) 01924 Mul_Column1(5, 9) 01925 Mul_Column0(6, 8) 01926 Mul_Column1(7, 7) 01927 Mul_Column0(8, 6) 01928 Mul_Column1(9, 5) 01929 Mul_Column0(10, 4) 01930 Mul_Column1(11, 3) 01931 Mul_Column0(12, 2) 01932 Top_End(8) 01933 } 01934 01935 #endif // #if CRYPTOPP_INTEGER_SSE2 01936 01937 // ******************************************************** 01938 01939 typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B); 01940 typedef void (* PMul)(word *C, const word *A, const word *B); 01941 typedef void (* PSqu)(word *C, const word *A); 01942 typedef void (* PMulTop)(word *C, const word *A, const word *B, word L); 01943 01944 #if CRYPTOPP_INTEGER_SSE2 01945 static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub; 01946 static size_t s_recursionLimit = 8; 01947 #else 01948 static const size_t s_recursionLimit = 16; 01949 #endif 01950 01951 static PMul s_pMul[9], s_pBot[9]; 01952 static PSqu s_pSqu[9]; 01953 static PMulTop s_pTop[9]; 01954 01955 static void SetFunctionPointers() 01956 { 01957 s_pMul[0] = &Baseline_Multiply2; 01958 s_pBot[0] = &Baseline_MultiplyBottom2; 01959 s_pSqu[0] = &Baseline_Square2; 01960 s_pTop[0] = &Baseline_MultiplyTop2; 01961 s_pTop[1] = &Baseline_MultiplyTop4; 01962 01963 #if CRYPTOPP_INTEGER_SSE2 01964 if (HasSSE2()) 01965 { 01966 #if _MSC_VER != 1200 || defined(NDEBUG) 01967 if (IsP4()) 01968 { 01969 s_pAdd = &SSE2_Add; 01970 s_pSub = &SSE2_Sub; 01971 } 01972 #endif 01973 01974 s_recursionLimit = 32; 01975 01976 s_pMul[1] = &SSE2_Multiply4; 01977 s_pMul[2] = &SSE2_Multiply8; 01978 s_pMul[4] = &SSE2_Multiply16; 01979 s_pMul[8] = &SSE2_Multiply32; 01980 01981 s_pBot[1] = &SSE2_MultiplyBottom4; 01982 s_pBot[2] = &SSE2_MultiplyBottom8; 01983 s_pBot[4] = &SSE2_MultiplyBottom16; 01984 s_pBot[8] = &SSE2_MultiplyBottom32; 01985 01986 s_pSqu[1] = &SSE2_Square4; 01987 s_pSqu[2] = &SSE2_Square8; 01988 s_pSqu[4] = &SSE2_Square16; 01989 s_pSqu[8] = &SSE2_Square32; 01990 01991 s_pTop[2] = &SSE2_MultiplyTop8; 01992 s_pTop[4] = &SSE2_MultiplyTop16; 01993 s_pTop[8] = &SSE2_MultiplyTop32; 01994 } 01995 else 01996 #endif 01997 { 01998 s_pMul[1] = &Baseline_Multiply4; 01999 s_pMul[2] = &Baseline_Multiply8; 02000 02001 s_pBot[1] = &Baseline_MultiplyBottom4; 02002 s_pBot[2] = &Baseline_MultiplyBottom8; 02003 02004 s_pSqu[1] = &Baseline_Square4; 02005 s_pSqu[2] = &Baseline_Square8; 02006 02007 s_pTop[2] = &Baseline_MultiplyTop8; 02008 02009 #if !CRYPTOPP_INTEGER_SSE2 02010 s_pMul[4] = &Baseline_Multiply16; 02011 s_pBot[4] = &Baseline_MultiplyBottom16; 02012 s_pSqu[4] = &Baseline_Square16; 02013 s_pTop[4] = &Baseline_MultiplyTop16; 02014 #endif 02015 } 02016 } 02017 02018 inline int Add(word *C, const word *A, const word *B, size_t N) 02019 { 02020 #if CRYPTOPP_INTEGER_SSE2 02021 return s_pAdd(N, C, A, B); 02022 #else 02023 return Baseline_Add(N, C, A, B); 02024 #endif 02025 } 02026 02027 inline int Subtract(word *C, const word *A, const word *B, size_t N) 02028 { 02029 #if CRYPTOPP_INTEGER_SSE2 02030 return s_pSub(N, C, A, B); 02031 #else 02032 return Baseline_Sub(N, C, A, B); 02033 #endif 02034 } 02035 02036 // ******************************************************** 02037 02038 02039 #define A0 A 02040 #define A1 (A+N2) 02041 #define B0 B 02042 #define B1 (B+N2) 02043 02044 #define T0 T 02045 #define T1 (T+N2) 02046 #define T2 (T+N) 02047 #define T3 (T+N+N2) 02048 02049 #define R0 R 02050 #define R1 (R+N2) 02051 #define R2 (R+N) 02052 #define R3 (R+N+N2) 02053 02054 // R[2*N] - result = A*B 02055 // T[2*N] - temporary work space 02056 // A[N] --- multiplier 02057 // B[N] --- multiplicant 02058 02059 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N) 02060 { 02061 assert(N>=2 && N%2==0); 02062 02063 if (N <= s_recursionLimit) 02064 s_pMul[N/4](R, A, B); 02065 else 02066 { 02067 const size_t N2 = N/2; 02068 02069 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; 02070 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); 02071 02072 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; 02073 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); 02074 02075 RecursiveMultiply(R2, T2, A1, B1, N2); 02076 RecursiveMultiply(T0, T2, R0, R1, N2); 02077 RecursiveMultiply(R0, T2, A0, B0, N2); 02078 02079 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1 02080 02081 int c2 = Add(R2, R2, R1, N2); 02082 int c3 = c2; 02083 c2 += Add(R1, R2, R0, N2); 02084 c3 += Add(R2, R2, R3, N2); 02085 02086 if (AN2 == BN2) 02087 c3 -= Subtract(R1, R1, T0, N); 02088 else 02089 c3 += Add(R1, R1, T0, N); 02090 02091 c3 += Increment(R2, N2, c2); 02092 assert (c3 >= 0 && c3 <= 2); 02093 Increment(R3, N2, c3); 02094 } 02095 } 02096 02097 // R[2*N] - result = A*A 02098 // T[2*N] - temporary work space 02099 // A[N] --- number to be squared 02100 02101 void RecursiveSquare(word *R, word *T, const word *A, size_t N) 02102 { 02103 assert(N && N%2==0); 02104 02105 if (N <= s_recursionLimit) 02106 s_pSqu[N/4](R, A); 02107 else 02108 { 02109 const size_t N2 = N/2; 02110 02111 RecursiveSquare(R0, T2, A0, N2); 02112 RecursiveSquare(R2, T2, A1, N2); 02113 RecursiveMultiply(T0, T2, A0, A1, N2); 02114 02115 int carry = Add(R1, R1, T0, N); 02116 carry += Add(R1, R1, T0, N); 02117 Increment(R3, N2, carry); 02118 } 02119 } 02120 02121 // R[N] - bottom half of A*B 02122 // T[3*N/2] - temporary work space 02123 // A[N] - multiplier 02124 // B[N] - multiplicant 02125 02126 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) 02127 { 02128 assert(N>=2 && N%2==0); 02129 02130 if (N <= s_recursionLimit) 02131 s_pBot[N/4](R, A, B); 02132 else 02133 { 02134 const size_t N2 = N/2; 02135 02136 RecursiveMultiply(R, T, A0, B0, N2); 02137 RecursiveMultiplyBottom(T0, T1, A1, B0, N2); 02138 Add(R1, R1, T0, N2); 02139 RecursiveMultiplyBottom(T0, T1, A0, B1, N2); 02140 Add(R1, R1, T0, N2); 02141 } 02142 } 02143 02144 // R[N] --- upper half of A*B 02145 // T[2*N] - temporary work space 02146 // L[N] --- lower half of A*B 02147 // A[N] --- multiplier 02148 // B[N] --- multiplicant 02149 02150 void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N) 02151 { 02152 assert(N>=2 && N%2==0); 02153 02154 if (N <= s_recursionLimit) 02155 s_pTop[N/4](R, A, B, L[N-1]); 02156 else 02157 { 02158 const size_t N2 = N/2; 02159 02160 size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; 02161 Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); 02162 02163 size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; 02164 Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); 02165 02166 RecursiveMultiply(T0, T2, R0, R1, N2); 02167 RecursiveMultiply(R0, T2, A1, B1, N2); 02168 02169 // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1 02170 02171 int t, c3; 02172 int c2 = Subtract(T2, L+N2, L, N2); 02173 02174 if (AN2 == BN2) 02175 { 02176 c2 -= Add(T2, T2, T0, N2); 02177 t = (Compare(T2, R0, N2) == -1); 02178 c3 = t - Subtract(T2, T2, T1, N2); 02179 } 02180 else 02181 { 02182 c2 += Subtract(T2, T2, T0, N2); 02183 t = (Compare(T2, R0, N2) == -1); 02184 c3 = t + Add(T2, T2, T1, N2); 02185 } 02186 02187 c2 += t; 02188 if (c2 >= 0) 02189 c3 += Increment(T2, N2, c2); 02190 else 02191 c3 -= Decrement(T2, N2, -c2); 02192 c3 += Add(R0, T2, R1, N2); 02193 02194 assert (c3 >= 0 && c3 <= 2); 02195 Increment(R1, N2, c3); 02196 } 02197 } 02198 02199 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N) 02200 { 02201 RecursiveMultiply(R, T, A, B, N); 02202 } 02203 02204 inline void Square(word *R, word *T, const word *A, size_t N) 02205 { 02206 RecursiveSquare(R, T, A, N); 02207 } 02208 02209 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) 02210 { 02211 RecursiveMultiplyBottom(R, T, A, B, N); 02212 } 02213 02214 // R[NA+NB] - result = A*B 02215 // T[NA+NB] - temporary work space 02216 // A[NA] ---- multiplier 02217 // B[NB] ---- multiplicant 02218 02219 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB) 02220 { 02221 if (NA == NB) 02222 { 02223 if (A == B) 02224 Square(R, T, A, NA); 02225 else 02226 Multiply(R, T, A, B, NA); 02227 02228 return; 02229 } 02230 02231 if (NA > NB) 02232 { 02233 std::swap(A, B); 02234 std::swap(NA, NB); 02235 } 02236 02237 assert(NB % NA == 0); 02238 02239 if (NA==2 && !A[1]) 02240 { 02241 switch (A[0]) 02242 { 02243 case 0: 02244 SetWords(R, 0, NB+2); 02245 return; 02246 case 1: 02247 CopyWords(R, B, NB); 02248 R[NB] = R[NB+1] = 0; 02249 return; 02250 default: 02251 R[NB] = LinearMultiply(R, B, A[0], NB); 02252 R[NB+1] = 0; 02253 return; 02254 } 02255 } 02256 02257 size_t i; 02258 if ((NB/NA)%2 == 0) 02259 { 02260 Multiply(R, T, A, B, NA); 02261 CopyWords(T+2*NA, R+NA, NA); 02262 02263 for (i=2*NA; i<NB; i+=2*NA) 02264 Multiply(T+NA+i, T, A, B+i, NA); 02265 for (i=NA; i<NB; i+=2*NA) 02266 Multiply(R+i, T, A, B+i, NA); 02267 } 02268 else 02269 { 02270 for (i=0; i<NB; i+=2*NA) 02271 Multiply(R+i, T, A, B+i, NA); 02272 for (i=NA; i<NB; i+=2*NA) 02273 Multiply(T+NA+i, T, A, B+i, NA); 02274 } 02275 02276 if (Add(R+NA, R+NA, T+2*NA, NB-NA)) 02277 Increment(R+NB, NA); 02278 } 02279 02280 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N) 02281 // T[3*N/2] - temporary work space 02282 // A[N] ----- an odd number as input 02283 02284 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N) 02285 { 02286 if (N==2) 02287 { 02288 T[0] = AtomicInverseModPower2(A[0]); 02289 T[1] = 0; 02290 s_pBot[0](T+2, T, A); 02291 TwosComplement(T+2, 2); 02292 Increment(T+2, 2, 2); 02293 s_pBot[0](R, T, T+2); 02294 } 02295 else 02296 { 02297 const size_t N2 = N/2; 02298 RecursiveInverseModPower2(R0, T0, A0, N2); 02299 T0[0] = 1; 02300 SetWords(T0+1, 0, N2-1); 02301 MultiplyTop(R1, T1, T0, R0, A0, N2); 02302 MultiplyBottom(T0, T1, R0, A1, N2); 02303 Add(T0, R1, T0, N2); 02304 TwosComplement(T0, N2); 02305 MultiplyBottom(R1, T1, R0, T0, N2); 02306 } 02307 } 02308 02309 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M 02310 // T[3*N] - temporary work space 02311 // X[2*N] - number to be reduced 02312 // M[N] --- modulus 02313 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N) 02314 02315 void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N) 02316 { 02317 #if 1 02318 MultiplyBottom(R, T, X, U, N); 02319 MultiplyTop(T, T+N, X, R, M, N); 02320 word borrow = Subtract(T, X+N, T, N); 02321 // defend against timing attack by doing this Add even when not needed 02322 word carry = Add(T+N, T, M, N); 02323 assert(carry | !borrow); 02324 CopyWords(R, T + ((0-borrow) & N), N); 02325 #elif 0 02326 const word u = 0-U[0]; 02327 Declare2Words(p) 02328 for (size_t i=0; i<N; i++) 02329 { 02330 const word t = u * X[i]; 02331 word c = 0; 02332 for (size_t j=0; j<N; j+=2) 02333 { 02334 MultiplyWords(p, t, M[j]); 02335 Acc2WordsBy1(p, X[i+j]); 02336 Acc2WordsBy1(p, c); 02337 X[i+j] = LowWord(p); 02338 c = HighWord(p); 02339 MultiplyWords(p, t, M[j+1]); 02340 Acc2WordsBy1(p, X[i+j+1]); 02341 Acc2WordsBy1(p, c); 02342 X[i+j+1] = LowWord(p); 02343 c = HighWord(p); 02344 } 02345 02346 if (Increment(X+N+i, N-i, c)) 02347 while (!Subtract(X+N, X+N, M, N)) {} 02348 } 02349 02350 memcpy(R, X+N, N*WORD_SIZE); 02351 #else 02352 __m64 u = _mm_cvtsi32_si64(0-U[0]), p; 02353 for (size_t i=0; i<N; i++) 02354 { 02355 __m64 t = _mm_cvtsi32_si64(X[i]); 02356 t = _mm_mul_su32(t, u); 02357 __m64 c = _mm_setzero_si64(); 02358 for (size_t j=0; j<N; j+=2) 02359 { 02360 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j])); 02361 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j])); 02362 c = _mm_add_si64(c, p); 02363 X[i+j] = _mm_cvtsi64_si32(c); 02364 c = _mm_srli_si64(c, 32); 02365 p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1])); 02366 p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1])); 02367 c = _mm_add_si64(c, p); 02368 X[i+j+1] = _mm_cvtsi64_si32(c); 02369 c = _mm_srli_si64(c, 32); 02370 } 02371 02372 if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c))) 02373 while (!Subtract(X+N, X+N, M, N)) {} 02374 } 02375 02376 memcpy(R, X+N, N*WORD_SIZE); 02377 _mm_empty(); 02378 #endif 02379 } 02380 02381 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M 02382 // T[2*N] - temporary work space 02383 // X[2*N] - number to be reduced 02384 // M[N] --- modulus 02385 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2) 02386 // V[N] --- 2**(WORD_BITS*3*N/2) mod M 02387 02388 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N) 02389 { 02390 assert(N%2==0 && N>=4); 02391 02392 #define M0 M 02393 #define M1 (M+N2) 02394 #define V0 V 02395 #define V1 (V+N2) 02396 02397 #define X0 X 02398 #define X1 (X+N2) 02399 #define X2 (X+N) 02400 #define X3 (X+N+N2) 02401 02402 const size_t N2 = N/2; 02403 Multiply(T0, T2, V0, X3, N2); 02404 int c2 = Add(T0, T0, X0, N); 02405 MultiplyBottom(T3, T2, T0, U, N2); 02406 MultiplyTop(T2, R, T0, T3, M0, N2); 02407 c2 -= Subtract(T2, T1, T2, N2); 02408 Multiply(T0, R, T3, M1, N2); 02409 c2 -= Subtract(T0, T2, T0, N2); 02410 int c3 = -(int)Subtract(T1, X2, T1, N2); 02411 Multiply(R0, T2, V1, X3, N2); 02412 c3 += Add(R, R, T, N); 02413 02414 if (c2>0) 02415 c3 += Increment(R1, N2); 02416 else if (c2<0) 02417 c3 -= Decrement(R1, N2, -c2); 02418 02419 assert(c3>=-1 && c3<=1); 02420 if (c3>0) 02421 Subtract(R, R, M, N); 02422 else if (c3<0) 02423 Add(R, R, M, N); 02424 02425 #undef M0 02426 #undef M1 02427 #undef V0 02428 #undef V1 02429 02430 #undef X0 02431 #undef X1 02432 #undef X2 02433 #undef X3 02434 } 02435 02436 #undef A0 02437 #undef A1 02438 #undef B0 02439 #undef B1 02440 02441 #undef T0 02442 #undef T1 02443 #undef T2 02444 #undef T3 02445 02446 #undef R0 02447 #undef R1 02448 #undef R2 02449 #undef R3 02450 02451 /* 02452 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A 02453 static word SubatomicDivide(word *A, word B0, word B1) 02454 { 02455 // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word 02456 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0)); 02457 02458 // estimate the quotient: do a 2 word by 1 word divide 02459 word Q; 02460 if (B1+1 == 0) 02461 Q = A[2]; 02462 else 02463 Q = DWord(A[1], A[2]).DividedBy(B1+1); 02464 02465 // now subtract Q*B from A 02466 DWord p = DWord::Multiply(B0, Q); 02467 DWord u = (DWord) A[0] - p.GetLowHalf(); 02468 A[0] = u.GetLowHalf(); 02469 u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q); 02470 A[1] = u.GetLowHalf(); 02471 A[2] += u.GetHighHalf(); 02472 02473 // Q <= actual quotient, so fix it 02474 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) 02475 { 02476 u = (DWord) A[0] - B0; 02477 A[0] = u.GetLowHalf(); 02478 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); 02479 A[1] = u.GetLowHalf(); 02480 A[2] += u.GetHighHalf(); 02481 Q++; 02482 assert(Q); // shouldn't overflow 02483 } 02484 02485 return Q; 02486 } 02487 02488 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1 02489 static inline void AtomicDivide(word *Q, const word *A, const word *B) 02490 { 02491 if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS) 02492 { 02493 Q[0] = A[2]; 02494 Q[1] = A[3]; 02495 } 02496 else 02497 { 02498 word T[4]; 02499 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3]; 02500 Q[1] = SubatomicDivide(T+1, B[0], B[1]); 02501 Q[0] = SubatomicDivide(T, B[0], B[1]); 02502 02503 #ifndef NDEBUG 02504 // multiply quotient and divisor and add remainder, make sure it equals dividend 02505 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); 02506 word P[4]; 02507 LowLevel::Multiply2(P, Q, B); 02508 Add(P, P, T, 4); 02509 assert(memcmp(P, A, 4*WORD_SIZE)==0); 02510 #endif 02511 } 02512 } 02513 */ 02514 02515 static inline void AtomicDivide(word *Q, const word *A, const word *B) 02516 { 02517 word T[4]; 02518 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); 02519 Q[0] = q.GetLowHalf(); 02520 Q[1] = q.GetHighHalf(); 02521 02522 #ifndef NDEBUG 02523 if (B[0] || B[1]) 02524 { 02525 // multiply quotient and divisor and add remainder, make sure it equals dividend 02526 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); 02527 word P[4]; 02528 s_pMul[0](P, Q, B); 02529 Add(P, P, T, 4); 02530 assert(memcmp(P, A, 4*WORD_SIZE)==0); 02531 } 02532 #endif 02533 } 02534 02535 // for use by Divide(), corrects the underestimated quotient {Q1,Q0} 02536 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N) 02537 { 02538 assert(N && N%2==0); 02539 02540 AsymmetricMultiply(T, T+N+2, Q, 2, B, N); 02541 02542 word borrow = Subtract(R, R, T, N+2); 02543 assert(!borrow && !R[N+1]); 02544 02545 while (R[N] || Compare(R, B, N) >= 0) 02546 { 02547 R[N] -= Subtract(R, R, B, N); 02548 Q[1] += (++Q[0]==0); 02549 assert(Q[0] || Q[1]); // no overflow 02550 } 02551 } 02552 02553 // R[NB] -------- remainder = A%B 02554 // Q[NA-NB+2] --- quotient = A/B 02555 // T[NA+3*(NB+2)] - temp work space 02556 // A[NA] -------- dividend 02557 // B[NB] -------- divisor 02558 02559 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB) 02560 { 02561 assert(NA && NB && NA%2==0 && NB%2==0); 02562 assert(B[NB-1] || B[NB-2]); 02563 assert(NB <= NA); 02564 02565 // set up temporary work space 02566 word *const TA=T; 02567 word *const TB=T+NA+2; 02568 word *const TP=T+NA+2+NB; 02569 02570 // copy B into TB and normalize it so that TB has highest bit set to 1 02571 unsigned shiftWords = (B[NB-1]==0); 02572 TB[0] = TB[NB-1] = 0; 02573 CopyWords(TB+shiftWords, B, NB-shiftWords); 02574 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); 02575 assert(shiftBits < WORD_BITS); 02576 ShiftWordsLeftByBits(TB, NB, shiftBits); 02577 02578 // copy A into TA and normalize it 02579 TA[0] = TA[NA] = TA[NA+1] = 0; 02580 CopyWords(TA+shiftWords, A, NA); 02581 ShiftWordsLeftByBits(TA, NA+2, shiftBits); 02582 02583 if (TA[NA+1]==0 && TA[NA] <= 1) 02584 { 02585 Q[NA-NB+1] = Q[NA-NB] = 0; 02586 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) 02587 { 02588 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); 02589 ++Q[NA-NB]; 02590 } 02591 } 02592 else 02593 { 02594 NA+=2; 02595 assert(Compare(TA+NA-NB, TB, NB) < 0); 02596 } 02597 02598 word BT[2]; 02599 BT[0] = TB[NB-2] + 1; 02600 BT[1] = TB[NB-1] + (BT[0]==0); 02601 02602 // start reducing TA mod TB, 2 words at a time 02603 for (size_t i=NA-2; i>=NB; i-=2) 02604 { 02605 AtomicDivide(Q+i-NB, TA+i-2, BT); 02606 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); 02607 } 02608 02609 // copy TA into R, and denormalize it 02610 CopyWords(R, TA+shiftWords, NB); 02611 ShiftWordsRightByBits(R, NB, shiftBits); 02612 } 02613 02614 static inline size_t EvenWordCount(const word *X, size_t N) 02615 { 02616 while (N && X[N-2]==0 && X[N-1]==0) 02617 N-=2; 02618 return N; 02619 } 02620 02621 // return k 02622 // R[N] --- result = A^(-1) * 2^k mod M 02623 // T[4*N] - temporary work space 02624 // A[NA] -- number to take inverse of 02625 // M[N] --- modulus 02626 02627 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N) 02628 { 02629 assert(NA<=N && N && N%2==0); 02630 02631 word *b = T; 02632 word *c = T+N; 02633 word *f = T+2*N; 02634 word *g = T+3*N; 02635 size_t bcLen=2, fgLen=EvenWordCount(M, N); 02636 unsigned int k=0; 02637 bool s=false; 02638 02639 SetWords(T, 0, 3*N); 02640 b[0]=1; 02641 CopyWords(f, A, NA); 02642 CopyWords(g, M, N); 02643 02644 while (1) 02645 { 02646 word t=f[0]; 02647 while (!t) 02648 { 02649 if (EvenWordCount(f, fgLen)==0) 02650 { 02651 SetWords(R, 0, N); 02652 return 0; 02653 } 02654 02655 ShiftWordsRightByWords(f, fgLen, 1); 02656 bcLen += 2 * (c[bcLen-1] != 0); 02657 assert(bcLen <= N); 02658 ShiftWordsLeftByWords(c, bcLen, 1); 02659 k+=WORD_BITS; 02660 t=f[0]; 02661 } 02662 02663 unsigned int i = TrailingZeros(t); 02664 t >>= i; 02665 k += i; 02666 02667 if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0) 02668 { 02669 if (s) 02670 Subtract(R, M, b, N); 02671 else 02672 CopyWords(R, b, N); 02673 return k; 02674 } 02675 02676 ShiftWordsRightByBits(f, fgLen, i); 02677 t = ShiftWordsLeftByBits(c, bcLen, i); 02678 c[bcLen] += t; 02679 bcLen += 2 * (t!=0); 02680 assert(bcLen <= N); 02681 02682 bool swap = Compare(f, g, fgLen)==-1; 02683 ConditionalSwapPointers(swap, f, g); 02684 ConditionalSwapPointers(swap, b, c); 02685 s ^= swap; 02686 02687 fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]); 02688 02689 Subtract(f, f, g, fgLen); 02690 t = Add(b, b, c, bcLen); 02691 b[bcLen] += t; 02692 bcLen += 2*t; 02693 assert(bcLen <= N); 02694 } 02695 } 02696 02697 // R[N] - result = A/(2^k) mod M 02698 // A[N] - input 02699 // M[N] - modulus 02700 02701 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) 02702 { 02703 CopyWords(R, A, N); 02704 02705 while (k--) 02706 { 02707 if (R[0]%2==0) 02708 ShiftWordsRightByBits(R, N, 1); 02709 else 02710 { 02711 word carry = Add(R, R, M, N); 02712 ShiftWordsRightByBits(R, N, 1); 02713 R[N-1] += carry<<(WORD_BITS-1); 02714 } 02715 } 02716 } 02717 02718 // R[N] - result = A*(2^k) mod M 02719 // A[N] - input 02720 // M[N] - modulus 02721 02722 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) 02723 { 02724 CopyWords(R, A, N); 02725 02726 while (k--) 02727 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) 02728 Subtract(R, R, M, N); 02729 } 02730 02731 // ****************************************************************** 02732 02733 InitializeInteger::InitializeInteger() 02734 { 02735 if (!g_pAssignIntToInteger) 02736 { 02737 SetFunctionPointers(); 02738 g_pAssignIntToInteger = AssignIntToInteger; 02739 } 02740 } 02741 02742 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8}; 02743 02744 static inline size_t RoundupSize(size_t n) 02745 { 02746 if (n<=8) 02747 return RoundupSizeTable[n]; 02748 else if (n<=16) 02749 return 16; 02750 else if (n<=32) 02751 return 32; 02752 else if (n<=64) 02753 return 64; 02754 else return size_t(1) << BitPrecision(n-1); 02755 } 02756 02757 Integer::Integer() 02758 : reg(2), sign(POSITIVE) 02759 { 02760 reg[0] = reg[1] = 0; 02761 } 02762 02763 Integer::Integer(const Integer& t) 02764 : reg(RoundupSize(t.WordCount())), sign(t.sign) 02765 { 02766 CopyWords(reg, t.reg, reg.size()); 02767 } 02768 02769 Integer::Integer(Sign s, lword value) 02770 : reg(2), sign(s) 02771 { 02772 reg[0] = word(value); 02773 reg[1] = word(SafeRightShift<WORD_BITS>(value)); 02774 } 02775 02776 Integer::Integer(signed long value) 02777 : reg(2) 02778 { 02779 if (value >= 0) 02780 sign = POSITIVE; 02781 else 02782 { 02783 sign = NEGATIVE; 02784 value = -value; 02785 } 02786 reg[0] = word(value); 02787 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); 02788 } 02789 02790 Integer::Integer(Sign s, word high, word low) 02791 : reg(2), sign(s) 02792 { 02793 reg[0] = low; 02794 reg[1] = high; 02795 } 02796 02797 bool Integer::IsConvertableToLong() const 02798 { 02799 if (ByteCount() > sizeof(long)) 02800 return false; 02801 02802 unsigned long value = (unsigned long)reg[0]; 02803 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); 02804 02805 if (sign==POSITIVE) 02806 return (signed long)value >= 0; 02807 else 02808 return -(signed long)value < 0; 02809 } 02810 02811 signed long Integer::ConvertToLong() const 02812 { 02813 assert(IsConvertableToLong()); 02814 02815 unsigned long value = (unsigned long)reg[0]; 02816 value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); 02817 return sign==POSITIVE ? value : -(signed long)value; 02818 } 02819 02820 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s) 02821 { 02822 Decode(encodedInteger, byteCount, s); 02823 } 02824 02825 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s) 02826 { 02827 Decode(encodedInteger, byteCount, s); 02828 } 02829 02830 Integer::Integer(BufferedTransformation &bt) 02831 { 02832 BERDecode(bt); 02833 } 02834 02835 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount) 02836 { 02837 Randomize(rng, bitcount); 02838 } 02839 02840 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) 02841 { 02842 if (!Randomize(rng, min, max, rnType, equiv, mod)) 02843 throw Integer::RandomNumberNotFound(); 02844 } 02845 02846 Integer Integer::Power2(size_t e) 02847 { 02848 Integer r((word)0, BitsToWords(e+1)); 02849 r.SetBit(e); 02850 return r; 02851 } 02852 02853 template <long i> 02854 struct NewInteger 02855 { 02856 Integer * operator()() const 02857 { 02858 return new Integer(i); 02859 } 02860 }; 02861 02862 const Integer &Integer::Zero() 02863 { 02864 return Singleton<Integer>().Ref(); 02865 } 02866 02867 const Integer &Integer::One() 02868 { 02869 return Singleton<Integer, NewInteger<1> >().Ref(); 02870 } 02871 02872 const Integer &Integer::Two() 02873 { 02874 return Singleton<Integer, NewInteger<2> >().Ref(); 02875 } 02876 02877 bool Integer::operator!() const 02878 { 02879 return IsNegative() ? false : (reg[0]==0 && WordCount()==0); 02880 } 02881 02882 Integer& Integer::operator=(const Integer& t) 02883 { 02884 if (this != &t) 02885 { 02886 if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0) 02887 reg.New(RoundupSize(t.WordCount())); 02888 CopyWords(reg, t.reg, reg.size()); 02889 sign = t.sign; 02890 } 02891 return *this; 02892 } 02893 02894 bool Integer::GetBit(size_t n) const 02895 { 02896 if (n/WORD_BITS >= reg.size()) 02897 return 0; 02898 else 02899 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1); 02900 } 02901 02902 void Integer::SetBit(size_t n, bool value) 02903 { 02904 if (value) 02905 { 02906 reg.CleanGrow(RoundupSize(BitsToWords(n+1))); 02907 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS)); 02908 } 02909 else 02910 { 02911 if (n/WORD_BITS < reg.size()) 02912 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS)); 02913 } 02914 } 02915 02916 byte Integer::GetByte(size_t n) const 02917 { 02918 if (n/WORD_SIZE >= reg.size()) 02919 return 0; 02920 else 02921 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); 02922 } 02923 02924 void Integer::SetByte(size_t n, byte value) 02925 { 02926 reg.CleanGrow(RoundupSize(BytesToWords(n+1))); 02927 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); 02928 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); 02929 } 02930 02931 lword Integer::GetBits(size_t i, size_t n) const 02932 { 02933 lword v = 0; 02934 assert(n <= sizeof(v)*8); 02935 for (unsigned int j=0; j<n; j++) 02936 v |= lword(GetBit(i+j)) << j; 02937 return v; 02938 } 02939 02940 Integer Integer::operator-() const 02941 { 02942 Integer result(*this); 02943 result.Negate(); 02944 return result; 02945 } 02946 02947 Integer Integer::AbsoluteValue() const 02948 { 02949 Integer result(*this); 02950 result.sign = POSITIVE; 02951 return result; 02952 } 02953 02954 void Integer::swap(Integer &a) 02955 { 02956 reg.swap(a.reg); 02957 std::swap(sign, a.sign); 02958 } 02959 02960 Integer::Integer(word value, size_t length) 02961 : reg(RoundupSize(length)), sign(POSITIVE) 02962 { 02963 reg[0] = value; 02964 SetWords(reg+1, 0, reg.size()-1); 02965 } 02966 02967 template <class T> 02968 static Integer StringToInteger(const T *str) 02969 { 02970 int radix; 02971 // GCC workaround 02972 // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3 02973 unsigned int length; 02974 for (length = 0; str[length] != 0; length++) {} 02975 02976 Integer v; 02977 02978 if (length == 0) 02979 return v; 02980 02981 switch (str[length-1]) 02982 { 02983 case 'h': 02984 case 'H': 02985 radix=16; 02986 break; 02987 case 'o': 02988 case 'O': 02989 radix=8; 02990 break; 02991 case 'b': 02992 case 'B': 02993 radix=2; 02994 break; 02995 default: 02996 radix=10; 02997 } 02998 02999 if (length > 2 && str[0] == '0' && str[1] == 'x') 03000 radix = 16; 03001 03002 for (unsigned i=0; i<length; i++) 03003 { 03004 int digit; 03005 03006 if (str[i] >= '0' && str[i] <= '9') 03007 digit = str[i] - '0'; 03008 else if (str[i] >= 'A' && str[i] <= 'F') 03009 digit = str[i] - 'A' + 10; 03010 else if (str[i] >= 'a' && str[i] <= 'f') 03011 digit = str[i] - 'a' + 10; 03012 else 03013 digit = radix; 03014 03015 if (digit < radix) 03016 { 03017 v *= radix; 03018 v += digit; 03019 } 03020 } 03021 03022 if (str[0] == '-') 03023 v.Negate(); 03024 03025 return v; 03026 } 03027 03028 Integer::Integer(const char *str) 03029 : reg(2), sign(POSITIVE) 03030 { 03031 *this = StringToInteger(str); 03032 } 03033 03034 Integer::Integer(const wchar_t *str) 03035 : reg(2), sign(POSITIVE) 03036 { 03037 *this = StringToInteger(str); 03038 } 03039 03040 unsigned int Integer::WordCount() const 03041 { 03042 return (unsigned int)CountWords(reg, reg.size()); 03043 } 03044 03045 unsigned int Integer::ByteCount() const 03046 { 03047 unsigned wordCount = WordCount(); 03048 if (wordCount) 03049 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]); 03050 else 03051 return 0; 03052 } 03053 03054 unsigned int Integer::BitCount() const 03055 { 03056 unsigned wordCount = WordCount(); 03057 if (wordCount) 03058 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]); 03059 else 03060 return 0; 03061 } 03062 03063 void Integer::Decode(const byte *input, size_t inputLen, Signedness s) 03064 { 03065 StringStore store(input, inputLen); 03066 Decode(store, inputLen, s); 03067 } 03068 03069 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s) 03070 { 03071 assert(bt.MaxRetrievable() >= inputLen); 03072 03073 byte b; 03074 bt.Peek(b); 03075 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE; 03076 03077 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff)) 03078 { 03079 bt.Skip(1); 03080 inputLen--; 03081 bt.Peek(b); 03082 } 03083 03084 reg.CleanNew(RoundupSize(BytesToWords(inputLen))); 03085 03086 for (size_t i=inputLen; i > 0; i--) 03087 { 03088 bt.Get(b); 03089 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; 03090 } 03091 03092 if (sign == NEGATIVE) 03093 { 03094 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++) 03095 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; 03096 TwosComplement(reg, reg.size()); 03097 } 03098 } 03099 03100 size_t Integer::MinEncodedSize(Signedness signedness) const 03101 { 03102 unsigned int outputLen = STDMAX(1U, ByteCount()); 03103 if (signedness == UNSIGNED) 03104 return outputLen; 03105 if (NotNegative() && (GetByte(outputLen-1) & 0x80)) 03106 outputLen++; 03107 if (IsNegative() && *this < -Power2(outputLen*8-1)) 03108 outputLen++; 03109 return outputLen; 03110 } 03111 03112 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const 03113 { 03114 ArraySink sink(output, outputLen); 03115 Encode(sink, outputLen, signedness); 03116 } 03117 03118 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const 03119 { 03120 if (signedness == UNSIGNED || NotNegative()) 03121 { 03122 for (size_t i=outputLen; i > 0; i--) 03123 bt.Put(GetByte(i-1)); 03124 } 03125 else 03126 { 03127 // take two's complement of *this 03128 Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this; 03129 temp.Encode(bt, outputLen, UNSIGNED); 03130 } 03131 } 03132 03133 void Integer::DEREncode(BufferedTransformation &bt) const 03134 { 03135 DERGeneralEncoder enc(bt, INTEGER); 03136 Encode(enc, MinEncodedSize(SIGNED), SIGNED); 03137 enc.MessageEnd(); 03138 } 03139 03140 void Integer::BERDecode(const byte *input, size_t len) 03141 { 03142 StringStore store(input, len); 03143 BERDecode(store); 03144 } 03145 03146 void Integer::BERDecode(BufferedTransformation &bt) 03147 { 03148 BERGeneralDecoder dec(bt, INTEGER); 03149 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength()) 03150 BERDecodeError(); 03151 Decode(dec, (size_t)dec.RemainingLength(), SIGNED); 03152 dec.MessageEnd(); 03153 } 03154 03155 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const 03156 { 03157 DERGeneralEncoder enc(bt, OCTET_STRING); 03158 Encode(enc, length); 03159 enc.MessageEnd(); 03160 } 03161 03162 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length) 03163 { 03164 BERGeneralDecoder dec(bt, OCTET_STRING); 03165 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length) 03166 BERDecodeError(); 03167 Decode(dec, length); 03168 dec.MessageEnd(); 03169 } 03170 03171 size_t Integer::OpenPGPEncode(byte *output, size_t len) const 03172 { 03173 ArraySink sink(output, len); 03174 return OpenPGPEncode(sink); 03175 } 03176 03177 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const 03178 { 03179 word16 bitCount = BitCount(); 03180 bt.PutWord16(bitCount); 03181 size_t byteCount = BitsToBytes(bitCount); 03182 Encode(bt, byteCount); 03183 return 2 + byteCount; 03184 } 03185 03186 void Integer::OpenPGPDecode(const byte *input, size_t len) 03187 { 03188 StringStore store(input, len); 03189 OpenPGPDecode(store); 03190 } 03191 03192 void Integer::OpenPGPDecode(BufferedTransformation &bt) 03193 { 03194 word16 bitCount; 03195 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount)) 03196 throw OpenPGPDecodeErr(); 03197 Decode(bt, BitsToBytes(bitCount)); 03198 } 03199 03200 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits) 03201 { 03202 const size_t nbytes = nbits/8 + 1; 03203 SecByteBlock buf(nbytes); 03204 rng.GenerateBlock(buf, nbytes); 03205 if (nbytes) 03206 buf[0] = (byte)Crop(buf[0], nbits % 8); 03207 Decode(buf, nbytes, UNSIGNED); 03208 } 03209 03210 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max) 03211 { 03212 if (min > max) 03213 throw InvalidArgument("Integer: Min must be no greater than Max"); 03214 03215 Integer range = max - min; 03216 const unsigned int nbits = range.BitCount(); 03217 03218 do 03219 { 03220 Randomize(rng, nbits); 03221 } 03222 while (*this > range); 03223 03224 *this += min; 03225 } 03226 03227 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) 03228 { 03229 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod)); 03230 } 03231 03232 class KDF2_RNG : public RandomNumberGenerator 03233 { 03234 public: 03235 KDF2_RNG(const byte *seed, size_t seedSize) 03236 : m_counter(0), m_counterAndSeed(seedSize + 4) 03237 { 03238 memcpy(m_counterAndSeed + 4, seed, seedSize); 03239 } 03240 03241 void GenerateBlock(byte *output, size_t size) 03242 { 03243 PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); 03244 ++m_counter; 03245 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0); 03246 } 03247 03248 private: 03249 word32 m_counter; 03250 SecByteBlock m_counterAndSeed; 03251 }; 03252 03253 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms) 03254 { 03255 Integer min = params.GetValueWithDefault("Min", Integer::Zero()); 03256 Integer max; 03257 if (!params.GetValue("Max", max)) 03258 { 03259 int bitLength; 03260 if (params.GetIntValue("BitLength", bitLength)) 03261 max = Integer::Power2(bitLength); 03262 else 03263 throw InvalidArgument("Integer: missing Max argument"); 03264 } 03265 if (min > max) 03266 throw InvalidArgument("Integer: Min must be no greater than Max"); 03267 03268 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero()); 03269 Integer mod = params.GetValueWithDefault("Mod", Integer::One()); 03270 03271 if (equiv.IsNegative() || equiv >= mod) 03272 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument"); 03273 03274 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY); 03275 03276 member_ptr<KDF2_RNG> kdf2Rng; 03277 ConstByteArrayParameter seed; 03278 if (params.GetValue(Name::Seed(), seed)) 03279 { 03280 ByteQueue bq; 03281 DERSequenceEncoder seq(bq); 03282 min.DEREncode(seq); 03283 max.DEREncode(seq); 03284 equiv.DEREncode(seq); 03285 mod.DEREncode(seq); 03286 DEREncodeUnsigned(seq, rnType); 03287 DEREncodeOctetString(seq, seed.begin(), seed.size()); 03288 seq.MessageEnd(); 03289 03290 SecByteBlock finalSeed((size_t)bq.MaxRetrievable()); 03291 bq.Get(finalSeed, finalSeed.size()); 03292 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size())); 03293 } 03294 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng; 03295 03296 switch (rnType) 03297 { 03298 case ANY: 03299 if (mod == One()) 03300 Randomize(rng, min, max); 03301 else 03302 { 03303 Integer min1 = min + (equiv-min)%mod; 03304 if (max < min1) 03305 return false; 03306 Randomize(rng, Zero(), (max - min1) / mod); 03307 *this *= mod; 03308 *this += min1; 03309 } 03310 return true; 03311 03312 case PRIME: 03313 { 03314 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL); 03315 03316 int i; 03317 i = 0; 03318 while (1) 03319 { 03320 if (++i==16) 03321 { 03322 // check if there are any suitable primes in [min, max] 03323 Integer first = min; 03324 if (FirstPrime(first, max, equiv, mod, pSelector)) 03325 { 03326 // if there is only one suitable prime, we're done 03327 *this = first; 03328 if (!FirstPrime(first, max, equiv, mod, pSelector)) 03329 return true; 03330 } 03331 else 03332 return false; 03333 } 03334 03335 Randomize(rng, min, max); 03336 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector)) 03337 return true; 03338 } 03339 } 03340 03341 default: 03342 throw InvalidArgument("Integer: invalid RandomNumberType argument"); 03343 } 03344 } 03345 03346 std::istream& operator>>(std::istream& in, Integer &a) 03347 { 03348 char c; 03349 unsigned int length = 0; 03350 SecBlock<char> str(length + 16); 03351 03352 std::ws(in); 03353 03354 do 03355 { 03356 in.read(&c, 1); 03357 str[length++] = c; 03358 if (length >= str.size()) 03359 str.Grow(length + 16); 03360 } 03361 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.')); 03362 03363 if (in.gcount()) 03364 in.putback(c); 03365 str[length-1] = '\0'; 03366 a = Integer(str); 03367 03368 return in; 03369 } 03370 03371 std::ostream& operator<<(std::ostream& out, const Integer &a) 03372 { 03373 // Get relevant conversion specifications from ostream. 03374 long f = out.flags() & std::ios::basefield; // Get base digits. 03375 int base, block; 03376 char suffix; 03377 switch(f) 03378 { 03379 case std::ios::oct : 03380 base = 8; 03381 block = 8; 03382 suffix = 'o'; 03383 break; 03384 case std::ios::hex : 03385 base = 16; 03386 block = 4; 03387 suffix = 'h'; 03388 break; 03389 default : 03390 base = 10; 03391 block = 3; 03392 suffix = '.'; 03393 } 03394 03395 Integer temp1=a, temp2; 03396 03397 if (a.IsNegative()) 03398 { 03399 out << '-'; 03400 temp1.Negate(); 03401 } 03402 03403 if (!a) 03404 out << '0'; 03405 03406 static const char upper[]="0123456789ABCDEF"; 03407 static const char lower[]="0123456789abcdef"; 03408 03409 const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower; 03410 unsigned i=0; 03411 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1); 03412 03413 while (!!temp1) 03414 { 03415 word digit; 03416 Integer::Divide(digit, temp2, temp1, base); 03417 s[i++]=vec[digit]; 03418 temp1.swap(temp2); 03419 } 03420 03421 while (i--) 03422 { 03423 out << s[i]; 03424 // if (i && !(i%block)) 03425 // out << ","; 03426 } 03427 return out << suffix; 03428 } 03429 03430 Integer& Integer::operator++() 03431 { 03432 if (NotNegative()) 03433 { 03434 if (Increment(reg, reg.size())) 03435 { 03436 reg.CleanGrow(2*reg.size()); 03437 reg[reg.size()/2]=1; 03438 } 03439 } 03440 else 03441 { 03442 word borrow = Decrement(reg, reg.size()); 03443 assert(!borrow); 03444 if (WordCount()==0) 03445 *this = Zero(); 03446 } 03447 return *this; 03448 } 03449 03450 Integer& Integer::operator--() 03451 { 03452 if (IsNegative()) 03453 { 03454 if (Increment(reg, reg.size())) 03455 { 03456 reg.CleanGrow(2*reg.size()); 03457 reg[reg.size()/2]=1; 03458 } 03459 } 03460 else 03461 { 03462 if (Decrement(reg, reg.size())) 03463 *this = -One(); 03464 } 03465 return *this; 03466 } 03467 03468 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b) 03469 { 03470 int carry; 03471 if (a.reg.size() == b.reg.size()) 03472 carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); 03473 else if (a.reg.size() > b.reg.size()) 03474 { 03475 carry = Add(sum.reg, a.reg, b.reg, b.reg.size()); 03476 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size()); 03477 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry); 03478 } 03479 else 03480 { 03481 carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); 03482 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size()); 03483 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry); 03484 } 03485 03486 if (carry) 03487 { 03488 sum.reg.CleanGrow(2*sum.reg.size()); 03489 sum.reg[sum.reg.size()/2] = 1; 03490 } 03491 sum.sign = Integer::POSITIVE; 03492 } 03493 03494 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) 03495 { 03496 unsigned aSize = a.WordCount(); 03497 aSize += aSize%2; 03498 unsigned bSize = b.WordCount(); 03499 bSize += bSize%2; 03500 03501 if (aSize == bSize) 03502 { 03503 if (Compare(a.reg, b.reg, aSize) >= 0) 03504 { 03505 Subtract(diff.reg, a.reg, b.reg, aSize); 03506 diff.sign = Integer::POSITIVE; 03507 } 03508 else 03509 { 03510 Subtract(diff.reg, b.reg, a.reg, aSize); 03511 diff.sign = Integer::NEGATIVE; 03512 } 03513 } 03514 else if (aSize > bSize) 03515 { 03516 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize); 03517 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize); 03518 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow); 03519 assert(!borrow); 03520 diff.sign = Integer::POSITIVE; 03521 } 03522 else 03523 { 03524 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize); 03525 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize); 03526 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow); 03527 assert(!borrow); 03528 diff.sign = Integer::NEGATIVE; 03529 } 03530 } 03531 03532 // MSVC .NET 2003 workaround 03533 template <class T> inline const T& STDMAX2(const T& a, const T& b) 03534 { 03535 return a < b ? b : a; 03536 } 03537 03538 Integer Integer::Plus(const Integer& b) const 03539 { 03540 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size())); 03541 if (NotNegative()) 03542 { 03543 if (b.NotNegative()) 03544 PositiveAdd(sum, *this, b); 03545 else 03546 PositiveSubtract(sum, *this, b); 03547 } 03548 else 03549 { 03550 if (b.NotNegative()) 03551 PositiveSubtract(sum, b, *this); 03552 else 03553 { 03554 PositiveAdd(sum, *this, b); 03555 sum.sign = Integer::NEGATIVE; 03556 } 03557 } 03558 return sum; 03559 } 03560 03561 Integer& Integer::operator+=(const Integer& t) 03562 { 03563 reg.CleanGrow(t.reg.size()); 03564 if (NotNegative()) 03565 { 03566 if (t.NotNegative()) 03567 PositiveAdd(*this, *this, t); 03568 else 03569 PositiveSubtract(*this, *this, t); 03570 } 03571 else 03572 { 03573 if (t.NotNegative()) 03574 PositiveSubtract(*this, t, *this); 03575 else 03576 { 03577 PositiveAdd(*this, *this, t); 03578 sign = Integer::NEGATIVE; 03579 } 03580 } 03581 return *this; 03582 } 03583 03584 Integer Integer::Minus(const Integer& b) const 03585 { 03586 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size())); 03587 if (NotNegative()) 03588 { 03589 if (b.NotNegative()) 03590 PositiveSubtract(diff, *this, b); 03591 else 03592 PositiveAdd(diff, *this, b); 03593 } 03594 else 03595 { 03596 if (b.NotNegative()) 03597 { 03598 PositiveAdd(diff, *this, b); 03599 diff.sign = Integer::NEGATIVE; 03600 } 03601 else 03602 PositiveSubtract(diff, b, *this); 03603 } 03604 return diff; 03605 } 03606 03607 Integer& Integer::operator-=(const Integer& t) 03608 { 03609 reg.CleanGrow(t.reg.size()); 03610 if (NotNegative()) 03611 { 03612 if (t.NotNegative()) 03613 PositiveSubtract(*this, *this, t); 03614 else 03615 PositiveAdd(*this, *this, t); 03616 } 03617 else 03618 { 03619 if (t.NotNegative()) 03620 { 03621 PositiveAdd(*this, *this, t); 03622 sign = Integer::NEGATIVE; 03623 } 03624 else 03625 PositiveSubtract(*this, t, *this); 03626 } 03627 return *this; 03628 } 03629 03630 Integer& Integer::operator<<=(size_t n) 03631 { 03632 const size_t wordCount = WordCount(); 03633 const size_t shiftWords = n / WORD_BITS; 03634 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); 03635 03636 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); 03637 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords); 03638 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits); 03639 return *this; 03640 } 03641 03642 Integer& Integer::operator>>=(size_t n) 03643 { 03644 const size_t wordCount = WordCount(); 03645 const size_t shiftWords = n / WORD_BITS; 03646 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS); 03647 03648 ShiftWordsRightByWords(reg, wordCount, shiftWords); 03649 if (wordCount > shiftWords) 03650 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits); 03651 if (IsNegative() && WordCount()==0) // avoid -0 03652 *this = Zero(); 03653 return *this; 03654 } 03655 03656 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b) 03657 { 03658 size_t aSize = RoundupSize(a.WordCount()); 03659 size_t bSize = RoundupSize(b.WordCount()); 03660 03661 product.reg.CleanNew(RoundupSize(aSize+bSize)); 03662 product.sign = Integer::POSITIVE; 03663 03664 IntegerSecBlock workspace(aSize + bSize); 03665 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize); 03666 } 03667 03668 void Multiply(Integer &product, const Integer &a, const Integer &b) 03669 { 03670 PositiveMultiply(product, a, b); 03671 03672 if (a.NotNegative() != b.NotNegative()) 03673 product.Negate(); 03674 } 03675 03676 Integer Integer::Times(const Integer &b) const 03677 { 03678 Integer product; 03679 Multiply(product, *this, b); 03680 return product; 03681 } 03682 03683 /* 03684 void PositiveDivide(Integer &remainder, Integer "ient, 03685 const Integer ÷nd, const Integer &divisor) 03686 { 03687 remainder.reg.CleanNew(divisor.reg.size()); 03688 remainder.sign = Integer::POSITIVE; 03689 quotient.reg.New(0); 03690 quotient.sign = Integer::POSITIVE; 03691 unsigned i=dividend.BitCount(); 03692 while (i--) 03693 { 03694 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1); 03695 remainder.reg[0] |= dividend[i]; 03696 if (overflow || remainder >= divisor) 03697 { 03698 Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size()); 03699 quotient.SetBit(i); 03700 } 03701 } 03702 } 03703 */ 03704 03705 void PositiveDivide(Integer &remainder, Integer "ient, 03706 const Integer &a, const Integer &b) 03707 { 03708 unsigned aSize = a.WordCount(); 03709 unsigned bSize = b.WordCount(); 03710 03711 if (!bSize) 03712 throw Integer::DivideByZero(); 03713 03714 if (aSize < bSize) 03715 { 03716 remainder = a; 03717 remainder.sign = Integer::POSITIVE; 03718 quotient = Integer::Zero(); 03719 return; 03720 } 03721 03722 aSize += aSize%2; // round up to next even number 03723 bSize += bSize%2; 03724 03725 remainder.reg.CleanNew(RoundupSize(bSize)); 03726 remainder.sign = Integer::POSITIVE; 03727 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2)); 03728 quotient.sign = Integer::POSITIVE; 03729 03730 IntegerSecBlock T(aSize+3*(bSize+2)); 03731 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize); 03732 } 03733 03734 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor) 03735 { 03736 PositiveDivide(remainder, quotient, dividend, divisor); 03737 03738 if (dividend.IsNegative()) 03739 { 03740 quotient.Negate(); 03741 if (remainder.NotZero()) 03742 { 03743 --quotient; 03744 remainder = divisor.AbsoluteValue() - remainder; 03745 } 03746 } 03747 03748 if (divisor.IsNegative()) 03749 quotient.Negate(); 03750 } 03751 03752 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n) 03753 { 03754 q = a; 03755 q >>= n; 03756 03757 const size_t wordCount = BitsToWords(n); 03758 if (wordCount <= a.WordCount()) 03759 { 03760 r.reg.resize(RoundupSize(wordCount)); 03761 CopyWords(r.reg, a.reg, wordCount); 03762 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount); 03763 if (n % WORD_BITS != 0) 03764 r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS)); 03765 } 03766 else 03767 { 03768 r.reg.resize(RoundupSize(a.WordCount())); 03769 CopyWords(r.reg, a.reg, r.reg.size()); 03770 } 03771 r.sign = POSITIVE; 03772 03773 if (a.IsNegative() && r.NotZero()) 03774 { 03775 --q; 03776 r = Power2(n) - r; 03777 } 03778 } 03779 03780 Integer Integer::DividedBy(const Integer &b) const 03781 { 03782 Integer remainder, quotient; 03783 Integer::Divide(remainder, quotient, *this, b); 03784 return quotient; 03785 } 03786 03787 Integer Integer::Modulo(const Integer &b) const 03788 { 03789 Integer remainder, quotient; 03790 Integer::Divide(remainder, quotient, *this, b); 03791 return remainder; 03792 } 03793 03794 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor) 03795 { 03796 if (!divisor) 03797 throw Integer::DivideByZero(); 03798 03799 assert(divisor); 03800 03801 if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 03802 { 03803 quotient = dividend >> (BitPrecision(divisor)-1); 03804 remainder = dividend.reg[0] & (divisor-1); 03805 return; 03806 } 03807 03808 unsigned int i = dividend.WordCount(); 03809 quotient.reg.CleanNew(RoundupSize(i)); 03810 remainder = 0; 03811 while (i--) 03812 { 03813 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; 03814 remainder = DWord(dividend.reg[i], remainder) % divisor; 03815 } 03816 03817 if (dividend.NotNegative()) 03818 quotient.sign = POSITIVE; 03819 else 03820 { 03821 quotient.sign = NEGATIVE; 03822 if (remainder) 03823 { 03824 --quotient; 03825 remainder = divisor - remainder; 03826 } 03827 } 03828 } 03829 03830 Integer Integer::DividedBy(word b) const 03831 { 03832 word remainder; 03833 Integer quotient; 03834 Integer::Divide(remainder, quotient, *this, b); 03835 return quotient; 03836 } 03837 03838 word Integer::Modulo(word divisor) const 03839 { 03840 if (!divisor) 03841 throw Integer::DivideByZero(); 03842 03843 assert(divisor); 03844 03845 word remainder; 03846 03847 if ((divisor & (divisor-1)) == 0) // divisor is a power of 2 03848 remainder = reg[0] & (divisor-1); 03849 else 03850 { 03851 unsigned int i = WordCount(); 03852 03853 if (divisor <= 5) 03854 { 03855 DWord sum(0, 0); 03856 while (i--) 03857 sum += reg[i]; 03858 remainder = sum % divisor; 03859 } 03860 else 03861 { 03862 remainder = 0; 03863 while (i--) 03864 remainder = DWord(reg[i], remainder) % divisor; 03865 } 03866 } 03867 03868 if (IsNegative() && remainder) 03869 remainder = divisor - remainder; 03870 03871 return remainder; 03872 } 03873 03874 void Integer::Negate() 03875 { 03876 if (!!(*this)) // don't flip sign if *this==0 03877 sign = Sign(1-sign); 03878 } 03879 03880 int Integer::PositiveCompare(const Integer& t) const 03881 { 03882 unsigned size = WordCount(), tSize = t.WordCount(); 03883 03884 if (size == tSize) 03885 return CryptoPP::Compare(reg, t.reg, size); 03886 else 03887 return size > tSize ? 1 : -1; 03888 } 03889 03890 int Integer::Compare(const Integer& t) const 03891 { 03892 if (NotNegative()) 03893 { 03894 if (t.NotNegative()) 03895 return PositiveCompare(t); 03896 else 03897 return 1; 03898 } 03899 else 03900 { 03901 if (t.NotNegative()) 03902 return -1; 03903 else 03904 return -PositiveCompare(t); 03905 } 03906 } 03907 03908 Integer Integer::SquareRoot() const 03909 { 03910 if (!IsPositive()) 03911 return Zero(); 03912 03913 // overestimate square root 03914 Integer x, y = Power2((BitCount()+1)/2); 03915 assert(y*y >= *this); 03916 03917 do 03918 { 03919 x = y; 03920 y = (x + *this/x) >> 1; 03921 } while (y<x); 03922 03923 return x; 03924 } 03925 03926 bool Integer::IsSquare() const 03927 { 03928 Integer r = SquareRoot(); 03929 return *this == r.Squared(); 03930 } 03931 03932 bool Integer::IsUnit() const 03933 { 03934 return (WordCount() == 1) && (reg[0] == 1); 03935 } 03936 03937 Integer Integer::MultiplicativeInverse() const 03938 { 03939 return IsUnit() ? *this : Zero(); 03940 } 03941 03942 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m) 03943 { 03944 return x*y%m; 03945 } 03946 03947 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m) 03948 { 03949 ModularArithmetic mr(m); 03950 return mr.Exponentiate(x, e); 03951 } 03952 03953 Integer Integer::Gcd(const Integer &a, const Integer &b) 03954 { 03955 return EuclideanDomainOf<Integer>().Gcd(a, b); 03956 } 03957 03958 Integer Integer::InverseMod(const Integer &m) const 03959 { 03960 assert(m.NotNegative()); 03961 03962 if (IsNegative()) 03963 return Modulo(m).InverseMod(m); 03964 03965 if (m.IsEven()) 03966 { 03967 if (!m || IsEven()) 03968 return Zero(); // no inverse 03969 if (*this == One()) 03970 return One(); 03971 03972 Integer u = m.Modulo(*this).InverseMod(*this); 03973 return !u ? Zero() : (m*(*this-u)+1)/(*this); 03974 } 03975 03976 SecBlock<word> T(m.reg.size() * 4); 03977 Integer r((word)0, m.reg.size()); 03978 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size()); 03979 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size()); 03980 return r; 03981 } 03982 03983 word Integer::InverseMod(word mod) const 03984 { 03985 word g0 = mod, g1 = *this % mod; 03986 word v0 = 0, v1 = 1; 03987 word y; 03988 03989 while (g1) 03990 { 03991 if (g1 == 1) 03992 return v1; 03993 y = g0 / g1; 03994 g0 = g0 % g1; 03995 v0 += y * v1; 03996 03997 if (!g0) 03998 break; 03999 if (g0 == 1) 04000 return mod-v0; 04001 y = g1 / g0; 04002 g1 = g1 % g0; 04003 v1 += y * v0; 04004 } 04005 return 0; 04006 } 04007 04008 // ******************************************************** 04009 04010 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt) 04011 { 04012 BERSequenceDecoder seq(bt); 04013 OID oid(seq); 04014 if (oid != ASN1::prime_field()) 04015 BERDecodeError(); 04016 m_modulus.BERDecode(seq); 04017 seq.MessageEnd(); 04018 m_result.reg.resize(m_modulus.reg.size()); 04019 } 04020 04021 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const 04022 { 04023 DERSequenceEncoder seq(bt); 04024 ASN1::prime_field().DEREncode(seq); 04025 m_modulus.DEREncode(seq); 04026 seq.MessageEnd(); 04027 } 04028 04029 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const 04030 { 04031 a.DEREncodeAsOctetString(out, MaxElementByteLength()); 04032 } 04033 04034 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const 04035 { 04036 a.BERDecodeAsOctetString(in, MaxElementByteLength()); 04037 } 04038 04039 const Integer& ModularArithmetic::Half(const Integer &a) const 04040 { 04041 if (a.reg.size()==m_modulus.reg.size()) 04042 { 04043 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size()); 04044 return m_result; 04045 } 04046 else 04047 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1)); 04048 } 04049 04050 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const 04051 { 04052 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) 04053 { 04054 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size()) 04055 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0) 04056 { 04057 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); 04058 } 04059 return m_result; 04060 } 04061 else 04062 { 04063 m_result1 = a+b; 04064 if (m_result1 >= m_modulus) 04065 m_result1 -= m_modulus; 04066 return m_result1; 04067 } 04068 } 04069 04070 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const 04071 { 04072 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) 04073 { 04074 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size()) 04075 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0) 04076 { 04077 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size()); 04078 } 04079 } 04080 else 04081 { 04082 a+=b; 04083 if (a>=m_modulus) 04084 a-=m_modulus; 04085 } 04086 04087 return a; 04088 } 04089 04090 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const 04091 { 04092 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) 04093 { 04094 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size())) 04095 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); 04096 return m_result; 04097 } 04098 else 04099 { 04100 m_result1 = a-b; 04101 if (m_result1.IsNegative()) 04102 m_result1 += m_modulus; 04103 return m_result1; 04104 } 04105 } 04106 04107 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const 04108 { 04109 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) 04110 { 04111 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size())) 04112 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size()); 04113 } 04114 else 04115 { 04116 a-=b; 04117 if (a.IsNegative()) 04118 a+=m_modulus; 04119 } 04120 04121 return a; 04122 } 04123 04124 const Integer& ModularArithmetic::Inverse(const Integer &a) const 04125 { 04126 if (!a) 04127 return a; 04128 04129 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size()); 04130 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size())) 04131 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size()); 04132 04133 return m_result; 04134 } 04135 04136 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const 04137 { 04138 if (m_modulus.IsOdd()) 04139 { 04140 MontgomeryRepresentation dr(m_modulus); 04141 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2)); 04142 } 04143 else 04144 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2); 04145 } 04146 04147 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const 04148 { 04149 if (m_modulus.IsOdd()) 04150 { 04151 MontgomeryRepresentation dr(m_modulus); 04152 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount); 04153 for (unsigned int i=0; i<exponentsCount; i++) 04154 results[i] = dr.ConvertOut(results[i]); 04155 } 04156 else 04157 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount); 04158 } 04159 04160 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd 04161 : ModularArithmetic(m), 04162 m_u((word)0, m_modulus.reg.size()), 04163 m_workspace(5*m_modulus.reg.size()) 04164 { 04165 if (!m_modulus.IsOdd()) 04166 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus"); 04167 04168 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size()); 04169 } 04170 04171 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const 04172 { 04173 word *const T = m_workspace.begin(); 04174 word *const R = m_result.reg.begin(); 04175 const size_t N = m_modulus.reg.size(); 04176 assert(a.reg.size()<=N && b.reg.size()<=N); 04177 04178 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size()); 04179 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size()); 04180 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); 04181 return m_result; 04182 } 04183 04184 const Integer& MontgomeryRepresentation::Square(const Integer &a) const 04185 { 04186 word *const T = m_workspace.begin(); 04187 word *const R = m_result.reg.begin(); 04188 const size_t N = m_modulus.reg.size(); 04189 assert(a.reg.size()<=N); 04190 04191 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size()); 04192 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size()); 04193 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); 04194 return m_result; 04195 } 04196 04197 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const 04198 { 04199 word *const T = m_workspace.begin(); 04200 word *const R = m_result.reg.begin(); 04201 const size_t N = m_modulus.reg.size(); 04202 assert(a.reg.size()<=N); 04203 04204 CopyWords(T, a.reg, a.reg.size()); 04205 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); 04206 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); 04207 return m_result; 04208 } 04209 04210 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const 04211 { 04212 // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus; 04213 word *const T = m_workspace.begin(); 04214 word *const R = m_result.reg.begin(); 04215 const size_t N = m_modulus.reg.size(); 04216 assert(a.reg.size()<=N); 04217 04218 CopyWords(T, a.reg, a.reg.size()); 04219 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); 04220 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); 04221 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N); 04222 04223 // cout << "k=" << k << " N*32=" << 32*N << endl; 04224 04225 if (k>N*WORD_BITS) 04226 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N); 04227 else 04228 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N); 04229 04230 return m_result; 04231 } 04232 04233 NAMESPACE_END 04234 04235 #endif