// Crypto++
00001 // sosemanuk.cpp - written and placed in the public domain by Wei Dai 00002 00003 // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sosemanuk.cpp" to generate MASM code 00004 00005 #include "pch.h" 00006 00007 #ifndef CRYPTOPP_GENERATE_X64_MASM 00008 00009 #include "sosemanuk.h" 00010 #include "misc.h" 00011 #include "cpu.h" 00012 00013 #include "serpentp.h" 00014 00015 NAMESPACE_BEGIN(CryptoPP) 00016 00017 void SosemanukPolicy::CipherSetKey(const NameValuePairs ¶ms, const byte *userKey, size_t keylen) 00018 { 00019 Serpent_KeySchedule(m_key, 24, userKey, keylen); 00020 } 00021 00022 void SosemanukPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *iv, size_t length) 00023 { 00024 assert(length==16); 00025 00026 word32 a, b, c, d, e; 00027 00028 typedef BlockGetAndPut<word32, LittleEndian> Block; 00029 Block::Get(iv)(a)(b)(c)(d); 00030 00031 const word32 *k = m_key; 00032 unsigned int i=1; 00033 00034 do 00035 { 00036 beforeS0(KX); beforeS0(S0); afterS0(LT); 00037 afterS0(KX); afterS0(S1); afterS1(LT); 00038 if (i == 3) // after 18th round 00039 { 00040 m_state[4] = b; 00041 m_state[5] = e; 00042 m_state[10] = c; 00043 m_state[11] = a; 00044 } 00045 afterS1(KX); afterS1(S2); afterS2(LT); 00046 afterS2(KX); afterS2(S3); afterS3(LT); 00047 if (i == 2) // after 12th round 00048 { 00049 m_state[6] = c; 00050 m_state[7] = d; 00051 m_state[8] = b; 00052 m_state[9] = e; 00053 } 00054 afterS3(KX); afterS3(S4); afterS4(LT); 00055 afterS4(KX); afterS4(S5); afterS5(LT); 00056 afterS5(KX); afterS5(S6); afterS6(LT); 00057 afterS6(KX); afterS6(S7); afterS7(LT); 00058 00059 if (i == 3) 00060 break; 00061 00062 ++i; 00063 c = b; 00064 b = e; 00065 e = d; 00066 d = a; 00067 a = e; 00068 k += 32; 00069 } 00070 while (true); 00071 00072 afterS7(KX); 00073 00074 m_state[0] = a; 00075 m_state[1] = b; 00076 m_state[2] = e; 00077 m_state[3] = d; 00078 00079 #define XMUX(c, x, y) (x ^ (y & (0 - (c & 1)))) 00080 m_state[11] += XMUX(m_state[10], m_state[1], m_state[8]); 00081 
m_state[10] = rotlFixed(m_state[10] * 0x54655307, 7); 00082 } 00083 00084 extern "C" { 00085 word32 s_sosemanukMulTables[512] = { 00086 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64 00087 0x00000000, 0xE19FCF12, 0x6B973724, 0x8A08F836, 00088 0xD6876E48, 0x3718A15A, 0xBD10596C, 0x5C8F967E, 00089 0x05A7DC90, 0xE4381382, 0x6E30EBB4, 0x8FAF24A6, 00090 0xD320B2D8, 0x32BF7DCA, 0xB8B785FC, 0x59284AEE, 00091 0x0AE71189, 0xEB78DE9B, 0x617026AD, 0x80EFE9BF, 00092 0xDC607FC1, 0x3DFFB0D3, 0xB7F748E5, 0x566887F7, 00093 0x0F40CD19, 0xEEDF020B, 0x64D7FA3D, 0x8548352F, 00094 0xD9C7A351, 0x38586C43, 0xB2509475, 0x53CF5B67, 00095 0x146722BB, 0xF5F8EDA9, 0x7FF0159F, 0x9E6FDA8D, 00096 0xC2E04CF3, 0x237F83E1, 0xA9777BD7, 0x48E8B4C5, 00097 0x11C0FE2B, 0xF05F3139, 0x7A57C90F, 0x9BC8061D, 00098 0xC7479063, 0x26D85F71, 0xACD0A747, 0x4D4F6855, 00099 0x1E803332, 0xFF1FFC20, 0x75170416, 0x9488CB04, 00100 0xC8075D7A, 0x29989268, 0xA3906A5E, 0x420FA54C, 00101 0x1B27EFA2, 0xFAB820B0, 0x70B0D886, 0x912F1794, 00102 0xCDA081EA, 0x2C3F4EF8, 0xA637B6CE, 0x47A879DC, 00103 0x28CE44DF, 0xC9518BCD, 0x435973FB, 0xA2C6BCE9, 00104 0xFE492A97, 0x1FD6E585, 0x95DE1DB3, 0x7441D2A1, 00105 0x2D69984F, 0xCCF6575D, 0x46FEAF6B, 0xA7616079, 00106 0xFBEEF607, 0x1A713915, 0x9079C123, 0x71E60E31, 00107 0x22295556, 0xC3B69A44, 0x49BE6272, 0xA821AD60, 00108 0xF4AE3B1E, 0x1531F40C, 0x9F390C3A, 0x7EA6C328, 00109 0x278E89C6, 0xC61146D4, 0x4C19BEE2, 0xAD8671F0, 00110 0xF109E78E, 0x1096289C, 0x9A9ED0AA, 0x7B011FB8, 00111 0x3CA96664, 0xDD36A976, 0x573E5140, 0xB6A19E52, 00112 0xEA2E082C, 0x0BB1C73E, 0x81B93F08, 0x6026F01A, 00113 0x390EBAF4, 0xD89175E6, 0x52998DD0, 0xB30642C2, 00114 0xEF89D4BC, 0x0E161BAE, 0x841EE398, 0x65812C8A, 00115 0x364E77ED, 0xD7D1B8FF, 0x5DD940C9, 0xBC468FDB, 00116 0xE0C919A5, 0x0156D6B7, 0x8B5E2E81, 0x6AC1E193, 00117 0x33E9AB7D, 0xD276646F, 0x587E9C59, 0xB9E1534B, 00118 0xE56EC535, 0x04F10A27, 0x8EF9F211, 0x6F663D03, 00119 0x50358817, 0xB1AA4705, 0x3BA2BF33, 0xDA3D7021, 00120 0x86B2E65F, 0x672D294D, 
0xED25D17B, 0x0CBA1E69, 00121 0x55925487, 0xB40D9B95, 0x3E0563A3, 0xDF9AACB1, 00122 0x83153ACF, 0x628AF5DD, 0xE8820DEB, 0x091DC2F9, 00123 0x5AD2999E, 0xBB4D568C, 0x3145AEBA, 0xD0DA61A8, 00124 0x8C55F7D6, 0x6DCA38C4, 0xE7C2C0F2, 0x065D0FE0, 00125 0x5F75450E, 0xBEEA8A1C, 0x34E2722A, 0xD57DBD38, 00126 0x89F22B46, 0x686DE454, 0xE2651C62, 0x03FAD370, 00127 0x4452AAAC, 0xA5CD65BE, 0x2FC59D88, 0xCE5A529A, 00128 0x92D5C4E4, 0x734A0BF6, 0xF942F3C0, 0x18DD3CD2, 00129 0x41F5763C, 0xA06AB92E, 0x2A624118, 0xCBFD8E0A, 00130 0x97721874, 0x76EDD766, 0xFCE52F50, 0x1D7AE042, 00131 0x4EB5BB25, 0xAF2A7437, 0x25228C01, 0xC4BD4313, 00132 0x9832D56D, 0x79AD1A7F, 0xF3A5E249, 0x123A2D5B, 00133 0x4B1267B5, 0xAA8DA8A7, 0x20855091, 0xC11A9F83, 00134 0x9D9509FD, 0x7C0AC6EF, 0xF6023ED9, 0x179DF1CB, 00135 0x78FBCCC8, 0x996403DA, 0x136CFBEC, 0xF2F334FE, 00136 0xAE7CA280, 0x4FE36D92, 0xC5EB95A4, 0x24745AB6, 00137 0x7D5C1058, 0x9CC3DF4A, 0x16CB277C, 0xF754E86E, 00138 0xABDB7E10, 0x4A44B102, 0xC04C4934, 0x21D38626, 00139 0x721CDD41, 0x93831253, 0x198BEA65, 0xF8142577, 00140 0xA49BB309, 0x45047C1B, 0xCF0C842D, 0x2E934B3F, 00141 0x77BB01D1, 0x9624CEC3, 0x1C2C36F5, 0xFDB3F9E7, 00142 0xA13C6F99, 0x40A3A08B, 0xCAAB58BD, 0x2B3497AF, 00143 0x6C9CEE73, 0x8D032161, 0x070BD957, 0xE6941645, 00144 0xBA1B803B, 0x5B844F29, 0xD18CB71F, 0x3013780D, 00145 0x693B32E3, 0x88A4FDF1, 0x02AC05C7, 0xE333CAD5, 00146 0xBFBC5CAB, 0x5E2393B9, 0xD42B6B8F, 0x35B4A49D, 00147 0x667BFFFA, 0x87E430E8, 0x0DECC8DE, 0xEC7307CC, 00148 0xB0FC91B2, 0x51635EA0, 0xDB6BA696, 0x3AF46984, 00149 0x63DC236A, 0x8243EC78, 0x084B144E, 0xE9D4DB5C, 00150 0xB55B4D22, 0x54C48230, 0xDECC7A06, 0x3F53B514, 00151 #else 00152 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835, 00153 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679, 00154 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD, 00155 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1, 00156 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC, 00157 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0, 00158 0x0F40CD01, 
0xEEDF0212, 0x64D7FA27, 0x85483534, 00159 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78, 00160 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE, 00161 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2, 00162 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636, 00163 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A, 00164 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37, 00165 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B, 00166 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF, 00167 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3, 00168 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA, 00169 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6, 00170 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032, 00171 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E, 00172 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33, 00173 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F, 00174 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB, 00175 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7, 00176 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31, 00177 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D, 00178 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9, 00179 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5, 00180 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8, 00181 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4, 00182 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330, 00183 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C, 00184 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2, 00185 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE, 00186 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A, 00187 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276, 00188 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B, 00189 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77, 00190 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3, 00191 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF, 00192 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239, 00193 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75, 00194 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1, 00195 0x977218D8, 
0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED, 00196 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0, 00197 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC, 00198 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38, 00199 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174, 00200 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D, 00201 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71, 00202 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5, 00203 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9, 00204 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4, 00205 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8, 00206 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C, 00207 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770, 00208 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6, 00209 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA, 00210 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E, 00211 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472, 00212 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F, 00213 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973, 00214 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7, 00215 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB, 00216 #endif 00217 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE, 00218 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998, 00219 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32, 00220 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254, 00221 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF, 00222 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9, 00223 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403, 00224 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65, 00225 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C, 00226 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA, 00227 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550, 00228 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36, 00229 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD, 00230 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB, 00231 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61, 00232 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307, 00233 
0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A, 00234 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C, 00235 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6, 00236 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790, 00237 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B, 00238 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D, 00239 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7, 00240 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1, 00241 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58, 00242 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E, 00243 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094, 00244 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2, 00245 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469, 00246 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F, 00247 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5, 00248 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3, 00249 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF, 00250 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9, 00251 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813, 00252 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175, 00253 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE, 00254 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588, 00255 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722, 00256 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44, 00257 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD, 00258 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB, 00259 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671, 00260 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17, 00261 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C, 00262 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA, 00263 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940, 00264 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026, 00265 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B, 00266 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D, 00267 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7, 00268 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1, 00269 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A, 00270 
0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C, 00271 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6, 00272 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80, 00273 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879, 00274 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F, 00275 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5, 00276 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3, 00277 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748, 00278 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E, 00279 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84, 00280 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2 00281 }; 00282 } 00283 00284 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64 00285 unsigned int SosemanukPolicy::GetAlignment() const 00286 { 00287 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 00288 #ifdef __INTEL_COMPILER 00289 if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4 00290 #else 00291 if (HasSSE2()) 00292 #endif 00293 return 16; 00294 else 00295 #endif 00296 return GetAlignmentOf<word32>(); 00297 } 00298 00299 unsigned int SosemanukPolicy::GetOptimalBlockSize() const 00300 { 00301 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 00302 #ifdef __INTEL_COMPILER 00303 if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4 00304 #else 00305 if (HasSSE2()) 00306 #endif 00307 return 4*BYTES_PER_ITERATION; 00308 else 00309 #endif 00310 return BYTES_PER_ITERATION; 00311 } 00312 #endif 00313 00314 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 00315 extern "C" { 00316 void Sosemanuk_OperateKeystream(size_t iterationCount, const byte *input, byte *output, word32 *state); 00317 } 00318 #endif 00319 00320 #pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 00321 00322 void SosemanukPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) 00323 { 00324 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM 00325 00326 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 00327 
Sosemanuk_OperateKeystream(iterationCount, input, output, m_state.data()); 00328 return; 00329 #endif 00330 00331 #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 00332 #ifdef CRYPTOPP_GENERATE_X64_MASM 00333 ALIGN 8 00334 Sosemanuk_OperateKeystream PROC FRAME 00335 rex_push_reg rsi 00336 push_reg rdi 00337 alloc_stack(80*4*2+12*4+8*WORD_SZ + 2*16+8) 00338 save_xmm128 xmm6, 02f0h 00339 save_xmm128 xmm7, 0300h 00340 .endprolog 00341 mov rdi, r8 00342 mov rax, r9 00343 #else 00344 #ifdef __INTEL_COMPILER 00345 if (HasSSE2() && !IsP4()) // Intel compiler produces faster code for this algorithm on the P4 00346 #else 00347 if (HasSSE2()) 00348 #endif 00349 { 00350 #ifdef __GNUC__ 00351 #if CRYPTOPP_BOOL_X64 00352 FixedSizeAlignedSecBlock<byte, 80*4*2+12*4+8*WORD_SZ> workspace; 00353 #endif 00354 __asm__ __volatile__ 00355 ( 00356 ".intel_syntax noprefix;" 00357 AS_PUSH_IF86( bx) 00358 #else 00359 word32 *state = m_state; 00360 AS2( mov WORD_REG(ax), state) 00361 AS2( mov WORD_REG(di), output) 00362 AS2( mov WORD_REG(dx), input) 00363 AS2( mov WORD_REG(cx), iterationCount) 00364 #endif 00365 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM 00366 00367 #if defined(__GNUC__) && CRYPTOPP_BOOL_X64 00368 #define SSE2_workspace %5 00369 #else 00370 #define SSE2_workspace WORD_REG(sp) 00371 #endif 00372 00373 #define SSE2_output WORD_PTR [SSE2_workspace+1*WORD_SZ] 00374 #define SSE2_input WORD_PTR [SSE2_workspace+2*WORD_SZ] 00375 #define SSE2_wordsLeft WORD_PTR [SSE2_workspace+3*WORD_SZ] 00376 #define SSE2_diEnd WORD_PTR [SSE2_workspace+4*WORD_SZ] 00377 #define SSE2_pMulTables WORD_PTR [SSE2_workspace+5*WORD_SZ] 00378 #define SSE2_state WORD_PTR [SSE2_workspace+6*WORD_SZ] 00379 #define SSE2_wordsLeft2 WORD_PTR [SSE2_workspace+7*WORD_SZ] 00380 #define SSE2_stateCopy SSE2_workspace + 8*WORD_SZ 00381 #define SSE2_uvStart SSE2_stateCopy + 12*4 00382 00383 #if CRYPTOPP_BOOL_X86 00384 AS_PUSH_IF86( bp) 00385 AS2( mov AS_REG_6, esp) 00386 AS2( and esp, -16) 00387 AS2( sub esp, 80*4*2+12*4+8*WORD_SZ) 
// 80 v's, 80 u's, 12 state, 8 locals 00388 AS2( mov [esp], AS_REG_6) 00389 #endif 00390 AS2( mov SSE2_output, WORD_REG(di)) 00391 AS2( mov SSE2_input, WORD_REG(dx)) 00392 AS2( mov SSE2_state, WORD_REG(ax)) 00393 #ifndef _MSC_VER 00394 AS2( mov SSE2_pMulTables, WORD_REG(si)) 00395 #endif 00396 AS2( lea WORD_REG(cx), [4*WORD_REG(cx)+WORD_REG(cx)]) 00397 AS2( lea WORD_REG(si), [4*WORD_REG(cx)]) 00398 AS2( mov SSE2_wordsLeft, WORD_REG(si)) 00399 AS2( movdqa xmm0, [WORD_REG(ax)+0*16]) // copy state to stack to save a register 00400 AS2( movdqa [SSE2_stateCopy+0*16], xmm0) 00401 AS2( movdqa xmm0, [WORD_REG(ax)+1*16]) 00402 AS2( movdqa [SSE2_stateCopy+1*16], xmm0) 00403 AS2( movq xmm0, QWORD PTR [WORD_REG(ax)+2*16]) 00404 AS2( movq QWORD PTR [SSE2_stateCopy+2*16], xmm0) 00405 AS2( psrlq xmm0, 32) 00406 AS2( movd AS_REG_6d, xmm0) // s(9) 00407 AS2( mov ecx, [WORD_REG(ax)+10*4]) 00408 AS2( mov edx, [WORD_REG(ax)+11*4]) 00409 AS2( pcmpeqb xmm7, xmm7) // all ones 00410 00411 #define s(i) SSE2_stateCopy + ASM_MOD(i,10)*4 00412 #define u(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 00413 #define v(j) WORD_REG(di) + (ASM_MOD(j,4)*20 + (j/4)) * 4 + 80*4 00414 00415 #define R10 ecx 00416 #define R11 edx 00417 #define R20 edx 00418 #define R21 ecx 00419 // workaround bug in GAS 2.15 00420 #define R20r WORD_REG(dx) 00421 #define R21r WORD_REG(cx) 00422 00423 #define SSE2_STEP(i, j) \ 00424 AS2( mov eax, [s(i+0)])\ 00425 AS2( mov [v(i)], eax)\ 00426 AS2( rol eax, 8)\ 00427 AS2( lea AS_REG_7, [AS_REG_6 + R2##j##r])\ 00428 AS2( xor AS_REG_7d, R1##j)\ 00429 AS2( mov [u(i)], AS_REG_7d)\ 00430 AS2( mov AS_REG_7d, 1)\ 00431 AS2( and AS_REG_7d, R2##j)\ 00432 AS1( neg AS_REG_7d)\ 00433 AS2( and AS_REG_7d, AS_REG_6d)\ 00434 AS2( xor AS_REG_6d, eax)\ 00435 AS2( movzx eax, al)\ 00436 AS2( xor AS_REG_6d, [WORD_REG(si)+WORD_REG(ax)*4])\ 00437 AS2( mov eax, [s(i+3)])\ 00438 AS2( xor AS_REG_7d, [s(i+2)])\ 00439 AS2( add R1##j, AS_REG_7d)\ 00440 AS2( movzx AS_REG_7d, al)\ 00441 AS2( shr eax, 8)\ 
00442 AS2( xor AS_REG_6d, [WORD_REG(si)+1024+AS_REG_7*4])\ 00443 AS2( xor AS_REG_6d, eax)\ 00444 AS2( imul R2##j, AS_HEX(54655307))\ 00445 AS2( rol R2##j, 7)\ 00446 AS2( mov [s(i+0)], AS_REG_6d)\ 00447 00448 ASL(2) // outer loop, each iteration of this processes 80 words 00449 AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u 00450 AS2( mov WORD_REG(ax), 80) 00451 AS2( cmp WORD_REG(si), 80) 00452 AS2( cmovg WORD_REG(si), WORD_REG(ax)) 00453 AS2( mov SSE2_wordsLeft2, WORD_REG(si)) 00454 AS2( lea WORD_REG(si), [WORD_REG(di)+WORD_REG(si)]) // use to end first inner loop 00455 AS2( mov SSE2_diEnd, WORD_REG(si)) 00456 #ifdef _MSC_VER 00457 AS2( lea WORD_REG(si), s_sosemanukMulTables) 00458 #else 00459 AS2( mov WORD_REG(si), SSE2_pMulTables) 00460 #endif 00461 00462 ASL(0) // first inner loop, 20 words each, 4 iterations 00463 SSE2_STEP(0, 0) 00464 SSE2_STEP(1, 1) 00465 SSE2_STEP(2, 0) 00466 SSE2_STEP(3, 1) 00467 SSE2_STEP(4, 0) 00468 SSE2_STEP(5, 1) 00469 SSE2_STEP(6, 0) 00470 SSE2_STEP(7, 1) 00471 SSE2_STEP(8, 0) 00472 SSE2_STEP(9, 1) 00473 SSE2_STEP(10, 0) 00474 SSE2_STEP(11, 1) 00475 SSE2_STEP(12, 0) 00476 SSE2_STEP(13, 1) 00477 SSE2_STEP(14, 0) 00478 SSE2_STEP(15, 1) 00479 SSE2_STEP(16, 0) 00480 SSE2_STEP(17, 1) 00481 SSE2_STEP(18, 0) 00482 SSE2_STEP(19, 1) 00483 // loop 00484 AS2( add WORD_REG(di), 5*4) 00485 AS2( cmp WORD_REG(di), SSE2_diEnd) 00486 ASJ( jne, 0, b) 00487 00488 AS2( mov WORD_REG(ax), SSE2_input) 00489 AS2( mov AS_REG_7, SSE2_output) 00490 AS2( lea WORD_REG(di), [SSE2_uvStart]) // start of v and u 00491 AS2( mov WORD_REG(si), SSE2_wordsLeft2) 00492 00493 ASL(1) // second inner loop, 16 words each, 5 iterations 00494 AS2( movdqa xmm0, [WORD_REG(di)+0*20*4]) 00495 AS2( movdqa xmm2, [WORD_REG(di)+2*20*4]) 00496 AS2( movdqa xmm3, [WORD_REG(di)+3*20*4]) 00497 AS2( movdqa xmm1, [WORD_REG(di)+1*20*4]) 00498 // S2 00499 AS2( movdqa xmm4, xmm0) 00500 AS2( pand xmm0, xmm2) 00501 AS2( pxor xmm0, xmm3) 00502 AS2( pxor xmm2, xmm1) 00503 AS2( pxor xmm2, 
xmm0) 00504 AS2( por xmm3, xmm4) 00505 AS2( pxor xmm3, xmm1) 00506 AS2( pxor xmm4, xmm2) 00507 AS2( movdqa xmm1, xmm3) 00508 AS2( por xmm3, xmm4) 00509 AS2( pxor xmm3, xmm0) 00510 AS2( pand xmm0, xmm1) 00511 AS2( pxor xmm4, xmm0) 00512 AS2( pxor xmm1, xmm3) 00513 AS2( pxor xmm1, xmm4) 00514 AS2( pxor xmm4, xmm7) 00515 // xor with v 00516 AS2( pxor xmm2, [WORD_REG(di)+80*4]) 00517 AS2( pxor xmm3, [WORD_REG(di)+80*5]) 00518 AS2( pxor xmm1, [WORD_REG(di)+80*6]) 00519 AS2( pxor xmm4, [WORD_REG(di)+80*7]) 00520 // exit loop early if less than 16 words left to output 00521 // this is necessary because block size is 20 words, and we output 16 words in each iteration of this loop 00522 AS2( cmp WORD_REG(si), 16) 00523 ASJ( jl, 4, f) 00524 // unpack 00525 AS2( movdqa xmm6, xmm2) 00526 AS2( punpckldq xmm2, xmm3) 00527 AS2( movdqa xmm5, xmm1) 00528 AS2( punpckldq xmm1, xmm4) 00529 AS2( movdqa xmm0, xmm2) 00530 AS2( punpcklqdq xmm2, xmm1) 00531 AS2( punpckhqdq xmm0, xmm1) 00532 AS2( punpckhdq xmm6, xmm3) 00533 AS2( punpckhdq xmm5, xmm4) 00534 AS2( movdqa xmm3, xmm6) 00535 AS2( punpcklqdq xmm6, xmm5) 00536 AS2( punpckhqdq xmm3, xmm5) 00537 // output keystream 00538 AS_XMM_OUTPUT4(SSE2_Sosemanuk_Output, WORD_REG(ax), AS_REG_7, 2,0,6,3, 1, 0,1,2,3, 4) 00539 00540 // loop 00541 AS2( add WORD_REG(di), 4*4) 00542 AS2( sub WORD_REG(si), 16) 00543 ASJ( jnz, 1, b) 00544 00545 // outer loop 00546 AS2( mov WORD_REG(si), SSE2_wordsLeft) 00547 AS2( sub WORD_REG(si), 80) 00548 ASJ( jz, 6, f) 00549 AS2( mov SSE2_wordsLeft, WORD_REG(si)) 00550 AS2( mov SSE2_input, WORD_REG(ax)) 00551 AS2( mov SSE2_output, AS_REG_7) 00552 ASJ( jmp, 2, b) 00553 00554 ASL(4) // final output of less than 16 words 00555 AS2( test WORD_REG(ax), WORD_REG(ax)) 00556 ASJ( jz, 5, f) 00557 AS2( movd xmm0, dword ptr [WORD_REG(ax)+0*4]) 00558 AS2( pxor xmm2, xmm0) 00559 AS2( movd xmm0, dword ptr [WORD_REG(ax)+1*4]) 00560 AS2( pxor xmm3, xmm0) 00561 AS2( movd xmm0, dword ptr [WORD_REG(ax)+2*4]) 00562 AS2( pxor xmm1, xmm0) 
00563 AS2( movd xmm0, dword ptr [WORD_REG(ax)+3*4]) 00564 AS2( pxor xmm4, xmm0) 00565 AS2( add WORD_REG(ax), 16) 00566 ASL(5) 00567 AS2( movd dword ptr [AS_REG_7+0*4], xmm2) 00568 AS2( movd dword ptr [AS_REG_7+1*4], xmm3) 00569 AS2( movd dword ptr [AS_REG_7+2*4], xmm1) 00570 AS2( movd dword ptr [AS_REG_7+3*4], xmm4) 00571 AS2( sub WORD_REG(si), 4) 00572 ASJ( jz, 6, f) 00573 AS2( add AS_REG_7, 16) 00574 AS2( psrldq xmm2, 4) 00575 AS2( psrldq xmm3, 4) 00576 AS2( psrldq xmm1, 4) 00577 AS2( psrldq xmm4, 4) 00578 ASJ( jmp, 4, b) 00579 00580 ASL(6) // save state 00581 AS2( mov AS_REG_6, SSE2_state) 00582 AS2( movdqa xmm0, [SSE2_stateCopy+0*16]) 00583 AS2( movdqa [AS_REG_6+0*16], xmm0) 00584 AS2( movdqa xmm0, [SSE2_stateCopy+1*16]) 00585 AS2( movdqa [AS_REG_6+1*16], xmm0) 00586 AS2( movq xmm0, QWORD PTR [SSE2_stateCopy+2*16]) 00587 AS2( movq QWORD PTR [AS_REG_6+2*16], xmm0) 00588 AS2( mov [AS_REG_6+10*4], ecx) 00589 AS2( mov [AS_REG_6+11*4], edx) 00590 00591 AS_POP_IF86( sp) 00592 AS_POP_IF86( bp) 00593 00594 #ifdef __GNUC__ 00595 AS_POP_IF86( bx) 00596 ".att_syntax prefix;" 00597 : 00598 : "a" (m_state.m_ptr), "c" (iterationCount), "S" (s_sosemanukMulTables), "D" (output), "d" (input) 00599 #if CRYPTOPP_BOOL_X64 00600 , "r" (workspace.m_ptr) 00601 : "memory", "cc", "%r9", "%r10", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7" 00602 #else 00603 : "memory", "cc" 00604 #endif 00605 ); 00606 #endif 00607 #ifdef CRYPTOPP_GENERATE_X64_MASM 00608 movdqa xmm6, [rsp + 02f0h] 00609 movdqa xmm7, [rsp + 0300h] 00610 add rsp, 80*4*2+12*4+8*WORD_SZ + 2*16+8 00611 pop rdi 00612 pop rsi 00613 ret 00614 Sosemanuk_OperateKeystream ENDP 00615 #else 00616 } 00617 else 00618 #endif 00619 #endif 00620 #ifndef CRYPTOPP_GENERATE_X64_MASM 00621 { 00622 #if CRYPTOPP_BOOL_X86 | CRYPTOPP_BOOL_X64 00623 #define MUL_A(x) (x = rotlFixed(x, 8), x ^ s_sosemanukMulTables[byte(x)]) 00624 #else 00625 #define MUL_A(x) (((x) << 8) ^ s_sosemanukMulTables[(x) >> 24]) 00626 #endif 00627 
00628 #define DIV_A(x) (((x) >> 8) ^ s_sosemanukMulTables[256 + byte(x)]) 00629 00630 #define r1(i) ((i%2) ? reg2 : reg1) 00631 #define r2(i) ((i%2) ? reg1 : reg2) 00632 00633 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, v, u) \ 00634 u = (s##x9 + r2(x0)) ^ r1(x0);\ 00635 v = s##x0;\ 00636 s##x0 = MUL_A(s##x0) ^ DIV_A(s##x3) ^ s##x9;\ 00637 r1(x0) += XMUX(r2(x0), s##x2, s##x9);\ 00638 r2(x0) = rotlFixed(r2(x0) * 0x54655307, 7);\ 00639 00640 #define SOSEMANUK_OUTPUT(x) \ 00641 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, u2 ^ v0);\ 00642 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, u3 ^ v1);\ 00643 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, u1 ^ v2);\ 00644 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, u4 ^ v3); 00645 00646 #define OUTPUT4 \ 00647 S2(0, u0, u1, u2, u3, u4);\ 00648 CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SOSEMANUK_OUTPUT, 4*4); 00649 00650 word32 s0 = m_state[0]; 00651 word32 s1 = m_state[1]; 00652 word32 s2 = m_state[2]; 00653 word32 s3 = m_state[3]; 00654 word32 s4 = m_state[4]; 00655 word32 s5 = m_state[5]; 00656 word32 s6 = m_state[6]; 00657 word32 s7 = m_state[7]; 00658 word32 s8 = m_state[8]; 00659 word32 s9 = m_state[9]; 00660 word32 reg1 = m_state[10]; 00661 word32 reg2 = m_state[11]; 00662 word32 u0, u1, u2, u3, u4, v0, v1, v2, v3; 00663 00664 do 00665 { 00666 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v0, u0) 00667 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v1, u1) 00668 STEP(2, 3, 4, 5, 6, 7, 8, 9, 0, 1, v2, u2) 00669 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v3, u3) 00670 OUTPUT4 00671 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v0, u0) 00672 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v1, u1) 00673 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v2, u2) 00674 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v3, u3) 00675 OUTPUT4 00676 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v0, u0) 00677 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v1, u1) 00678 STEP(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, v2, u2) 00679 STEP(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, v3, u3) 00680 OUTPUT4 00681 STEP(2, 3, 
4, 5, 6, 7, 8, 9, 0, 1, v0, u0) 00682 STEP(3, 4, 5, 6, 7, 8, 9, 0, 1, 2, v1, u1) 00683 STEP(4, 5, 6, 7, 8, 9, 0, 1, 2, 3, v2, u2) 00684 STEP(5, 6, 7, 8, 9, 0, 1, 2, 3, 4, v3, u3) 00685 OUTPUT4 00686 STEP(6, 7, 8, 9, 0, 1, 2, 3, 4, 5, v0, u0) 00687 STEP(7, 8, 9, 0, 1, 2, 3, 4, 5, 6, v1, u1) 00688 STEP(8, 9, 0, 1, 2, 3, 4, 5, 6, 7, v2, u2) 00689 STEP(9, 0, 1, 2, 3, 4, 5, 6, 7, 8, v3, u3) 00690 OUTPUT4 00691 } 00692 while (--iterationCount); 00693 00694 m_state[0] = s0; 00695 m_state[1] = s1; 00696 m_state[2] = s2; 00697 m_state[3] = s3; 00698 m_state[4] = s4; 00699 m_state[5] = s5; 00700 m_state[6] = s6; 00701 m_state[7] = s7; 00702 m_state[8] = s8; 00703 m_state[9] = s9; 00704 m_state[10] = reg1; 00705 m_state[11] = reg2; 00706 } 00707 } 00708 00709 NAMESPACE_END 00710 00711 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM