8 #ifndef CRYPTOPP_GENERATE_X64_MASM 15 #if CRYPTOPP_MSC_VERSION 16 # pragma warning(disable: 4702 4740) 21 #if defined(CRYPTOPP_DISABLE_SALSA_ASM) 22 # undef CRYPTOPP_X86_ASM_AVAILABLE 23 # undef CRYPTOPP_X32_ASM_AVAILABLE 24 # undef CRYPTOPP_X64_ASM_AVAILABLE 25 # undef CRYPTOPP_SSE2_ASM_AVAILABLE 26 # undef CRYPTOPP_SSSE3_ASM_AVAILABLE 31 #if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING) 32 void Salsa20_TestInstantiations()
// Keys the cipher state: validates the round count, fills the key-dependent
// words of m_state, and sets the four constant words m_state[0..3].
// NOTE(review): this listing is missing lines (braces, the declarations of
// get1/get2, and wherever m_rounds is assigned); the comments below describe
// only the statements that are visible here.
// NOTE(review): the first parameter reads as a mis-encoded "&params" (HTML
// entity damage from extraction) - confirm against the real source file.
39 void Salsa20_Policy::CipherSetKey(
const NameValuePairs ¶ms,
const byte *key,
size_t length)
// Only 8, 12 or 20 rounds are accepted; any other value throws InvalidRounds.
43 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
44 throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds);
// get1 fills state words 13, 10, 7, 4 and get2 fills words 15, 12, 9, 6.
// Presumably both read from the key bytes - TODO confirm, their declarations
// are not part of this listing.
48 get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]);
50 get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]);
// Constant words. Read as little-endian ASCII, words 0..3 spell
// "expand 32-byte k", or "expand 16-byte k" when length == 16.
53 m_state[0] = 0x61707865;
54 m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e;
55 m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32;
56 m_state[3] = 0x6b206574;
// Resynchronizes the cipher: fills m_state[14] and m_state[11] and resets
// m_state[8] and m_state[5] to zero.
// NOTE(review): the declaration of `get` is on a line missing from this
// listing; presumably it reads from IV - TODO confirm.
59 void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer,
const byte *
IV,
size_t length)
// keystreamBuffer and length are not used by the visible statements.
61 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
65 get(m_state[14])(m_state[11]);
// Zero the two words that SeekToIteration() uses for the iteration count.
66 m_state[8] = m_state[5] = 0;
// Stores iterationCount in the state, split across two word32 slots.
69 void Salsa20_Policy::SeekToIteration(lword iterationCount)
// m_state[8] receives iterationCount truncated to word32.
71 m_state[8] = (word32)iterationCount;
// m_state[5] receives SafeRightShift<32>(iterationCount) truncated to word32
// (presumably the upper 32 bits of the count - TODO confirm helper semantics).
72 m_state[5] = (word32)SafeRightShift<32>(iterationCount);
75 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) 78 #if CRYPTOPP_SSE2_ASM_AVAILABLE 83 return GetAlignmentOf<word32>();
88 #if CRYPTOPP_SSE2_ASM_AVAILABLE 90 return 4*BYTES_PER_ITERATION;
93 return BYTES_PER_ITERATION;
// Declaration of Salsa20_OperateKeystream, visible only when
// CRYPTOPP_X64_MASM_AVAILABLE is defined. Salsa20_Policy::OperateKeystream
// calls it with (output, input, iterationCount, m_rounds, m_state.data()).
// NOTE(review): the PROC of the same name in the CRYPTOPP_GENERATE_X64_MASM
// section appears to be its implementation - TODO confirm.
97 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 99 void Salsa20_OperateKeystream(byte *output,
const byte *input,
size_t iterationCount,
int rounds,
void *state);
103 #if CRYPTOPP_MSC_VERSION 104 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code 107 void Salsa20_Policy::OperateKeystream(
KeystreamOperation operation, byte *output,
const byte *input,
size_t iterationCount)
109 #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM 111 #ifdef CRYPTOPP_X64_MASM_AVAILABLE 112 Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.
data());
116 #if CRYPTOPP_SSE2_ASM_AVAILABLE 117 #ifdef CRYPTOPP_GENERATE_X64_MASM 119 Salsa20_OperateKeystream PROC FRAME
120 mov r10, [rsp + 5*8] ; state
121 alloc_stack(10*16 + 32*16 + 8)
122 save_xmm128 xmm6, 0200h
123 save_xmm128 xmm7, 0210h
124 save_xmm128 xmm8, 0220h
125 save_xmm128 xmm9, 0230h
126 save_xmm128 xmm10, 0240h
127 save_xmm128 xmm11, 0250h
128 save_xmm128 xmm12, 0260h
129 save_xmm128 xmm13, 0270h
130 save_xmm128 xmm14, 0280h
131 save_xmm128 xmm15, 0290h
134 #define REG_output rcx 135 #define REG_input rdx 136 #define REG_iterationCount r8 137 #define REG_state r10 138 #define REG_rounds e9d 139 #define REG_roundsLeft eax 140 #define REG_temp32 r11d 142 #define SSE2_WORKSPACE rsp 146 #if CRYPTOPP_BOOL_X64 147 #define REG_output %1 149 #define REG_iterationCount %2 151 #define REG_rounds %3 152 #define REG_roundsLeft eax 153 #define REG_temp32 edx 155 #define SSE2_WORKSPACE %5 157 CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32];
159 #define REG_output edi 160 #define REG_input eax 161 #define REG_iterationCount ecx 162 #define REG_state esi 163 #define REG_rounds edx 164 #define REG_roundsLeft ebx 165 #define REG_temp32 ebp 167 #define SSE2_WORKSPACE esp + WORD_SZ 176 void *s = m_state.
data();
179 AS2( mov REG_iterationCount, iterationCount)
180 AS2( mov REG_input, input)
181 AS2( mov REG_output, output)
182 AS2( mov REG_state, s)
183 AS2( mov REG_rounds, r)
185 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM 188 AS2( cmp REG_iterationCount, 4)
191 #if CRYPTOPP_BOOL_X86 198 #define SSE2_EXPAND_S(i, j) \ 199 ASS( pshufd xmm4, xmm##i, j, j, j, j) \ 200 AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4) 202 AS2( movdqa xmm0, [REG_state + 0*16])
203 AS2( movdqa xmm1, [REG_state + 1*16])
204 AS2( movdqa xmm2, [REG_state + 2*16])
205 AS2( movdqa xmm3, [REG_state + 3*16])
221 #define SSE2_EXPAND_S85(i) \ 222 AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \ 223 AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \ 224 AS2( add REG_roundsLeft, 1) \ 225 AS2( adc REG_temp32, 0) 228 AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4])
229 AS2( mov REG_temp32, dword ptr [REG_state + 5*4])
234 AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft)
235 AS2( mov dword ptr [REG_state + 5*4], REG_temp32)
237 #define SSE2_QUARTER_ROUND(a, b, d, i) \ 238 AS2( movdqa xmm4, xmm##d) \ 239 AS2( paddd xmm4, xmm##a) \ 240 AS2( movdqa xmm5, xmm4) \ 241 AS2( pslld xmm4, i) \ 242 AS2( psrld xmm5, 32-i) \ 243 AS2( pxor xmm##b, xmm4) \ 244 AS2( pxor xmm##b, xmm5) 246 #define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 247 #define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) 248 #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 249 #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 250 #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) 251 #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) 252 #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) 253 #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 254 #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) 255 #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 256 #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) 257 #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 258 #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) 259 #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) 260 #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) 261 #define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 262 #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) 263 #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 264 #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) 265 #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) 266 #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) 267 #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) 268 #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) 269 #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) 270 #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) 271 #define L26(A,B,C,D,a,b,c,d,i) 
AS2( paddd xmm##A, xmm##D) 272 #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) 273 #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) 274 #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) 275 #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) 276 #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) 277 #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) 279 #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \ 280 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \ 281 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \ 282 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \ 283 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \ 284 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \ 285 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \ 286 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \ 287 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \ 288 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \ 289 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \ 290 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \ 291 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \ 292 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \ 293 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \ 294 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \ 295 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \ 296 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \ 297 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \ 298 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \ 299 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \ 300 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \ 301 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \ 302 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \ 303 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \ 304 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \ 305 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \ 306 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \ 307 
L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \ 308 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \ 309 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \ 310 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \ 311 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) 313 #define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \ 314 L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \ 315 L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \ 316 L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \ 317 L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \ 318 L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \ 319 L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \ 320 L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \ 321 L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \ 322 L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \ 323 L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \ 324 L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \ 325 L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \ 326 L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \ 327 L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \ 328 L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, 
A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \ 329 L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \ 330 L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \ 331 L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \ 332 L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \ 333 L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \ 334 L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \ 335 L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \ 336 L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \ 337 L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \ 338 L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \ 339 L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \ 340 L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \ 341 L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \ 342 L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \ 343 L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \ 344 L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \ 345 L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i) 347 #if CRYPTOPP_BOOL_X64 348 
SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
350 SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15)
351 SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)
353 AS2( mov REG_roundsLeft, REG_rounds)
356 ASL(SSE2_Salsa_Output)
357 AS2( movdqa xmm0, xmm4)
358 AS2( punpckldq xmm4, xmm5)
359 AS2( movdqa xmm1, xmm6)
360 AS2( punpckldq xmm6, xmm7)
361 AS2( movdqa xmm2, xmm4)
362 AS2( punpcklqdq xmm4, xmm6)
363 AS2( punpckhqdq xmm2, xmm6)
364 AS2( punpckhdq xmm0, xmm5)
365 AS2( punpckhdq xmm1, xmm7)
366 AS2( movdqa xmm6, xmm0)
367 AS2( punpcklqdq xmm0, xmm1)
368 AS2( punpckhqdq xmm6, xmm1)
369 AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1)
373 #if CRYPTOPP_BOOL_X64 374 SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)
376 SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)
378 SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15)
379 SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13)
381 SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6)
382 SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)
384 AS2( sub REG_roundsLeft, 2)
387 #define SSE2_OUTPUT_4(a, b, c, d) \ 388 AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\ 389 AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\ 390 AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\ 391 AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\ 392 AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\ 393 AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\ 394 AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\ 395 AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\ 396 ASC( call, SSE2_Salsa_Output) 398 SSE2_OUTPUT_4(0, 13, 10, 7)
399 SSE2_OUTPUT_4(4, 1, 14, 11)
400 SSE2_OUTPUT_4(8, 5, 2, 15)
401 SSE2_OUTPUT_4(12, 9, 6, 3)
402 AS2( test REG_input, REG_input)
404 AS2( add REG_input, 12*16)
406 AS2( add REG_output, 12*16)
407 AS2( sub REG_iterationCount, 4)
408 AS2( cmp REG_iterationCount, 4)
413 AS2( sub REG_iterationCount, 1)
415 AS2( movdqa xmm0, [REG_state + 0*16])
416 AS2( movdqa xmm1, [REG_state + 1*16])
417 AS2( movdqa xmm2, [REG_state + 2*16])
418 AS2( movdqa xmm3, [REG_state + 3*16])
419 AS2( mov REG_roundsLeft, REG_rounds)
422 SSE2_QUARTER_ROUND(0, 1, 3, 7)
423 SSE2_QUARTER_ROUND(1, 2, 0, 9)
424 SSE2_QUARTER_ROUND(2, 3, 1, 13)
425 SSE2_QUARTER_ROUND(3, 0, 2, 18)
426 ASS( pshufd xmm1, xmm1, 2, 1, 0, 3)
427 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
428 ASS( pshufd xmm3, xmm3, 0, 3, 2, 1)
429 SSE2_QUARTER_ROUND(0, 3, 1, 7)
430 SSE2_QUARTER_ROUND(3, 2, 0, 9)
431 SSE2_QUARTER_ROUND(2, 1, 3, 13)
432 SSE2_QUARTER_ROUND(1, 0, 2, 18)
433 ASS( pshufd xmm1, xmm1, 0, 3, 2, 1)
434 ASS( pshufd xmm2, xmm2, 1, 0, 3, 2)
435 ASS( pshufd xmm3, xmm3, 2, 1, 0, 3)
436 AS2( sub REG_roundsLeft, 2)
439 AS2( paddd xmm0, [REG_state + 0*16])
440 AS2( paddd xmm1, [REG_state + 1*16])
441 AS2( paddd xmm2, [REG_state + 2*16])
442 AS2( paddd xmm3, [REG_state + 3*16])
444 AS2( add dword ptr [REG_state + 8*4], 1)
445 AS2( adc dword ptr [REG_state + 5*4], 0)
447 AS2( pcmpeqb xmm6, xmm6)
449 ASS( pshufd xmm7, xmm6, 0, 1, 2, 3)
450 AS2( movdqa xmm4, xmm0)
451 AS2( movdqa xmm5, xmm3)
452 AS2( pand xmm0, xmm7)
453 AS2( pand xmm4, xmm6)
454 AS2( pand xmm3, xmm6)
455 AS2( pand xmm5, xmm7)
457 AS2( movdqa xmm5, xmm1)
458 AS2( pand xmm1, xmm7)
459 AS2( pand xmm5, xmm6)
461 AS2( pand xmm6, xmm2)
462 AS2( pand xmm2, xmm7)
466 AS2( movdqa xmm5, xmm4)
467 AS2( movdqa xmm6, xmm0)
468 AS3( shufpd xmm4, xmm1, 2)
469 AS3( shufpd xmm0, xmm2, 2)
470 AS3( shufpd xmm1, xmm5, 2)
471 AS3( shufpd xmm2, xmm6, 2)
474 AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4)
482 #if CRYPTOPP_BOOL_X64 483 :
"+r" (input),
"+r" (output),
"+r" (iterationCount)
484 :
"r" (m_rounds),
"r" (m_state.
begin()),
"r" (workspace)
485 :
"%eax",
"%rdx",
"memory",
"cc",
"%xmm0",
"%xmm1",
"%xmm2",
"%xmm3",
"%xmm4",
"%xmm5",
"%xmm6",
"%xmm7",
"%xmm8",
"%xmm9",
"%xmm10",
"%xmm11",
"%xmm12",
"%xmm13",
"%xmm14",
"%xmm15" 487 :
"+a" (input),
"+D" (output),
"+c" (iterationCount)
488 :
"d" (m_rounds),
"S" (m_state.
begin())
493 #ifdef CRYPTOPP_GENERATE_X64_MASM 494 movdqa xmm6, [rsp + 0200h]
495 movdqa xmm7, [rsp + 0210h]
496 movdqa xmm8, [rsp + 0220h]
497 movdqa xmm9, [rsp + 0230h]
498 movdqa xmm10, [rsp + 0240h]
499 movdqa xmm11, [rsp + 0250h]
500 movdqa xmm12, [rsp + 0260h]
501 movdqa xmm13, [rsp + 0270h]
502 movdqa xmm14, [rsp + 0280h]
503 movdqa xmm15, [rsp + 0290h]
504 add rsp, 10*16 + 32*16 + 8
506 Salsa20_OperateKeystream ENDP
512 #ifndef CRYPTOPP_GENERATE_X64_MASM 514 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
516 while (iterationCount--)
518 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
519 x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
520 x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
521 x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15];
523 for (
int i=m_rounds; i>0; i-=2)
525 #define QUARTER_ROUND(a, b, c, d) \ 526 b = b ^ rotlConstant<7>(a + d); \ 527 c = c ^ rotlConstant<9>(b + a); \ 528 d = d ^ rotlConstant<13>(c + b); \ 529 a = a ^ rotlConstant<18>(d + c); 531 QUARTER_ROUND(x0, x4, x8, x12)
532 QUARTER_ROUND(x1, x5, x9, x13)
533 QUARTER_ROUND(x2, x6, x10, x14)
534 QUARTER_ROUND(x3, x7, x11, x15)
536 QUARTER_ROUND(x0, x13, x10, x7)
537 QUARTER_ROUND(x1, x14, x11, x4)
538 QUARTER_ROUND(x2, x15, x8, x5)
539 QUARTER_ROUND(x3, x12, x9, x6)
542 #define SALSA_OUTPUT(x) {\ 543 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ 544 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\ 545 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\ 546 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\ 547 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ 548 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\ 549 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\ 550 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\ 551 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ 552 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\ 553 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\ 554 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\ 555 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ 556 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\ 557 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\ 558 CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);} 560 #ifndef CRYPTOPP_DOXYGEN_PROCESSING 564 if (++m_state[8] == 0)
574 if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20))
575 throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds);
582 m_state[0] = 0x61707865;
583 m_state[1] = 0x3320646e;
584 m_state[2] = 0x79622d32;
585 m_state[3] = 0x6b206574;
590 CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length);
593 word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
596 get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]);
598 x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3];
599 x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7];
600 x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
602 for (
int i=m_rounds; i>0; i-=2)
604 QUARTER_ROUND(x0, x4, x8, x12)
605 QUARTER_ROUND(x1, x5, x9, x13)
606 QUARTER_ROUND(x2, x6, x10, x14)
607 QUARTER_ROUND(x3, x7, x11, x15)
609 QUARTER_ROUND(x0, x13, x10, x7)
610 QUARTER_ROUND(x1, x14, x11, x4)
611 QUARTER_ROUND(x2, x15, x8, x5)
612 QUARTER_ROUND(x3, x12, x9, x6)
615 m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3;
616 m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5;
617 m_state[8] = m_state[5] = 0;
622 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM int GetIntValueWithDefault(const char *name, int defaultValue) const
Get a named value with type int, with default.
Standard names for retrieving values by name when working with NameValuePairs.
Utility functions for the Crypto++ library.
Library configuration file.
virtual unsigned int GetOptimalBlockSize() const
Provides the ideal number of bytes to process.
unsigned int GetAlignment() const
Provides data alignment requirements.
#define CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(x, y)
Helper macro to implement OperateKeystream.
byte order is little-endian
void CipherSetKey(const NameValuePairs &params, const byte *key, size_t length)
Key the cipher.
Exception thrown when an invalid number of rounds is encountered.
A::pointer data()
Provides a pointer to the first element in the memory block.
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length)
Resynchronize the cipher.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Functions for CPU features and intrinsics.
Classes for Salsa and Salsa20 stream ciphers.
iterator begin()
Provides an iterator pointing to the first element in the memory block.
const char * IV()
ConstByteArrayParameter, also accepts const byte * for backwards compatibility.
bool HasSSE2()
Determines SSE2 availability.
Access a block of memory.
KeystreamOperation
Keystream operation flags.
Crypto++ library namespace.
SymmetricCipher implementation.
size_type size() const
Provides the count of elements in the SecBlock.
Interface for retrieving values given their names.