#if (CRYPTOPP_SSSE3_AVAILABLE)
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if (CRYPTOPP_SSE41_AVAILABLE)
# include <smmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
#endif

#if defined(__AVX512F__)
# define CRYPTOPP_AVX512_ROTATE 1
# include <immintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <stdint.h>
# include <arm_acle.h>
#endif

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "ppc_simd.h"
#endif

// Squash MS LNK4221 and libtool warnings
extern const char SPECK64_SIMD_FNAME[] = __FILE__;

ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word32;
using CryptoPP::word64;
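
// SPECK64 round functions (32-bit words, rotation amounts alpha=8, beta=3):
//   encrypt: x = ROTR32(x, 8); x += y; x ^= k;  y = ROTL32(y, 3); y ^= x
//   decrypt: y ^= x; y = ROTR32(y, 3);  x ^= k; x -= y; x = ROTL32(x, 8)
// Each 128-bit SIMD register below carries the halves of several 64-bit
// blocks, so one pass of the round function advances all of them in parallel.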

#if (CRYPTOPP_ARM_NEON_AVAILABLE)

template <class T>
inline T UnpackHigh32(const T& a, const T& b)
{
    const uint32x2_t x(vget_high_u32((uint32x4_t)a));
    const uint32x2_t y(vget_high_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}

template <class T>
inline T UnpackLow32(const T& a, const T& b)
{
    const uint32x2_t x(vget_low_u32((uint32x4_t)a));
    const uint32x2_t y(vget_low_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}
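
// UnpackLow32(a, b) zips the low halves of a and b into {a[0], b[0], a[1], b[1]};
// UnpackHigh32 does the same with the high halves. They re-interleave the x/y
// working registers back into block order after the rounds complete.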

template <unsigned int R>
inline uint32x4_t RotateLeft32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, R));
    const uint32x4_t b(vshrq_n_u32(val, 32 - R));
    return vorrq_u32(a, b);
}

template <unsigned int R>
inline uint32x4_t RotateRight32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, 32 - R));
    const uint32x4_t b(vshrq_n_u32(val, R));
    return vorrq_u32(a, b);
}

#if defined(__aarch32__) || defined(__aarch64__)
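
// A rotate by 8 bits is a pure byte permutation, so on Aarch32/Aarch64 the
// shift-shift-or sequence above is replaced with a single table lookup.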
template <>
inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
    const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}

template <>
inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}

#endif // Aarch32 or Aarch64
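
// Each uint32x4_t holds two 64-bit SPECK64 blocks. vuzpq_u32 separates the
// even-indexed (y) and odd-indexed (x) 32-bit lanes of two registers, so the
// x1/y1 pair below carries four blocks through the rounds side by side.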

inline void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = vaddq_u32(x1, y1);
        x1 = veorq_u32(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = veorq_u32(y1, x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
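
// Decryption walks the subkeys from last to first and inverts each step of
// the round: xor, rotate right by 3, subtract, rotate left by 8.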

inline void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = veorq_u32(x1, rk);
        x1 = vsubq_u32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
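
// The 6-block variants run three independent x/y register pairs through the
// same schedule, giving the CPU separate dependency chains to overlap and
// hiding the latency of the add/xor/rotate sequence.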

inline void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = vaddq_u32(x1, y1);
        x2 = vaddq_u32(x2, y2);
        x3 = vaddq_u32(x3, y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}

inline void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        x1 = vsubq_u32(x1, y1);
        x2 = vsubq_u32(x2, y2);
        x3 = vsubq_u32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}

#endif // CRYPTOPP_ARM_NEON_AVAILABLE

#if defined(CRYPTOPP_SSE41_AVAILABLE)

template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
#endif
}

template <unsigned int R>
inline __m128i RotateRight32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
#endif
}
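
// Rotates by 8 are specialized: XOP provides a native rotate
// (_mm_roti_epi32), and otherwise an SSSE3 byte shuffle (_mm_shuffle_epi8)
// performs the rotate in a single instruction.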

template <>
inline __m128i RotateLeft32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}

template <>
inline __m128i RotateRight32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-8);
#else
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
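
// The block functions below de-interleave with _mm_shuffle_ps: casting to
// __m128 lets one shuffle gather the odd (x) or even (y) 32-bit lanes of two
// registers, the SSE counterpart of NEON's vuzpq_u32.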

inline void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = _mm_add_epi32(x1, y1);
        x1 = _mm_xor_si128(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = _mm_xor_si128(y1, x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}

inline void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = _mm_xor_si128(x1, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}

inline void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = _mm_add_epi32(x1, y1);
        x2 = _mm_add_epi32(x2, y2);
        x3 = _mm_add_epi32(x3, y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}

inline void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x2 = _mm_sub_epi32(x2, y2);
        x3 = _mm_sub_epi32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}

#endif // CRYPTOPP_SSE41_AVAILABLE

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

using CryptoPP::uint8x16_p;
using CryptoPP::uint32x4_p;

using CryptoPP::VecAdd;
using CryptoPP::VecSub;
using CryptoPP::VecXor;
using CryptoPP::VecLoad;
using CryptoPP::VecPermute;

// Rotate left by bit count
template<unsigned int C>
inline uint32x4_p RotateLeft32(const uint32x4_p val)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_rl(val, m);
}

// Rotate right by bit count
template<unsigned int C>
inline uint32x4_p RotateRight32(const uint32x4_p val)
{
    const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
    return vec_rl(val, m);
}
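
// AltiVec provides a native per-element rotate (vec_rl), so no byte-shuffle
// specialization is needed for the rotates by 8 on this path.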

inline void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Broadcast the subkey to all four lanes without vec_splats
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x1 = VecAdd(x1, y1);
        x1 = VecXor(x1, rk);

        y1 = RotateLeft32<3>(y1);
        y1 = VecXor(y1, x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = (uint32x4_p)VecPermute(x1, y1, m3);
    block1 = (uint32x4_p)VecPermute(x1, y1, m4);
}
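
// The m1..m4 permute masks do two jobs at once: they de-interleave the x and
// y words of the packed blocks and fix up the byte order, with separate masks
// for big- and little-endian targets.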

inline void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Broadcast the subkey to all four lanes without vec_splats
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        y1 = VecXor(y1, x1);
        y1 = RotateRight32<3>(y1);

        x1 = VecXor(x1, rk);
        x1 = VecSub(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = (uint32x4_p)VecPermute(x1, y1, m3);
    block1 = (uint32x4_p)VecPermute(x1, y1, m4);
}

inline void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Broadcast the subkey to all four lanes without vec_splats
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = VecAdd(x1, y1);
        x2 = VecAdd(x2, y2);
        x3 = VecAdd(x3, y3);
        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);

        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = (uint32x4_p)VecPermute(x1, y1, m3);
    block1 = (uint32x4_p)VecPermute(x1, y1, m4);
    block2 = (uint32x4_p)VecPermute(x2, y2, m3);
    block3 = (uint32x4_p)VecPermute(x2, y2, m4);
    block4 = (uint32x4_p)VecPermute(x3, y3, m3);
    block5 = (uint32x4_p)VecPermute(x3, y3, m4);
}

inline void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER8_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // Broadcast the subkey to all four lanes without vec_splats
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);

        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);
        x1 = VecSub(x1, y1);
        x2 = VecSub(x2, y2);
        x3 = VecSub(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 B1][A2 B2] ... => [A1 A2][B1 B2] ...
    block0 = (uint32x4_p)VecPermute(x1, y1, m3);
    block1 = (uint32x4_p)VecPermute(x1, y1, m4);
    block2 = (uint32x4_p)VecPermute(x2, y2, m3);
    block3 = (uint32x4_p)VecPermute(x2, y2, m4);
    block4 = (uint32x4_p)VecPermute(x3, y3, m3);
    block5 = (uint32x4_p)VecPermute(x3, y3, m4);
}

#endif // CRYPTOPP_ALTIVEC_AVAILABLE

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
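
// Exported entry points. Each forwards to the shared
// AdvancedProcessBlocks64_6x2_* dispatcher, which chunks the input, applies
// the xorBlocks/flags handling, and calls the block workers above for the
// actual SPECK64 rounds.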

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE

#if defined(CRYPTOPP_SSE41_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSE41_AVAILABLE

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ALTIVEC_AVAILABLE

NAMESPACE_END