63#ifndef INCLUDED_volk_32fc_32f_dot_prod_32fc_a_H
64#define INCLUDED_volk_32fc_32f_dot_prod_32fc_a_H
/*
 * Fragment of the portable C kernel. The cross-reference notes at the end of
 * this listing name volk_32fc_32f_dot_prod_32fc_generic(result, input, taps,
 * num_points) as defined a few lines above this point; presumably this is its
 * body. The opening of the signature, the braces and the declaration of res
 * are not part of this excerpt.
 */
74 unsigned int num_points)
/* realpt / imagpt address two adjacent floats, res[0] and res[1]. */
78 float *realpt = &res[0], *imagpt = &res[1];
/* input is walked as a flat float array; taps is walked as given. */
79 const float* aPtr = (
float*)input;
80 const float* bPtr = taps;
81 unsigned int number = 0;
/* One tap per point: the same *bPtr scales two consecutive input floats, and
 * bPtr only advances after the second of them has been accumulated. */
86 for (number = 0; number < num_points; number++) {
87 *realpt += ((*aPtr++) * (*bPtr));
88 *imagpt += ((*aPtr++) * (*bPtr++));
96#if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * AVX2 + FMA kernel using aligned loads. Accumulates input floats (taken in
 * pairs) multiplied by one tap per pair; 16 points (32 input floats, 16 taps)
 * are handled per vector iteration and the remainder in a scalar loop.
 *
 * NOTE(review): this listing omits several lines (middle of the parameter
 * list, braces, the declarations of res and dotProductVector, and whatever
 * advances aPtr/bPtr inside the vector loop). Confirm against the full file.
 */
100static inline void volk_32fc_32f_dot_prod_32fc_a_avx2_fma(
lv_32fc_t* result,
103 unsigned int num_points)
106 unsigned int number = 0;
/* Number of full 16-point groups the vector loop can process. */
107 const unsigned int sixteenthPoints = num_points / 16;
/* realpt / imagpt address two adjacent floats, res[0] and res[1]. */
110 float *realpt = &res[0], *imagpt = &res[1];
111 const float* aPtr = (
float*)input;
112 const float* bPtr = taps;
114 __m256 a0Val, a1Val, a2Val, a3Val;
115 __m256 b0Val, b1Val, b2Val, b3Val;
116 __m256 x0Val, x1Val, x0loVal, x0hiVal, x1loVal, x1hiVal;
/* Four independent accumulators, one per 8-float slice of the input group. */
118 __m256 dotProdVal0 = _mm256_setzero_ps();
119 __m256 dotProdVal1 = _mm256_setzero_ps();
120 __m256 dotProdVal2 = _mm256_setzero_ps();
121 __m256 dotProdVal3 = _mm256_setzero_ps();
123 for (; number < sixteenthPoints; number++) {
/* _mm256_load_ps is the aligned load: addresses must be 32-byte aligned. */
125 a0Val = _mm256_load_ps(aPtr);
126 a1Val = _mm256_load_ps(aPtr + 8);
127 a2Val = _mm256_load_ps(aPtr + 16);
128 a3Val = _mm256_load_ps(aPtr + 24);
130 x0Val = _mm256_load_ps(bPtr);
131 x1Val = _mm256_load_ps(bPtr + 8);
/* Unpacking a register with itself duplicates every tap (t0,t0,t1,t1,...)
 * within each 128-bit lane. */
132 x0loVal = _mm256_unpacklo_ps(x0Val, x0Val);
133 x0hiVal = _mm256_unpackhi_ps(x0Val, x0Val);
134 x1loVal = _mm256_unpacklo_ps(x1Val, x1Val);
135 x1hiVal = _mm256_unpackhi_ps(x1Val, x1Val);
/* 0x20 joins the low lanes of both sources, 0x31 the high lanes, which puts
 * the duplicated taps back into ascending order across b0Val..b3Val. */
138 b0Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x20);
139 b1Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x31);
140 b2Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x20);
141 b3Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x31);
/* Fused multiply-add: accumulator = a * b + accumulator. */
143 dotProdVal0 = _mm256_fmadd_ps(a0Val, b0Val, dotProdVal0);
144 dotProdVal1 = _mm256_fmadd_ps(a1Val, b1Val, dotProdVal1);
145 dotProdVal2 = _mm256_fmadd_ps(a2Val, b2Val, dotProdVal2);
146 dotProdVal3 = _mm256_fmadd_ps(a3Val, b3Val, dotProdVal3);
/* Fold the four accumulators into dotProdVal0. */
152 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
153 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal2);
154 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal3);
/* Aligned store into dotProductVector (its declaration and the second
 * argument of this call are not visible in this excerpt). */
158 _mm256_store_ps(dotProductVector,
/* Horizontal sum: even slots go to *realpt, odd slots to *imagpt. */
161 *realpt = dotProductVector[0];
162 *imagpt = dotProductVector[1];
163 *realpt += dotProductVector[2];
164 *imagpt += dotProductVector[3];
165 *realpt += dotProductVector[4];
166 *imagpt += dotProductVector[5];
167 *realpt += dotProductVector[6];
168 *imagpt += dotProductVector[7];
/* Scalar tail for the points left over after the 16-point groups. */
170 number = sixteenthPoints * 16;
171 for (; number < num_points; number++) {
172 *realpt += ((*aPtr++) * (*bPtr));
173 *imagpt += ((*aPtr++) * (*bPtr++));
183#include <immintrin.h>
/*
 * Fragment of an AVX kernel using aligned loads and separate multiply / add
 * (no FMA). The cross-reference notes at the end of this listing name
 * volk_32fc_32f_dot_prod_32fc_a_avx(result, input, taps, num_points) as
 * defined a few lines above this point; presumably this is its body.
 *
 * NOTE(review): the start of the signature, braces, the declarations of res
 * and dotProductVector, and the aPtr/bPtr advance inside the vector loop are
 * not visible in this excerpt.
 */
188 unsigned int num_points)
191 unsigned int number = 0;
/* Number of full 16-point groups the vector loop can process. */
192 const unsigned int sixteenthPoints = num_points / 16;
195 float *realpt = &res[0], *imagpt = &res[1];
196 const float* aPtr = (
float*)input;
197 const float* bPtr = taps;
199 __m256 a0Val, a1Val, a2Val, a3Val;
200 __m256 b0Val, b1Val, b2Val, b3Val;
201 __m256 x0Val, x1Val, x0loVal, x0hiVal, x1loVal, x1hiVal;
202 __m256 c0Val, c1Val, c2Val, c3Val;
/* Four independent accumulators, one per 8-float slice of the input group. */
204 __m256 dotProdVal0 = _mm256_setzero_ps();
205 __m256 dotProdVal1 = _mm256_setzero_ps();
206 __m256 dotProdVal2 = _mm256_setzero_ps();
207 __m256 dotProdVal3 = _mm256_setzero_ps();
209 for (; number < sixteenthPoints; number++) {
/* Aligned loads: 32 input floats and 16 taps per iteration. */
211 a0Val = _mm256_load_ps(aPtr);
212 a1Val = _mm256_load_ps(aPtr + 8);
213 a2Val = _mm256_load_ps(aPtr + 16);
214 a3Val = _mm256_load_ps(aPtr + 24);
216 x0Val = _mm256_load_ps(bPtr);
217 x1Val = _mm256_load_ps(bPtr + 8);
/* Duplicate every tap (t0,t0,t1,t1,...) within each 128-bit lane. */
218 x0loVal = _mm256_unpacklo_ps(x0Val, x0Val);
219 x0hiVal = _mm256_unpackhi_ps(x0Val, x0Val);
220 x1loVal = _mm256_unpacklo_ps(x1Val, x1Val);
221 x1hiVal = _mm256_unpackhi_ps(x1Val, x1Val);
/* 0x20 joins the low lanes, 0x31 the high lanes: restores ascending order. */
224 b0Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x20);
225 b1Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x31);
226 b2Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x20);
227 b3Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x31);
/* Element-wise products, then accumulate. */
229 c0Val = _mm256_mul_ps(a0Val, b0Val);
230 c1Val = _mm256_mul_ps(a1Val, b1Val);
231 c2Val = _mm256_mul_ps(a2Val, b2Val);
232 c3Val = _mm256_mul_ps(a3Val, b3Val);
234 dotProdVal0 = _mm256_add_ps(c0Val, dotProdVal0);
235 dotProdVal1 = _mm256_add_ps(c1Val, dotProdVal1);
236 dotProdVal2 = _mm256_add_ps(c2Val, dotProdVal2);
237 dotProdVal3 = _mm256_add_ps(c3Val, dotProdVal3);
/* Fold the four accumulators into dotProdVal0. */
243 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
244 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal2);
245 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal3);
/* Aligned store; the rest of this call is not visible in this excerpt. */
249 _mm256_store_ps(dotProductVector,
/* Horizontal sum: even slots go to *realpt, odd slots to *imagpt. */
252 *realpt = dotProductVector[0];
253 *imagpt = dotProductVector[1];
254 *realpt += dotProductVector[2];
255 *imagpt += dotProductVector[3];
256 *realpt += dotProductVector[4];
257 *imagpt += dotProductVector[5];
258 *realpt += dotProductVector[6];
259 *imagpt += dotProductVector[7];
/* Scalar tail for the points left over after the 16-point groups. */
261 number = sixteenthPoints * 16;
262 for (; number < num_points; number++) {
263 *realpt += ((*aPtr++) * (*bPtr));
264 *imagpt += ((*aPtr++) * (*bPtr++));
/*
 * Fragment of an SSE kernel using aligned loads. The cross-reference notes at
 * the end of this listing name volk_32fc_32f_dot_prod_32fc_a_sse(result,
 * input, taps, num_points) as defined a few lines above this point;
 * presumably this is its body.
 *
 * NOTE(review): the start of the signature, braces, the declarations of res
 * and dotProductVector, and the aPtr/bPtr advance inside the vector loop are
 * not visible in this excerpt.
 */
279 unsigned int num_points)
282 unsigned int number = 0;
/* Despite the name, this is the number of 8-point groups (num_points / 8). */
283 const unsigned int sixteenthPoints = num_points / 8;
286 float *realpt = &res[0], *imagpt = &res[1];
287 const float* aPtr = (
float*)input;
288 const float* bPtr = taps;
290 __m128 a0Val, a1Val, a2Val, a3Val;
291 __m128 b0Val, b1Val, b2Val, b3Val;
292 __m128 x0Val, x1Val, x2Val, x3Val;
293 __m128 c0Val, c1Val, c2Val, c3Val;
/* Four independent accumulators, one per 4-float slice of the input group. */
295 __m128 dotProdVal0 = _mm_setzero_ps();
296 __m128 dotProdVal1 = _mm_setzero_ps();
297 __m128 dotProdVal2 = _mm_setzero_ps();
298 __m128 dotProdVal3 = _mm_setzero_ps();
300 for (; number < sixteenthPoints; number++) {
/* Aligned loads: 16 input floats per iteration. */
302 a0Val = _mm_load_ps(aPtr);
303 a1Val = _mm_load_ps(aPtr + 4);
304 a2Val = _mm_load_ps(aPtr + 8);
305 a3Val = _mm_load_ps(aPtr + 12);
/* Each group of four taps is loaded twice so that unpacking the two copies
 * against each other yields every tap duplicated (t0,t0,t1,t1 / t2,t2,t3,t3). */
307 x0Val = _mm_load_ps(bPtr);
308 x1Val = _mm_load_ps(bPtr);
309 x2Val = _mm_load_ps(bPtr + 4);
310 x3Val = _mm_load_ps(bPtr + 4);
311 b0Val = _mm_unpacklo_ps(x0Val, x1Val);
312 b1Val = _mm_unpackhi_ps(x0Val, x1Val);
313 b2Val = _mm_unpacklo_ps(x2Val, x3Val);
314 b3Val = _mm_unpackhi_ps(x2Val, x3Val);
/* Element-wise products, then accumulate. */
316 c0Val = _mm_mul_ps(a0Val, b0Val);
317 c1Val = _mm_mul_ps(a1Val, b1Val);
318 c2Val = _mm_mul_ps(a2Val, b2Val);
319 c3Val = _mm_mul_ps(a3Val, b3Val);
321 dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
322 dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
323 dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
324 dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
/* Fold the four accumulators into dotProdVal0. */
330 dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
331 dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
332 dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
/* Aligned store; the rest of this call is not visible in this excerpt. */
336 _mm_store_ps(dotProductVector,
/* Horizontal sum: even slots go to *realpt, odd slots to *imagpt. */
339 *realpt = dotProductVector[0];
340 *imagpt = dotProductVector[1];
341 *realpt += dotProductVector[2];
342 *imagpt += dotProductVector[3];
/* Scalar tail for the points left over after the 8-point groups. */
344 number = sixteenthPoints * 8;
345 for (; number < num_points; number++) {
346 *realpt += ((*aPtr++) * (*bPtr));
347 *imagpt += ((*aPtr++) * (*bPtr++));
355#if LV_HAVE_AVX2 && LV_HAVE_FMA
357#include <immintrin.h>
/*
 * AVX2 + FMA kernel for unaligned buffers. Accumulates input floats (taken in
 * pairs) multiplied by one tap per pair; 16 points (32 input floats, 16 taps)
 * are handled per vector iteration and the remainder in a scalar loop.
 *
 * Fix: the taps were read with the aligned _mm256_load_ps even though this is
 * the unaligned variant (its input loads already use _mm256_loadu_ps). An
 * aligned load from an address that is not 32-byte aligned can fault, so the
 * tap loads now use _mm256_loadu_ps as well.
 *
 * NOTE(review): this listing omits several lines (middle of the parameter
 * list, braces, the declarations of res and dotProductVector, and whatever
 * advances aPtr/bPtr inside the vector loop). Confirm against the full file.
 */
359static inline void volk_32fc_32f_dot_prod_32fc_u_avx2_fma(
lv_32fc_t* result,
362 unsigned int num_points)
365 unsigned int number = 0;
/* Number of full 16-point groups the vector loop can process. */
366 const unsigned int sixteenthPoints = num_points / 16;
/* realpt / imagpt address two adjacent floats, res[0] and res[1]. */
369 float *realpt = &res[0], *imagpt = &res[1];
370 const float* aPtr = (
float*)input;
371 const float* bPtr = taps;
373 __m256 a0Val, a1Val, a2Val, a3Val;
374 __m256 b0Val, b1Val, b2Val, b3Val;
375 __m256 x0Val, x1Val, x0loVal, x0hiVal, x1loVal, x1hiVal;
/* Four independent accumulators, one per 8-float slice of the input group. */
377 __m256 dotProdVal0 = _mm256_setzero_ps();
378 __m256 dotProdVal1 = _mm256_setzero_ps();
379 __m256 dotProdVal2 = _mm256_setzero_ps();
380 __m256 dotProdVal3 = _mm256_setzero_ps();
382 for (; number < sixteenthPoints; number++) {
/* Unaligned loads: no alignment requirement on input. */
384 a0Val = _mm256_loadu_ps(aPtr);
385 a1Val = _mm256_loadu_ps(aPtr + 8);
386 a2Val = _mm256_loadu_ps(aPtr + 16);
387 a3Val = _mm256_loadu_ps(aPtr + 24);
/* Unaligned loads for the taps too (previously the aligned form). */
389 x0Val = _mm256_loadu_ps(bPtr);
390 x1Val = _mm256_loadu_ps(bPtr + 8);
/* Unpacking a register with itself duplicates every tap (t0,t0,t1,t1,...)
 * within each 128-bit lane. */
391 x0loVal = _mm256_unpacklo_ps(x0Val, x0Val);
392 x0hiVal = _mm256_unpackhi_ps(x0Val, x0Val);
393 x1loVal = _mm256_unpacklo_ps(x1Val, x1Val);
394 x1hiVal = _mm256_unpackhi_ps(x1Val, x1Val);
/* 0x20 joins the low lanes of both sources, 0x31 the high lanes, which puts
 * the duplicated taps back into ascending order across b0Val..b3Val. */
397 b0Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x20);
398 b1Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x31);
399 b2Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x20);
400 b3Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x31);
/* Fused multiply-add: accumulator = a * b + accumulator. */
402 dotProdVal0 = _mm256_fmadd_ps(a0Val, b0Val, dotProdVal0);
403 dotProdVal1 = _mm256_fmadd_ps(a1Val, b1Val, dotProdVal1);
404 dotProdVal2 = _mm256_fmadd_ps(a2Val, b2Val, dotProdVal2);
405 dotProdVal3 = _mm256_fmadd_ps(a3Val, b3Val, dotProdVal3);
/* Fold the four accumulators into dotProdVal0. */
411 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
412 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal2);
413 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal3);
/* Aligned store into dotProductVector, a local whose declaration is not
 * visible in this excerpt -- TODO confirm it is declared 32-byte aligned. */
417 _mm256_store_ps(dotProductVector,
/* Horizontal sum: even slots go to *realpt, odd slots to *imagpt. */
420 *realpt = dotProductVector[0];
421 *imagpt = dotProductVector[1];
422 *realpt += dotProductVector[2];
423 *imagpt += dotProductVector[3];
424 *realpt += dotProductVector[4];
425 *imagpt += dotProductVector[5];
426 *realpt += dotProductVector[6];
427 *imagpt += dotProductVector[7];
/* Scalar tail for the points left over after the 16-point groups. */
429 number = sixteenthPoints * 16;
430 for (; number < num_points; number++) {
431 *realpt += ((*aPtr++) * (*bPtr));
432 *imagpt += ((*aPtr++) * (*bPtr++));
442#include <immintrin.h>
/*
 * Fragment of an AVX kernel using unaligned loads and separate multiply / add
 * (no FMA). The cross-reference notes at the end of this listing name
 * volk_32fc_32f_dot_prod_32fc_u_avx(result, input, taps, num_points) as
 * defined a few lines above this point; presumably this is its body.
 *
 * NOTE(review): the start of the signature, braces, the declarations of res
 * and dotProductVector, and the aPtr/bPtr advance inside the vector loop are
 * not visible in this excerpt.
 */
447 unsigned int num_points)
450 unsigned int number = 0;
/* Number of full 16-point groups the vector loop can process. */
451 const unsigned int sixteenthPoints = num_points / 16;
454 float *realpt = &res[0], *imagpt = &res[1];
455 const float* aPtr = (
float*)input;
456 const float* bPtr = taps;
458 __m256 a0Val, a1Val, a2Val, a3Val;
459 __m256 b0Val, b1Val, b2Val, b3Val;
460 __m256 x0Val, x1Val, x0loVal, x0hiVal, x1loVal, x1hiVal;
461 __m256 c0Val, c1Val, c2Val, c3Val;
/* Four independent accumulators, one per 8-float slice of the input group. */
463 __m256 dotProdVal0 = _mm256_setzero_ps();
464 __m256 dotProdVal1 = _mm256_setzero_ps();
465 __m256 dotProdVal2 = _mm256_setzero_ps();
466 __m256 dotProdVal3 = _mm256_setzero_ps();
468 for (; number < sixteenthPoints; number++) {
/* Unaligned loads for both the input floats and the taps. */
470 a0Val = _mm256_loadu_ps(aPtr);
471 a1Val = _mm256_loadu_ps(aPtr + 8);
472 a2Val = _mm256_loadu_ps(aPtr + 16);
473 a3Val = _mm256_loadu_ps(aPtr + 24);
475 x0Val = _mm256_loadu_ps(bPtr);
476 x1Val = _mm256_loadu_ps(bPtr + 8);
/* Duplicate every tap (t0,t0,t1,t1,...) within each 128-bit lane. */
477 x0loVal = _mm256_unpacklo_ps(x0Val, x0Val);
478 x0hiVal = _mm256_unpackhi_ps(x0Val, x0Val);
479 x1loVal = _mm256_unpacklo_ps(x1Val, x1Val);
480 x1hiVal = _mm256_unpackhi_ps(x1Val, x1Val);
/* 0x20 joins the low lanes, 0x31 the high lanes: restores ascending order. */
483 b0Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x20);
484 b1Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x31);
485 b2Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x20);
486 b3Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x31);
/* Element-wise products, then accumulate. */
488 c0Val = _mm256_mul_ps(a0Val, b0Val);
489 c1Val = _mm256_mul_ps(a1Val, b1Val);
490 c2Val = _mm256_mul_ps(a2Val, b2Val);
491 c3Val = _mm256_mul_ps(a3Val, b3Val);
493 dotProdVal0 = _mm256_add_ps(c0Val, dotProdVal0);
494 dotProdVal1 = _mm256_add_ps(c1Val, dotProdVal1);
495 dotProdVal2 = _mm256_add_ps(c2Val, dotProdVal2);
496 dotProdVal3 = _mm256_add_ps(c3Val, dotProdVal3);
/* Fold the four accumulators into dotProdVal0. */
502 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
503 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal2);
504 dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal3);
/* Aligned store into dotProductVector, whose declaration is not visible in
 * this excerpt -- TODO confirm it is declared 32-byte aligned. */
508 _mm256_store_ps(dotProductVector,
/* Horizontal sum: even slots go to *realpt, odd slots to *imagpt. */
511 *realpt = dotProductVector[0];
512 *imagpt = dotProductVector[1];
513 *realpt += dotProductVector[2];
514 *imagpt += dotProductVector[3];
515 *realpt += dotProductVector[4];
516 *imagpt += dotProductVector[5];
517 *realpt += dotProductVector[6];
518 *imagpt += dotProductVector[7];
/* Scalar tail for the points left over after the 16-point groups. */
520 number = sixteenthPoints * 16;
521 for (; number < num_points; number++) {
522 *realpt += ((*aPtr++) * (*bPtr));
523 *imagpt += ((*aPtr++) * (*bPtr++));
/*
 * Fragment of a NEON kernel that processes 8 points per iteration using two
 * sets of accumulators. The cross-reference notes at the end of this listing
 * name volk_32fc_32f_dot_prod_32fc_neon_unroll(result, input, taps,
 * num_points) as defined a few lines above this point; presumably this is
 * its body.
 *
 * NOTE(review): the start of the signature, braces, the declarations of
 * number and res, the inputPtr/tapsPtr advance inside the vector loop, and
 * the left-hand sides of the two horizontal sums are not visible here.
 */
536 const float* __restrict taps,
537 unsigned int num_points)
/* Despite the name, this is the number of 8-point groups (num_points / 8). */
541 const unsigned int quarterPoints = num_points / 8;
544 float *realpt = &res[0], *imagpt = &res[1];
545 const float* inputPtr = (
float*)input;
546 const float* tapsPtr = taps;
/* Source of zeros used to clear the accumulators below. */
547 float zero[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
548 float accVector_real[4];
549 float accVector_imag[4];
551 float32x4x2_t inputVector0, inputVector1;
552 float32x4_t tapsVector0, tapsVector1;
553 float32x4_t tmp_real0, tmp_imag0;
554 float32x4_t tmp_real1, tmp_imag1;
555 float32x4_t real_accumulator0, imag_accumulator0;
556 float32x4_t real_accumulator1, imag_accumulator1;
560 real_accumulator0 = vld1q_f32(zero);
561 imag_accumulator0 = vld1q_f32(zero);
562 real_accumulator1 = vld1q_f32(zero);
563 imag_accumulator1 = vld1q_f32(zero);
565 for (number = 0; number < quarterPoints; number++) {
/* Eight taps per iteration, four per vector. */
567 tapsVector0 = vld1q_f32(tapsPtr);
568 tapsVector1 = vld1q_f32(tapsPtr + 4);
/* vld2q_f32 de-interleaves: val[0] receives the even-indexed floats and
 * val[1] the odd-indexed floats of the eight it reads. */
571 inputVector0 = vld2q_f32(inputPtr);
572 inputVector1 = vld2q_f32(inputPtr + 8);
/* Scale both halves of each pair by the same taps. */
575 tmp_real0 = vmulq_f32(tapsVector0, inputVector0.val[0]);
576 tmp_imag0 = vmulq_f32(tapsVector0, inputVector0.val[1]);
578 tmp_real1 = vmulq_f32(tapsVector1, inputVector1.val[0]);
579 tmp_imag1 = vmulq_f32(tapsVector1, inputVector1.val[1]);
581 real_accumulator0 = vaddq_f32(real_accumulator0, tmp_real0);
582 imag_accumulator0 = vaddq_f32(imag_accumulator0, tmp_imag0);
584 real_accumulator1 = vaddq_f32(real_accumulator1, tmp_real1);
585 imag_accumulator1 = vaddq_f32(imag_accumulator1, tmp_imag1);
/* Merge the two accumulator sets. */
591 real_accumulator0 = vaddq_f32(real_accumulator0, real_accumulator1);
592 imag_accumulator0 = vaddq_f32(imag_accumulator0, imag_accumulator1);
/* Spill to plain arrays so the four lanes can be summed in scalar code. */
595 vst1q_f32(accVector_real, real_accumulator0);
596 vst1q_f32(accVector_imag, imag_accumulator0);
598 accVector_real[0] + accVector_real[1] + accVector_real[2] + accVector_real[3];
601 accVector_imag[0] + accVector_imag[1] + accVector_imag[2] + accVector_imag[3];
/* Scalar tail for the points left over after the 8-point groups. */
604 for (number = quarterPoints * 8; number < num_points; number++) {
605 *realpt += ((*inputPtr++) * (*tapsPtr));
606 *imagpt += ((*inputPtr++) * (*tapsPtr++));
/*
 * Fragment of a NEON kernel that processes 4 points per iteration. The
 * cross-reference notes at the end of this listing name
 * volk_32fc_32f_dot_prod_32fc_a_neon(result, input, taps, num_points) as
 * defined a few lines above this point; presumably this is its body.
 *
 * NOTE(review): the start of the signature, braces, the declarations of
 * number and res, the inputPtr/tapsPtr advance inside the vector loop, and
 * the left-hand sides of the two horizontal sums are not visible here.
 */
619 const float* __restrict taps,
620 unsigned int num_points)
/* Number of full 4-point groups the vector loop can process. */
624 const unsigned int quarterPoints = num_points / 4;
627 float *realpt = &res[0], *imagpt = &res[1];
628 const float* inputPtr = (
float*)input;
629 const float* tapsPtr = taps;
/* Source of zeros used to clear the accumulators below. */
630 float zero[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
631 float accVector_real[4];
632 float accVector_imag[4];
634 float32x4x2_t inputVector;
635 float32x4_t tapsVector;
636 float32x4_t tmp_real, tmp_imag;
637 float32x4_t real_accumulator, imag_accumulator;
642 real_accumulator = vld1q_f32(zero);
643 imag_accumulator = vld1q_f32(zero);
645 for (number = 0; number < quarterPoints; number++) {
/* Four taps per iteration. */
648 tapsVector = vld1q_f32(tapsPtr);
/* vld2q_f32 de-interleaves: val[0] receives the even-indexed floats and
 * val[1] the odd-indexed floats of the eight it reads. */
651 inputVector = vld2q_f32(inputPtr);
/* Scale both halves of each pair by the same taps, then accumulate. */
653 tmp_real = vmulq_f32(tapsVector, inputVector.val[0]);
654 tmp_imag = vmulq_f32(tapsVector, inputVector.val[1]);
656 real_accumulator = vaddq_f32(real_accumulator, tmp_real);
657 imag_accumulator = vaddq_f32(imag_accumulator, tmp_imag);
/* Spill to plain arrays so the four lanes can be summed in scalar code. */
665 vst1q_f32(accVector_real, real_accumulator);
666 vst1q_f32(accVector_imag, imag_accumulator);
668 accVector_real[0] + accVector_real[1] + accVector_real[2] + accVector_real[3];
671 accVector_imag[0] + accVector_imag[1] + accVector_imag[2] + accVector_imag[3];
/* Scalar tail for the points left over after the 4-point groups. */
674 for (number = quarterPoints * 4; number < num_points; number++) {
675 *realpt += ((*inputPtr++) * (*tapsPtr));
676 *imagpt += ((*inputPtr++) * (*tapsPtr++));
/*
 * Declarations only: these three kernels are declared extern, so their bodies
 * live outside this header (the names suggest hand-written NEON assembly --
 * TODO confirm). The middle of each parameter list is not visible in this
 * excerpt; each starts with the result pointer and ends with num_points.
 */
685extern void volk_32fc_32f_dot_prod_32fc_a_neonasm(
lv_32fc_t* result,
688 unsigned int num_points);
692extern void volk_32fc_32f_dot_prod_32fc_a_neonasmvmla(
lv_32fc_t* result,
695 unsigned int num_points);
699extern void volk_32fc_32f_dot_prod_32fc_a_neonpipeline(
lv_32fc_t* result,
702 unsigned int num_points);
/*
 * Fragment of an SSE kernel using unaligned loads. The cross-reference notes
 * at the end of this listing name volk_32fc_32f_dot_prod_32fc_u_sse(result,
 * input, taps, num_points) as defined a few lines above this point;
 * presumably this is its body.
 *
 * NOTE(review): the start of the signature, braces, the declarations of res
 * and dotProductVector, and the aPtr/bPtr advance inside the vector loop are
 * not visible in this excerpt.
 */
710 unsigned int num_points)
713 unsigned int number = 0;
/* Despite the name, this is the number of 8-point groups (num_points / 8). */
714 const unsigned int sixteenthPoints = num_points / 8;
717 float *realpt = &res[0], *imagpt = &res[1];
718 const float* aPtr = (
float*)input;
719 const float* bPtr = taps;
721 __m128 a0Val, a1Val, a2Val, a3Val;
722 __m128 b0Val, b1Val, b2Val, b3Val;
723 __m128 x0Val, x1Val, x2Val, x3Val;
724 __m128 c0Val, c1Val, c2Val, c3Val;
/* Four independent accumulators, one per 4-float slice of the input group. */
726 __m128 dotProdVal0 = _mm_setzero_ps();
727 __m128 dotProdVal1 = _mm_setzero_ps();
728 __m128 dotProdVal2 = _mm_setzero_ps();
729 __m128 dotProdVal3 = _mm_setzero_ps();
731 for (; number < sixteenthPoints; number++) {
/* Unaligned loads: 16 input floats per iteration. */
733 a0Val = _mm_loadu_ps(aPtr);
734 a1Val = _mm_loadu_ps(aPtr + 4);
735 a2Val = _mm_loadu_ps(aPtr + 8);
736 a3Val = _mm_loadu_ps(aPtr + 12);
/* Each group of four taps is loaded twice so that unpacking the two copies
 * against each other yields every tap duplicated (t0,t0,t1,t1 / t2,t2,t3,t3). */
738 x0Val = _mm_loadu_ps(bPtr);
739 x1Val = _mm_loadu_ps(bPtr);
740 x2Val = _mm_loadu_ps(bPtr + 4);
741 x3Val = _mm_loadu_ps(bPtr + 4);
742 b0Val = _mm_unpacklo_ps(x0Val, x1Val);
743 b1Val = _mm_unpackhi_ps(x0Val, x1Val);
744 b2Val = _mm_unpacklo_ps(x2Val, x3Val);
745 b3Val = _mm_unpackhi_ps(x2Val, x3Val);
/* Element-wise products, then accumulate. */
747 c0Val = _mm_mul_ps(a0Val, b0Val);
748 c1Val = _mm_mul_ps(a1Val, b1Val);
749 c2Val = _mm_mul_ps(a2Val, b2Val);
750 c3Val = _mm_mul_ps(a3Val, b3Val);
752 dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
753 dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
754 dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
755 dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
/* Fold the four accumulators into dotProdVal0. */
761 dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
762 dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
763 dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
/* Aligned store into dotProductVector, whose declaration is not visible in
 * this excerpt -- TODO confirm it is declared 16-byte aligned. */
767 _mm_store_ps(dotProductVector,
/* Horizontal sum: even slots go to *realpt, odd slots to *imagpt. */
770 *realpt = dotProductVector[0];
771 *imagpt = dotProductVector[1];
772 *realpt += dotProductVector[2];
773 *imagpt += dotProductVector[3];
/* Scalar tail for the points left over after the 8-point groups. */
775 number = sixteenthPoints * 8;
776 for (; number < num_points; number++) {
777 *realpt += ((*aPtr++) * (*bPtr));
778 *imagpt += ((*aPtr++) * (*bPtr++));
static void volk_32fc_32f_dot_prod_32fc_u_avx(lv_32fc_t *result, const lv_32fc_t *input, const float *taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:444
static void volk_32fc_32f_dot_prod_32fc_a_sse(lv_32fc_t *result, const lv_32fc_t *input, const float *taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:276
static void volk_32fc_32f_dot_prod_32fc_neon_unroll(lv_32fc_t *__restrict result, const lv_32fc_t *__restrict input, const float *__restrict taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:534
static void volk_32fc_32f_dot_prod_32fc_a_neon(lv_32fc_t *__restrict result, const lv_32fc_t *__restrict input, const float *__restrict taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:617
static void volk_32fc_32f_dot_prod_32fc_generic(lv_32fc_t *result, const lv_32fc_t *input, const float *taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:71
static void volk_32fc_32f_dot_prod_32fc_a_avx(lv_32fc_t *result, const lv_32fc_t *input, const float *taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:185
static void volk_32fc_32f_dot_prod_32fc_u_sse(lv_32fc_t *result, const lv_32fc_t *input, const float *taps, unsigned int num_points)
Definition: volk_32fc_32f_dot_prod_32fc.h:707
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
float complex lv_32fc_t
Definition: volk_complex.h:65