71#ifndef INCLUDED_volk_64f_x2_add_64f_H
72#define INCLUDED_volk_64f_x2_add_64f_H
/*
 * Portable reference kernel: element-wise sum of two arrays of doubles.
 *
 * Writes cVector[i] = aVector[i] + bVector[i] for every i in [0, num_points).
 *
 * cVector    - output array; must have room for num_points doubles
 * aVector    - first input array, num_points doubles
 * bVector    - second input array, num_points doubles
 * num_points - number of elements to process; 0 leaves cVector untouched
 */
static inline void volk_64f_x2_add_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
#include <emmintrin.h>

/*
 * SSE2 kernel for buffers with no particular alignment: element-wise sum of
 * two arrays of doubles.
 *
 * Writes cVector[i] = aVector[i] + bVector[i] for every i in [0, num_points).
 * Elements are processed two at a time with 128-bit unaligned loads/stores;
 * an odd trailing element is handled by a scalar loop.
 *
 * cVector    - output array; must have room for num_points doubles
 * aVector    - first input array, num_points doubles
 * bVector    - second input array, num_points doubles
 * num_points - number of elements to process
 */
static inline void volk_64f_x2_add_64f_u_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int half_points = num_points / 2;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m128d aVal, bVal, cVal;
    for (; number < half_points; number++) {
        aVal = _mm_loadu_pd(aPtr);
        bVal = _mm_loadu_pd(bPtr);

        cVal = _mm_add_pd(aVal, bVal);

        _mm_storeu_pd(cPtr, cVal);

        /* Step every cursor past the pair just processed; without this the
         * loop would keep re-adding the first two elements. */
        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail: picks up the last element when num_points is odd. */
    number = half_points * 2;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
#include <immintrin.h>

/*
 * AVX kernel for buffers with no particular alignment: element-wise sum of
 * two arrays of doubles.
 *
 * Writes cVector[i] = aVector[i] + bVector[i] for every i in [0, num_points).
 * Elements are processed four at a time with 256-bit unaligned loads/stores;
 * up to three trailing elements are handled by a scalar loop.
 *
 * cVector    - output array; must have room for num_points doubles
 * aVector    - first input array, num_points doubles
 * bVector    - second input array, num_points doubles
 * num_points - number of elements to process
 */
static inline void volk_64f_x2_add_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarter_points; number++) {

        aVal = _mm256_loadu_pd(aPtr);
        bVal = _mm256_loadu_pd(bPtr);

        cVal = _mm256_add_pd(aVal, bVal);

        _mm256_storeu_pd(cPtr, cVal);

        /* Step every cursor past the four elements just processed; without
         * this the loop would keep re-adding the first group. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop did not reach. */
    number = quarter_points * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
#include <emmintrin.h>

/*
 * SSE2 kernel for aligned buffers: element-wise sum of two arrays of doubles.
 *
 * Writes cVector[i] = aVector[i] + bVector[i] for every i in [0, num_points).
 * Elements are processed two at a time with 128-bit aligned loads/stores
 * (_mm_load_pd / _mm_store_pd), so all three buffers must start on a 16-byte
 * boundary. An odd trailing element is handled by a scalar loop.
 *
 * cVector    - output array, 16-byte aligned; room for num_points doubles
 * aVector    - first input array, 16-byte aligned, num_points doubles
 * bVector    - second input array, 16-byte aligned, num_points doubles
 * num_points - number of elements to process
 */
static inline void volk_64f_x2_add_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int half_points = num_points / 2;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m128d aVal, bVal, cVal;
    for (; number < half_points; number++) {
        aVal = _mm_load_pd(aPtr);
        bVal = _mm_load_pd(bPtr);

        cVal = _mm_add_pd(aVal, bVal);

        _mm_store_pd(cPtr, cVal);

        /* Step every cursor past the pair just processed; a stride of two
         * doubles (16 bytes) also keeps the next access aligned. */
        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail: picks up the last element when num_points is odd. */
    number = half_points * 2;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
#include <immintrin.h>

/*
 * AVX kernel for aligned buffers: element-wise sum of two arrays of doubles.
 *
 * Writes cVector[i] = aVector[i] + bVector[i] for every i in [0, num_points).
 * Elements are processed four at a time with 256-bit aligned loads/stores
 * (_mm256_load_pd / _mm256_store_pd), so all three buffers must start on a
 * 32-byte boundary. Up to three trailing elements are handled by a scalar
 * loop.
 *
 * cVector    - output array, 32-byte aligned; room for num_points doubles
 * aVector    - first input array, 32-byte aligned, num_points doubles
 * bVector    - second input array, 32-byte aligned, num_points doubles
 * num_points - number of elements to process
 */
static inline void volk_64f_x2_add_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarter_points; number++) {

        aVal = _mm256_load_pd(aPtr);
        bVal = _mm256_load_pd(bPtr);

        cVal = _mm256_add_pd(aVal, bVal);

        _mm256_store_pd(cPtr, cVal);

        /* Step every cursor past the four elements just processed; a stride
         * of four doubles (32 bytes) also keeps the next access aligned. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop did not reach. */
    number = quarter_points * 4;
    for (; number < num_points; number++) {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
static void volk_64f_x2_add_64f_u_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:104
static void volk_64f_x2_add_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:79
static void volk_64f_x2_add_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:186
static void volk_64f_x2_add_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:225
static void volk_64f_x2_add_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_add_64f.h:143