71#ifndef INCLUDED_volk_64f_x2_max_64f_a_H
72#define INCLUDED_volk_64f_x2_max_64f_a_H
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX-512F, aligned).
 *
 * Writes cVector[i] = (aVector[i] > bVector[i] ? aVector[i] : bVector[i])
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process.
 *
 * \note The aligned load/store intrinsics used here require all three
 *       buffers to be 64-byte aligned.
 */
static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m512d aVal, bVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm512_load_pd(aPtr);
        bVal = _mm512_load_pd(bPtr);

        cVal = _mm512_max_pd(aVal, bVal);

        _mm512_store_pd(cPtr, cVal);

        /* Step to the next group of 8 doubles; without this every iteration
         * would reprocess the first group and the tail loop below would start
         * at the wrong element. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the num_points % 8 leftover elements. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX512F */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX, aligned).
 *
 * Writes cVector[i] = (aVector[i] > bVector[i] ? aVector[i] : bVector[i])
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process.
 *
 * \note The aligned load/store intrinsics used here require all three
 *       buffers to be 32-byte aligned.
 */
static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm256_load_pd(aPtr);
        bVal = _mm256_load_pd(bPtr);

        cVal = _mm256_max_pd(aVal, bVal);

        _mm256_store_pd(cPtr, cVal);

        /* Step to the next group of 4 doubles; without this every iteration
         * would reprocess the first group and the tail loop below would start
         * at the wrong element. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the num_points % 4 leftover elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (SSE2, aligned).
 *
 * Writes cVector[i] = (aVector[i] > bVector[i] ? aVector[i] : bVector[i])
 * for every i in [0, num_points).
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process.
 *
 * \note The aligned load/store intrinsics used here require all three
 *       buffers to be 16-byte aligned.
 */
static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int halfPoints = num_points / 2;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m128d aVal, bVal, cVal;
    for (; number < halfPoints; number++) {
        aVal = _mm_load_pd(aPtr);
        bVal = _mm_load_pd(bPtr);

        cVal = _mm_max_pd(aVal, bVal);

        _mm_store_pd(cPtr, cVal);

        /* Step to the next pair of doubles; without this every iteration
         * would reprocess the first pair and the tail loop below would start
         * at the wrong element. */
        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail for the odd leftover element, if any. */
    number = halfPoints * 2;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Element-wise maximum of two double vectors (portable scalar version).
 *
 * Writes cVector[i] = (aVector[i] > bVector[i] ? aVector[i] : bVector[i])
 * for every i in [0, num_points). When num_points is 0 nothing is read or
 * written.
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        /* b is chosen whenever a > b is false, which includes ties. */
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_GENERIC */
221#ifndef INCLUDED_volk_64f_x2_max_64f_u_H
222#define INCLUDED_volk_64f_x2_max_64f_u_H
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX-512F, unaligned).
 *
 * Writes cVector[i] = (aVector[i] > bVector[i] ? aVector[i] : bVector[i])
 * for every i in [0, num_points). Uses unaligned load/store intrinsics, so
 * the buffers need no particular alignment.
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m512d aVal, bVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm512_loadu_pd(aPtr);
        bVal = _mm512_loadu_pd(bPtr);

        cVal = _mm512_max_pd(aVal, bVal);

        _mm512_storeu_pd(cPtr, cVal);

        /* Step to the next group of 8 doubles; without this every iteration
         * would reprocess the first group and the tail loop below would start
         * at the wrong element. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the num_points % 8 leftover elements. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX512F */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Element-wise maximum of two double vectors (AVX, unaligned).
 *
 * Writes cVector[i] = (aVector[i] > bVector[i] ? aVector[i] : bVector[i])
 * for every i in [0, num_points). Uses unaligned load/store intrinsics, so
 * the buffers need no particular alignment.
 *
 * \param cVector    Output buffer, at least num_points doubles.
 * \param aVector    First input buffer, at least num_points doubles.
 * \param bVector    Second input buffer, at least num_points doubles.
 * \param num_points Number of elements to process.
 */
static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    __m256d aVal, bVal, cVal;
    for (; number < quarterPoints; number++) {
        aVal = _mm256_loadu_pd(aPtr);
        bVal = _mm256_loadu_pd(bPtr);

        cVal = _mm256_max_pd(aVal, bVal);

        _mm256_storeu_pd(cPtr, cVal);

        /* Step to the next group of 4 doubles; without this every iteration
         * would reprocess the first group and the tail loop below would start
         * at the wrong element. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the num_points % 4 leftover elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
#endif /* LV_HAVE_AVX */
static void volk_64f_x2_max_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:120
static void volk_64f_x2_max_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:270
static void volk_64f_x2_max_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:160
static void volk_64f_x2_max_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:199