57#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
58#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
67 const float* inputVector,
70 unsigned int num_points)
75 unsigned int number = 1;
79 const unsigned int eighthPoints = (num_points - 1) / 8;
81 float* outPtr = outputVector;
82 const float* inPtr = inputVector;
83 __m256 upperBound = _mm256_set1_ps(bound);
84 __m256 lowerBound = _mm256_set1_ps(-bound);
88 __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound);
89 __m256 negBoundAdjust = _mm256_set1_ps(2 * bound);
91 *outPtr = *inPtr - *saveValue;
98 for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
99 *outPtr = *(inPtr) - *(inPtr - 1);
101 *outPtr -= 2 * bound;
102 if (*outPtr < -bound)
103 *outPtr += 2 * bound;
108 for (; number < eighthPoints; number++) {
110 next3old1 = _mm256_loadu_ps((
float*)(inPtr - 1));
111 next4 = _mm256_load_ps(inPtr);
114 next3old1 = _mm256_sub_ps(next4, next3old1);
116 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
117 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
118 next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
119 next4 = _mm256_and_ps(next4, negBoundAdjust);
120 boundAdjust = _mm256_or_ps(next4, boundAdjust);
122 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
123 _mm256_store_ps(outPtr, next3old1);
127 for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
129 *outPtr = *(inPtr) - *(inPtr - 1);
131 *outPtr -= 2 * bound;
132 if (*outPtr < -bound)
133 *outPtr += 2 * bound;
138 *saveValue = inputVector[num_points - 1];
144#include <xmmintrin.h>
147 const float* inputVector,
150 unsigned int num_points)
152 if (num_points < 1) {
155 unsigned int number = 1;
159 const unsigned int quarterPoints = (num_points - 1) / 4;
161 float* outPtr = outputVector;
162 const float* inPtr = inputVector;
163 __m128 upperBound = _mm_set_ps1(bound);
164 __m128 lowerBound = _mm_set_ps1(-bound);
168 __m128 posBoundAdjust = _mm_set_ps1(-2 * bound);
169 __m128 negBoundAdjust = _mm_set_ps1(2 * bound);
171 *outPtr = *inPtr - *saveValue;
173 *outPtr -= 2 * bound;
174 if (*outPtr < -bound)
175 *outPtr += 2 * bound;
178 for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) {
179 *outPtr = *(inPtr) - *(inPtr - 1);
181 *outPtr -= 2 * bound;
182 if (*outPtr < -bound)
183 *outPtr += 2 * bound;
188 for (; number < quarterPoints; number++) {
190 next3old1 = _mm_loadu_ps((
float*)(inPtr - 1));
191 next4 = _mm_load_ps(inPtr);
194 next3old1 = _mm_sub_ps(next4, next3old1);
196 boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
197 boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
198 next4 = _mm_cmplt_ps(next3old1, lowerBound);
199 next4 = _mm_and_ps(next4, negBoundAdjust);
200 boundAdjust = _mm_or_ps(next4, boundAdjust);
202 next3old1 = _mm_add_ps(next3old1, boundAdjust);
203 _mm_store_ps(outPtr, next3old1);
207 for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints));
210 *outPtr = *(inPtr) - *(inPtr - 1);
212 *outPtr -= 2 * bound;
213 if (*outPtr < -bound)
214 *outPtr += 2 * bound;
219 *saveValue = inputVector[num_points - 1];
223#ifdef LV_HAVE_GENERIC
226 const float* inputVector,
229 unsigned int num_points)
231 if (num_points < 1) {
234 unsigned int number = 0;
235 float* outPtr = outputVector;
236 const float* inPtr = inputVector;
239 *outPtr = *inPtr - *saveValue;
241 *outPtr -= 2 * bound;
242 if (*outPtr < -bound)
243 *outPtr += 2 * bound;
247 for (number = 1; number < num_points; number++) {
248 *outPtr = *(inPtr) - *(inPtr - 1);
250 *outPtr -= 2 * bound;
251 if (*outPtr < -bound)
252 *outPtr += 2 * bound;
257 *saveValue = inputVector[num_points - 1];
265#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
266#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
272#include <immintrin.h>
275 const float* inputVector,
278 unsigned int num_points)
280 if (num_points < 1) {
283 unsigned int number = 1;
287 const unsigned int eighthPoints = (num_points - 1) / 8;
289 float* outPtr = outputVector;
290 const float* inPtr = inputVector;
291 __m256 upperBound = _mm256_set1_ps(bound);
292 __m256 lowerBound = _mm256_set1_ps(-bound);
296 __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound);
297 __m256 negBoundAdjust = _mm256_set1_ps(2 * bound);
299 *outPtr = *inPtr - *saveValue;
301 *outPtr -= 2 * bound;
302 if (*outPtr < -bound)
303 *outPtr += 2 * bound;
306 for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
307 *outPtr = *(inPtr) - *(inPtr - 1);
309 *outPtr -= 2 * bound;
310 if (*outPtr < -bound)
311 *outPtr += 2 * bound;
316 for (; number < eighthPoints; number++) {
318 next3old1 = _mm256_loadu_ps((
float*)(inPtr - 1));
319 next4 = _mm256_loadu_ps(inPtr);
322 next3old1 = _mm256_sub_ps(next4, next3old1);
324 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
325 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
326 next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
327 next4 = _mm256_and_ps(next4, negBoundAdjust);
328 boundAdjust = _mm256_or_ps(next4, boundAdjust);
330 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
331 _mm256_storeu_ps(outPtr, next3old1);
335 for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
337 *outPtr = *(inPtr) - *(inPtr - 1);
339 *outPtr -= 2 * bound;
340 if (*outPtr < -bound)
341 *outPtr += 2 * bound;
346 *saveValue = inputVector[num_points - 1];
static void volk_32f_s32f_32f_fm_detect_32f_a_avx(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:66
static void volk_32f_s32f_32f_fm_detect_32f_u_avx(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:274
static void volk_32f_s32f_32f_fm_detect_32f_a_sse(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:146
static void volk_32f_s32f_32f_fm_detect_32f_generic(float *outputVector, const float *inputVector, const float bound, float *saveValue, unsigned int num_points)
Definition: volk_32f_s32f_32f_fm_detect_32f.h:225