56#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
57#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
/*
 * SSE4.1 kernel: splits interleaved signed 8-bit I/Q samples read from
 * complexVector into two float outputs (iBuffer, qBuffer), multiplying every
 * value by 1/scalar.
 *
 * Uses _mm_load_si128 / _mm_store_ps, which require 16-byte-aligned
 * addresses. Eight complex points (16 input bytes) are handled per vector
 * iteration; leftover points go through the scalar loop at the end.
 *
 * NOTE(review): the return type, the qBuffer/complexVector/scalar parameters,
 * the braces and the output-pointer advances after each store are not visible
 * in this listing -- confirm against the real header before editing.
 */
68volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(
float* iBuffer,
72 unsigned int num_points)
74 float* iBufferPtr = iBuffer;
75 float* qBufferPtr = qBuffer;
77 unsigned int number = 0;
78 const unsigned int eighthPoints = num_points / 8;
79 __m128 iFloatValue, qFloatValue;
/* Reciprocal computed once so both loops multiply instead of divide. */
81 const float iScalar = 1.0 / scalar;
82 __m128 invScalar = _mm_set_ps1(iScalar);
83 __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
84 int8_t* complexVectorPtr = (int8_t*)complexVector;
/*
 * Byte-shuffle masks (arguments are listed from byte 15 down to byte 0):
 * iMoveMask packs the even input bytes 0,2,...,14 into the low 8 bytes,
 * qMoveMask packs the odd input bytes 1,3,...,15. A 0x80 selector makes
 * _mm_shuffle_epi8 write zero to that destination byte.
 */
86 __m128i iMoveMask = _mm_set_epi8(
87 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
88 __m128i qMoveMask = _mm_set_epi8(
89 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
91 for (; number < eighthPoints; number++) {
/* Load 16 bytes = 8 interleaved complex points, then separate I and Q bytes. */
92 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
93 complexVectorPtr += 16;
94 iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
95 qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);
/* First four I values: sign-extend the low 4 bytes to int32, convert, scale, store. */
97 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
98 iFloatValue = _mm_cvtepi32_ps(iIntVal);
99 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
100 _mm_store_ps(iBufferPtr, iFloatValue);
/* Shift right by 4 bytes so the next four I bytes sit at the bottom of the register. */
103 iComplexVal = _mm_srli_si128(iComplexVal, 4);
105 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
106 iFloatValue = _mm_cvtepi32_ps(iIntVal);
107 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
108 _mm_store_ps(iBufferPtr, iFloatValue);
/* Same two-step sequence for the Q bytes. */
111 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
112 qFloatValue = _mm_cvtepi32_ps(qIntVal);
113 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
114 _mm_store_ps(qBufferPtr, qFloatValue);
117 qComplexVal = _mm_srli_si128(qComplexVal, 4);
119 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
120 qFloatValue = _mm_cvtepi32_ps(qIntVal);
121 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
122 _mm_store_ps(qBufferPtr, qFloatValue);
/* Scalar tail: remaining (num_points % 8) points, one I byte then one Q byte each. */
127 number = eighthPoints * 8;
128 for (; number < num_points; number++) {
129 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
130 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
137#include <xmmintrin.h>
/*
 * SSE kernel: splits interleaved signed 8-bit I/Q samples into two float
 * outputs scaled by 1/scalar, four complex points per vector iteration.
 * The bytes are widened to float one by one in C (plain SSE is used only for
 * the multiply, the shuffle and the aligned load/store).
 *
 * NOTE(review): the start of the signature, the braces, the floatBuffer
 * declaration and the output-pointer advances are not visible in this
 * listing -- confirm against the real header before editing.
 */
143 unsigned int num_points)
145 float* iBufferPtr = iBuffer;
146 float* qBufferPtr = qBuffer;
148 unsigned int number = 0;
149 const unsigned int quarterPoints = num_points / 4;
150 __m128 cplxValue1, cplxValue2, iValue, qValue;
152 __m128 invScalar = _mm_set_ps1(1.0 / scalar);
153 int8_t* complexVectorPtr = (int8_t*)complexVector;
157 for (; number < quarterPoints; number++) {
/* Widen 8 input bytes (4 complex points) to float in the staging buffer. */
158 floatBuffer[0] = (float)(complexVectorPtr[0]);
159 floatBuffer[1] = (float)(complexVectorPtr[1]);
160 floatBuffer[2] = (float)(complexVectorPtr[2]);
161 floatBuffer[3] = (float)(complexVectorPtr[3]);
163 floatBuffer[4] = (float)(complexVectorPtr[4]);
164 floatBuffer[5] = (float)(complexVectorPtr[5]);
165 floatBuffer[6] = (float)(complexVectorPtr[6]);
166 floatBuffer[7] = (float)(complexVectorPtr[7]);
/* _mm_load_ps needs 16-byte alignment; presumably floatBuffer is declared aligned -- TODO confirm. */
168 cplxValue1 = _mm_load_ps(&floatBuffer[0]);
169 cplxValue2 = _mm_load_ps(&floatBuffer[4]);
171 complexVectorPtr += 8;
173 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
174 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
/*
 * De-interleave: (2,0,2,0) picks elements 0 and 2 of each source vector
 * (the I values); (3,1,3,1) picks elements 1 and 3 (the Q values).
 */
177 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
178 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
180 _mm_store_ps(iBufferPtr, iValue);
181 _mm_store_ps(qBufferPtr, qValue);
/*
 * Scalar tail for the remaining (num_points % 4) points. The input pointer
 * is recomputed from the point index rather than reusing the running pointer.
 * NOTE(review): the tail divides by scalar while the vector loop multiplies
 * by the precomputed reciprocal, so the two paths may round differently.
 */
187 number = quarterPoints * 4;
188 complexVectorPtr = (int8_t*)&complexVector[number];
189 for (; number < num_points; number++) {
190 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
191 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
198#include <immintrin.h>
/*
 * AVX2 kernel: splits interleaved signed 8-bit I/Q samples into two float
 * outputs scaled by 1/scalar, sixteen complex points (32 input bytes) per
 * vector iteration. Uses _mm256_load_si256 / _mm256_store_ps, which require
 * 32-byte-aligned addresses.
 *
 * NOTE(review): most of the parameter list, the braces, the bodies of the two
 * shuffle-mask initialisers and the output-pointer advances are not visible
 * in this listing -- confirm against the real header before editing.
 */
200static inline void volk_8ic_s32f_deinterleave_32f_x2_a_avx2(
float* iBuffer,
204 unsigned int num_points)
206 float* iBufferPtr = iBuffer;
207 float* qBufferPtr = qBuffer;
209 unsigned int number = 0;
210 const unsigned int sixteenthPoints = num_points / 16;
211 __m256 iFloatValue, qFloatValue;
/* Reciprocal computed once so both loops multiply instead of divide. */
213 const float iScalar = 1.0 / scalar;
214 __m256 invScalar = _mm256_set1_ps(iScalar);
215 __m256i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
216 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Byte-shuffle masks for the I and Q bytes; their selector lists are elided here. */
218 __m256i iMoveMask = _mm256_set_epi8(0x80,
250 __m256i qMoveMask = _mm256_set_epi8(0x80,
283 for (; number < sixteenthPoints; number++) {
284 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
285 complexVectorPtr += 32;
/* _mm256_shuffle_epi8 shuffles within each 128-bit lane independently. */
286 iComplexVal = _mm256_shuffle_epi8(complexVal, iMoveMask);
287 qComplexVal = _mm256_shuffle_epi8(complexVal, qMoveMask);
/* Sign-extend the low 8 bytes of the low lane to eight int32, convert, scale, store. */
289 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
290 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
291 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
292 _mm256_store_ps(iBufferPtr, iFloatValue);
/*
 * Control 0b11000110 swaps 64-bit elements 0 and 2, moving the low 8 bytes
 * of the upper lane to the bottom -- presumably the remaining eight I bytes
 * (depends on the elided mask; TODO confirm).
 */
295 iComplexVal = _mm256_permute4x64_epi64(iComplexVal, 0b11000110);
296 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
297 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
298 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
299 _mm256_store_ps(iBufferPtr, iFloatValue);
/* Same two-step sequence for the Q bytes. */
302 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
303 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
304 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
305 _mm256_store_ps(qBufferPtr, qFloatValue);
308 qComplexVal = _mm256_permute4x64_epi64(qComplexVal, 0b11000110);
309 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
310 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
311 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
312 _mm256_store_ps(qBufferPtr, qFloatValue);
/* Scalar tail: remaining (num_points % 16) points, one I byte then one Q byte each. */
316 number = sixteenthPoints * 16;
317 for (; number < num_points; number++) {
318 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
319 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
325#ifdef LV_HAVE_GENERIC
/*
 * Portable C version: walks the input as a flat array of signed bytes,
 * writing each even byte (I) to iBuffer and each odd byte (Q) to qBuffer as
 * a float multiplied by 1/scalar.
 *
 * NOTE(review): the start of the signature, the braces and the declaration of
 * `number` are not visible in this listing -- confirm against the real header.
 */
332 unsigned int num_points)
334 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
335 float* iBufferPtr = iBuffer;
336 float* qBufferPtr = qBuffer;
/* Reciprocal computed once so the loop multiplies instead of divides. */
338 const float invScalar = 1.0 / scalar;
339 for (number = 0; number < num_points; number++) {
340 *iBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
341 *qBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
350#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
351#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
358#include <immintrin.h>
/*
 * AVX2 kernel for unaligned buffers (uses _mm256_loadu_si256 /
 * _mm256_storeu_ps): splits interleaved signed 8-bit I/Q samples into two
 * float outputs scaled by 1/scalar, sixteen complex points (32 input bytes)
 * per vector iteration.
 *
 * NOTE(review): most of the parameter list, the braces, the body of the
 * shuffle-mask initialiser and the output-pointer advances are not visible
 * in this listing -- confirm against the real header before editing.
 */
360static inline void volk_8ic_s32f_deinterleave_32f_x2_u_avx2(
float* iBuffer,
364 unsigned int num_points)
366 float* iBufferPtr = iBuffer;
367 float* qBufferPtr = qBuffer;
369 unsigned int number = 0;
370 const unsigned int sixteenthPoints = num_points / 16;
371 __m256 iFloatValue, qFloatValue;
/* Reciprocal computed once so both loops multiply instead of divide. */
373 const float iScalar = 1.0 / scalar;
374 __m256 invScalar = _mm256_set1_ps(iScalar);
375 __m256i complexVal, iIntVal, qIntVal;
376 __m128i iComplexVal, qComplexVal;
377 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Single byte-shuffle mask shared by I and Q; its selector list is elided here. */
379 __m256i MoveMask = _mm256_set_epi8(15,
412 for (; number < sixteenthPoints; number++) {
413 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
414 complexVectorPtr += 32;
/*
 * Shuffle bytes within each 128-bit lane, then reorder the 64-bit elements
 * as 0,2,1,3 (control 0xd8). The low 128-bit half is subsequently stored to
 * the I output and the high half to the Q output.
 */
415 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
416 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
417 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
418 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
/* Sign-extend the low 8 bytes of each half to eight int32, convert, scale, store. */
420 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
421 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
422 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
423 _mm256_storeu_ps(iBufferPtr, iFloatValue);
426 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
427 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
428 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
429 _mm256_storeu_ps(qBufferPtr, qFloatValue);
/* Shift each 128-bit lane right by 8 bytes to expose the upper 8 bytes of each half. */
432 complexVal = _mm256_srli_si256(complexVal, 8);
433 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
434 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
436 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
437 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
438 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
439 _mm256_storeu_ps(iBufferPtr, iFloatValue);
442 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
443 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
444 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
445 _mm256_storeu_ps(qBufferPtr, qFloatValue);
/* Scalar tail: remaining (num_points % 16) points, one I byte then one Q byte each. */
449 number = sixteenthPoints * 16;
450 for (; number < num_points; number++) {
451 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
452 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
static void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_32f_x2.h:139
static void volk_8ic_s32f_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_32f_x2.h:328
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61