56#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
57#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
/*
 * AVX2 kernel, aligned variant: converts the first 16-bit component of each
 * complex sample in complexVector to float, multiplies it by 1/scalar and
 * writes the result to iBuffer.
 *
 * iBuffer     - output float buffer; written with an aligned 256-bit store.
 * num_points  - number of complex input samples to process.
 *
 * NOTE(review): this listing is incomplete. The return type, the
 * complexVector and scalar parameters, the braces, the declaration of
 * iFloatValue, most of the moveMask initializer and any advance of
 * iBufferPtr inside the vector loop are not visible here - confirm against
 * the complete file before relying on these comments.
 */
67volk_16ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
70 unsigned int num_points)
72 float* iBufferPtr = iBuffer;
74 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
75 const unsigned int eighthPoints = num_points / 8;
/* Multiply by the reciprocal instead of dividing every sample. */
79 const float iScalar = 1.0 / scalar;
80 __m256 invScalar = _mm256_set1_ps(iScalar);
81 __m256i complexVal, iIntVal;
82 __m128i complexVal128;
/* Byte-wise view of the input so it can be advanced 32 bytes per iteration. */
83 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Byte-shuffle control; only its first argument is visible in this listing. */
85 __m256i moveMask = _mm256_set_epi8(0x80,
118 for (; number < eighthPoints; number++) {
/* Aligned 256-bit load: 32 bytes = 8 complex samples of two int16 each. */
119 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
120 complexVectorPtr += 32;
/* Rearranges bytes within each 128-bit lane as directed by moveMask. */
121 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/* 0xd8 reorders the four 64-bit quarters as 0,2,1,3, so quarters 0 and 2
 * end up side by side in the low 128 bits. */
122 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Keep only the low 128 bits (eight int16 values). */
123 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
/* Sign-extend the eight int16 values to int32, then convert to float. */
125 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
126 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
128 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/* Aligned store of eight floats. */
130 _mm256_store_ps(iBufferPtr, iFloatValue);
/* Scalar tail: handles the num_points % 8 samples left after the vector loop. */
135 number = eighthPoints * 8;
136 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
137 for (; number < num_points; number++) {
/* Convert the first int16 of the sample (presumably the real part) ... */
138 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
/* ... and step over the second int16 of the same sample. */
139 sixteenTComplexVectorPtr++;
145#include <smmintrin.h>
/*
 * SSE4.1 kernel, aligned variant: converts the first 16-bit component of each
 * complex sample in complexVector to float, multiplies it by 1/scalar and
 * writes the result to iBuffer, four samples per vector iteration.
 *
 * iBuffer     - output float buffer; written with an aligned 128-bit store.
 * num_points  - number of complex input samples to process.
 *
 * NOTE(review): this listing is incomplete. The return type, the
 * complexVector and scalar parameters, the braces, the declaration of
 * iFloatValue and any advance of iBufferPtr inside the vector loop are not
 * visible here - confirm against the complete file.
 */
148volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
151 unsigned int num_points)
153 float* iBufferPtr = iBuffer;
155 unsigned int number = 0;
/* Number of full 4-sample groups handled by the vector loop. */
156 const unsigned int quarterPoints = num_points / 4;
/* Multiply by the reciprocal instead of dividing every sample. */
160 const float iScalar = 1.0 / scalar;
161 __m128 invScalar = _mm_set_ps1(iScalar);
162 __m128i complexVal, iIntVal;
/* Byte-wise view of the input so it can be advanced 16 bytes per iteration. */
163 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Shuffle control (arguments run from byte 15 down to byte 0): source bytes
 * 0-1, 4-5, 8-9 and 12-13 are packed into the low 8 bytes, i.e. the first
 * int16 of every 4-byte sample; 0x80 zeroes the upper 8 bytes. */
165 __m128i moveMask = _mm_set_epi8(
166 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
168 for (; number < quarterPoints; number++) {
/* Aligned 128-bit load: 16 bytes = 4 complex samples of two int16 each. */
169 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
170 complexVectorPtr += 16;
171 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
/* Sign-extend the four packed int16 values to int32, then convert to float. */
173 iIntVal = _mm_cvtepi16_epi32(complexVal);
174 iFloatValue = _mm_cvtepi32_ps(iIntVal);
176 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
/* Aligned store of four floats. */
178 _mm_store_ps(iBufferPtr, iFloatValue);
/* Scalar tail: handles the num_points % 4 samples left after the vector loop. */
183 number = quarterPoints * 4;
184 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
185 for (; number < num_points; number++) {
/* Convert the first int16 of the sample (presumably the real part) ... */
186 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
/* ... and step over the second int16 of the same sample. */
187 sixteenTComplexVectorPtr++;
193#include <xmmintrin.h>
/*
 * SSE kernel, aligned variant: gathers every second int16 of the input into a
 * small float staging buffer, then scales four values at a time by 1/scalar
 * and stores them to iBuffer.
 *
 * NOTE(review): this listing is incomplete. The start of the signature, the
 * braces, the declarations of floatBuffer and iValue and any advance of
 * iBufferPtr inside the vector loop are not visible here - confirm against
 * the complete file.
 */
199 unsigned int num_points)
201 float* iBufferPtr = iBuffer;
203 unsigned int number = 0;
/* Number of full 4-sample groups handled by the vector loop. */
204 const unsigned int quarterPoints = num_points / 4;
/* Multiply by the reciprocal instead of dividing every sample. */
207 const float iScalar = 1.0 / scalar;
208 __m128 invScalar = _mm_set_ps1(iScalar);
/* int16 view of the input: each complex sample spans two consecutive int16. */
209 int16_t* complexVectorPtr = (int16_t*)complexVector;
213 for (; number < quarterPoints; number++) {
/* Convert the first int16 of four consecutive samples; each "+= 2" skips
 * the second int16 of the sample just read. */
214 floatBuffer[0] = (float)(*complexVectorPtr);
215 complexVectorPtr += 2;
216 floatBuffer[1] = (float)(*complexVectorPtr);
217 complexVectorPtr += 2;
218 floatBuffer[2] = (float)(*complexVectorPtr);
219 complexVectorPtr += 2;
220 floatBuffer[3] = (float)(*complexVectorPtr);
221 complexVectorPtr += 2;
/* Aligned load of the staged floats, scale, aligned store to the output. */
223 iValue = _mm_load_ps(floatBuffer);
225 iValue = _mm_mul_ps(iValue, invScalar);
227 _mm_store_ps(iBufferPtr, iValue);
/* Scalar tail: handles the num_points % 4 samples left after the vector loop. */
232 number = quarterPoints * 4;
233 complexVectorPtr = (int16_t*)&complexVector[number];
234 for (; number < num_points; number++) {
/* NOTE(review): only a single increment of complexVectorPtr is visible here,
 * whereas the vector loop above steps two int16 per sample. The listing
 * jumps from line 235 to 241, so a second increment may have been dropped
 * by the extraction - verify against the complete file. */
235 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
241#ifdef LV_HAVE_GENERIC
/*
 * Generic (non-SIMD) kernel: walks the input as int16 values, converts each
 * value read to float, multiplies it by 1/scalar and writes it to iBuffer,
 * one output per loop iteration for num_points iterations.
 *
 * NOTE(review): this listing is incomplete. The start of the signature and
 * the braces are not visible here - confirm against the complete file.
 */
246 unsigned int num_points)
248 unsigned int number = 0;
/* int16 view of the complex input. */
249 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
250 float* iBufferPtr = iBuffer;
/* Multiply by the reciprocal instead of dividing every sample. */
251 const float invScalar = 1.0 / scalar;
252 for (number = 0; number < num_points; number++) {
/* NOTE(review): only a single increment of complexVectorPtr is visible in
 * this loop. The listing jumps from line 253 to 262, so a second increment
 * (to step over the other int16 of each sample) may have been dropped by
 * the extraction - verify against the complete file. */
253 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
262#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
263#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
270#include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant: converts the first 16-bit component of each
 * complex sample in complexVector to float, multiplies it by 1/scalar and
 * writes the result to iBuffer. Uses unaligned load/store intrinsics, so no
 * particular buffer alignment is demanded by the visible code.
 *
 * iBuffer     - output float buffer; written with an unaligned 256-bit store.
 * num_points  - number of complex input samples to process.
 *
 * NOTE(review): this listing is incomplete. The return type, the
 * complexVector and scalar parameters, the braces, the declaration of
 * iFloatValue, most of the moveMask initializer and any advance of
 * iBufferPtr inside the vector loop are not visible here - confirm against
 * the complete file before relying on these comments.
 */
273volk_16ic_s32f_deinterleave_real_32f_u_avx2(
float* iBuffer,
276 unsigned int num_points)
278 float* iBufferPtr = iBuffer;
280 unsigned int number = 0;
/* Number of full 8-sample groups handled by the vector loop. */
281 const unsigned int eighthPoints = num_points / 8;
/* Multiply by the reciprocal instead of dividing every sample. */
285 const float iScalar = 1.0 / scalar;
286 __m256 invScalar = _mm256_set1_ps(iScalar);
287 __m256i complexVal, iIntVal;
288 __m128i complexVal128;
/* Byte-wise view of the input so it can be advanced 32 bytes per iteration. */
289 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Byte-shuffle control; only its first argument is visible in this listing. */
291 __m256i moveMask = _mm256_set_epi8(0x80,
324 for (; number < eighthPoints; number++) {
/* Unaligned 256-bit load: 32 bytes = 8 complex samples of two int16 each. */
325 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
326 complexVectorPtr += 32;
/* Rearranges bytes within each 128-bit lane as directed by moveMask. */
327 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/* 0xd8 reorders the four 64-bit quarters as 0,2,1,3, so quarters 0 and 2
 * end up side by side in the low 128 bits. */
328 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Keep only the low 128 bits (eight int16 values). */
329 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
/* Sign-extend the eight int16 values to int32, then convert to float. */
331 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
332 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
334 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/* Unaligned store of eight floats. */
336 _mm256_storeu_ps(iBufferPtr, iFloatValue);
/* Scalar tail: handles the num_points % 8 samples left after the vector loop. */
341 number = eighthPoints * 8;
342 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
343 for (; number < num_points; number++) {
/* Convert the first int16 of the sample (presumably the real part) ... */
344 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
/* ... and step over the second int16 of the same sample. */
345 sixteenTComplexVectorPtr++;
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:243
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:196
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
short complex lv_16sc_t
Definition: volk_complex.h:62