75#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
76#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
85static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(
lv_16sc_t* complexVector,
89 unsigned int num_points)
91 unsigned int number = 0;
92 const float* iBufferPtr = iBuffer;
93 const float* qBufferPtr = qBuffer;
95 __m256 vScalar = _mm256_set1_ps(scalar);
97 const unsigned int eighthPoints = num_points / 8;
99 __m256 iValue, qValue, cplxValue1, cplxValue2;
100 __m256i intValue1, intValue2;
102 int16_t* complexVectorPtr = (int16_t*)complexVector;
104 for (; number < eighthPoints; number++) {
105 iValue = _mm256_load_ps(iBufferPtr);
106 qValue = _mm256_load_ps(qBufferPtr);
109 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
110 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
113 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
114 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
116 intValue1 = _mm256_cvtps_epi32(cplxValue1);
117 intValue2 = _mm256_cvtps_epi32(cplxValue2);
119 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
121 _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
122 complexVectorPtr += 16;
128 number = eighthPoints * 8;
129 complexVectorPtr = (int16_t*)(&complexVector[number]);
130 for (; number < num_points; number++) {
131 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
132 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
139#include <emmintrin.h>
142 const float* iBuffer,
143 const float* qBuffer,
145 unsigned int num_points)
147 unsigned int number = 0;
148 const float* iBufferPtr = iBuffer;
149 const float* qBufferPtr = qBuffer;
151 __m128 vScalar = _mm_set_ps1(scalar);
153 const unsigned int quarterPoints = num_points / 4;
155 __m128 iValue, qValue, cplxValue1, cplxValue2;
156 __m128i intValue1, intValue2;
158 int16_t* complexVectorPtr = (int16_t*)complexVector;
160 for (; number < quarterPoints; number++) {
161 iValue = _mm_load_ps(iBufferPtr);
162 qValue = _mm_load_ps(qBufferPtr);
165 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
166 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
169 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
170 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
172 intValue1 = _mm_cvtps_epi32(cplxValue1);
173 intValue2 = _mm_cvtps_epi32(cplxValue2);
175 intValue1 = _mm_packs_epi32(intValue1, intValue2);
177 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
178 complexVectorPtr += 8;
184 number = quarterPoints * 4;
185 complexVectorPtr = (int16_t*)(&complexVector[number]);
186 for (; number < num_points; number++) {
187 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
188 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
195#include <xmmintrin.h>
198 const float* iBuffer,
199 const float* qBuffer,
201 unsigned int num_points)
203 unsigned int number = 0;
204 const float* iBufferPtr = iBuffer;
205 const float* qBufferPtr = qBuffer;
207 __m128 vScalar = _mm_set_ps1(scalar);
209 const unsigned int quarterPoints = num_points / 4;
211 __m128 iValue, qValue, cplxValue;
213 int16_t* complexVectorPtr = (int16_t*)complexVector;
217 for (; number < quarterPoints; number++) {
218 iValue = _mm_load_ps(iBufferPtr);
219 qValue = _mm_load_ps(qBufferPtr);
222 cplxValue = _mm_unpacklo_ps(iValue, qValue);
223 cplxValue = _mm_mul_ps(cplxValue, vScalar);
225 _mm_store_ps(floatBuffer, cplxValue);
227 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
228 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
229 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
230 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
233 cplxValue = _mm_unpackhi_ps(iValue, qValue);
234 cplxValue = _mm_mul_ps(cplxValue, vScalar);
236 _mm_store_ps(floatBuffer, cplxValue);
238 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
239 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
240 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
241 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
247 number = quarterPoints * 4;
248 complexVectorPtr = (int16_t*)(&complexVector[number]);
249 for (; number < num_points; number++) {
250 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
251 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
257#ifdef LV_HAVE_GENERIC
260 const float* iBuffer,
261 const float* qBuffer,
263 unsigned int num_points)
265 int16_t* complexVectorPtr = (int16_t*)complexVector;
266 const float* iBufferPtr = iBuffer;
267 const float* qBufferPtr = qBuffer;
268 unsigned int number = 0;
270 for (number = 0; number < num_points; number++) {
271 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
272 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
280#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
281#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
288#include <immintrin.h>
290static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(
lv_16sc_t* complexVector,
291 const float* iBuffer,
292 const float* qBuffer,
294 unsigned int num_points)
296 unsigned int number = 0;
297 const float* iBufferPtr = iBuffer;
298 const float* qBufferPtr = qBuffer;
300 __m256 vScalar = _mm256_set1_ps(scalar);
302 const unsigned int eighthPoints = num_points / 8;
304 __m256 iValue, qValue, cplxValue1, cplxValue2;
305 __m256i intValue1, intValue2;
307 int16_t* complexVectorPtr = (int16_t*)complexVector;
309 for (; number < eighthPoints; number++) {
310 iValue = _mm256_loadu_ps(iBufferPtr);
311 qValue = _mm256_loadu_ps(qBufferPtr);
314 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
315 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
318 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
319 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
321 intValue1 = _mm256_cvtps_epi32(cplxValue1);
322 intValue2 = _mm256_cvtps_epi32(cplxValue2);
324 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
326 _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
327 complexVectorPtr += 16;
333 number = eighthPoints * 8;
334 complexVectorPtr = (int16_t*)(&complexVector[number]);
335 for (; number < num_points; number++) {
336 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
337 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
static float rintf(float x)
Definition: config.h:37
static void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:141
static void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:197
static void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_x2_s32f_interleave_16ic.h:259
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
short complex lv_16sc_t
Definition: volk_complex.h:62