Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_s32f_magnitude_16i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
73#ifdef LV_HAVE_GENERIC
74#include <volk/volk_common.h>
75
76static inline void volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector,
77 const lv_32fc_t* complexVector,
78 const float scalar,
79 unsigned int num_points)
80{
81 const float* complexVectorPtr = (float*)complexVector;
82 int16_t* magnitudeVectorPtr = magnitudeVector;
83 unsigned int number = 0;
84 for (number = 0; number < num_points; number++) {
85 __VOLK_VOLATILE float real = *complexVectorPtr++;
86 __VOLK_VOLATILE float imag = *complexVectorPtr++;
87 real *= real;
88 imag *= imag;
89 *magnitudeVectorPtr++ = (int16_t)rintf(scalar * sqrtf(real + imag));
90 }
91}
92#endif /* LV_HAVE_GENERIC */
93
94#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
95#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
96
97#include <inttypes.h>
98#include <math.h>
99#include <stdio.h>
100#include <volk/volk_common.h>
101
102#ifdef LV_HAVE_AVX2
103#include <immintrin.h>
104
105static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
106 const lv_32fc_t* complexVector,
107 const float scalar,
108 unsigned int num_points)
109{
110 unsigned int number = 0;
111 const unsigned int eighthPoints = num_points / 8;
112
113 const float* complexVectorPtr = (const float*)complexVector;
114 int16_t* magnitudeVectorPtr = magnitudeVector;
115
116 __m256 vScalar = _mm256_set1_ps(scalar);
117 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
118 __m256 cplxValue1, cplxValue2, result;
119 __m256i resultInt;
120 __m128i resultShort;
121
122 for (; number < eighthPoints; number++) {
123 cplxValue1 = _mm256_load_ps(complexVectorPtr);
124 complexVectorPtr += 8;
125
126 cplxValue2 = _mm256_load_ps(complexVectorPtr);
127 complexVectorPtr += 8;
128
129 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
130 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
131
132 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
133
134 result = _mm256_sqrt_ps(result);
135
136 result = _mm256_mul_ps(result, vScalar);
137
138 resultInt = _mm256_cvtps_epi32(result);
139 resultInt = _mm256_packs_epi32(resultInt, resultInt);
140 resultInt = _mm256_permutevar8x32_epi32(
141 resultInt, idx); // permute to compensate for shuffling in hadd and packs
142 resultShort = _mm256_extracti128_si256(resultInt, 0);
143 _mm_store_si128((__m128i*)magnitudeVectorPtr, resultShort);
144 magnitudeVectorPtr += 8;
145 }
146
147 number = eighthPoints * 8;
149 magnitudeVector + number, complexVector + number, scalar, num_points - number);
150}
151#endif /* LV_HAVE_AVX2 */
152
153#ifdef LV_HAVE_SSE3
154#include <pmmintrin.h>
155
156static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector,
157 const lv_32fc_t* complexVector,
158 const float scalar,
159 unsigned int num_points)
160{
161 unsigned int number = 0;
162 const unsigned int quarterPoints = num_points / 4;
163
164 const float* complexVectorPtr = (const float*)complexVector;
165 int16_t* magnitudeVectorPtr = magnitudeVector;
166
167 __m128 vScalar = _mm_set_ps1(scalar);
168
169 __m128 cplxValue1, cplxValue2, result;
170
171 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
172
173 for (; number < quarterPoints; number++) {
174 cplxValue1 = _mm_load_ps(complexVectorPtr);
175 complexVectorPtr += 4;
176
177 cplxValue2 = _mm_load_ps(complexVectorPtr);
178 complexVectorPtr += 4;
179
180 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
181 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
182
183 result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
184
185 result = _mm_sqrt_ps(result);
186
187 result = _mm_mul_ps(result, vScalar);
188
189 _mm_store_ps(floatBuffer, result);
190 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
191 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
192 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
193 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
194 }
195
196 number = quarterPoints * 4;
198 magnitudeVector + number, complexVector + number, scalar, num_points - number);
199}
200#endif /* LV_HAVE_SSE3 */
201
202
203#ifdef LV_HAVE_SSE
204#include <xmmintrin.h>
205
206static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector,
207 const lv_32fc_t* complexVector,
208 const float scalar,
209 unsigned int num_points)
210{
211 unsigned int number = 0;
212 const unsigned int quarterPoints = num_points / 4;
213
214 const float* complexVectorPtr = (const float*)complexVector;
215 int16_t* magnitudeVectorPtr = magnitudeVector;
216
217 __m128 vScalar = _mm_set_ps1(scalar);
218
219 __m128 cplxValue1, cplxValue2, result;
220 __m128 iValue, qValue;
221
222 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
223
224 for (; number < quarterPoints; number++) {
225 cplxValue1 = _mm_load_ps(complexVectorPtr);
226 complexVectorPtr += 4;
227
228 cplxValue2 = _mm_load_ps(complexVectorPtr);
229 complexVectorPtr += 4;
230
231 // Arrange in i1i2i3i4 format
232 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
233 // Arrange in q1q2q3q4 format
234 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
235
236 __VOLK_VOLATILE __m128 iValue2 =
237 _mm_mul_ps(iValue, iValue); // Square the I values
238 __VOLK_VOLATILE __m128 qValue2 =
239 _mm_mul_ps(qValue, qValue); // Square the Q Values
240
241 result = _mm_add_ps(iValue2, qValue2); // Add the I2 and Q2 values
242
243 result = _mm_sqrt_ps(result);
244
245 result = _mm_mul_ps(result, vScalar);
246
247 _mm_store_ps(floatBuffer, result);
248 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
249 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
250 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
251 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
252 }
253
254 number = quarterPoints * 4;
256 magnitudeVector + number, complexVector + number, scalar, num_points - number);
257}
258#endif /* LV_HAVE_SSE */
259
260
261#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
262
263#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
264#define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
265
266#include <inttypes.h>
267#include <math.h>
268#include <stdio.h>
269#include <volk/volk_common.h>
270
271#ifdef LV_HAVE_AVX2
272#include <immintrin.h>
273
274static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
275 const lv_32fc_t* complexVector,
276 const float scalar,
277 unsigned int num_points)
278{
279 unsigned int number = 0;
280 const unsigned int eighthPoints = num_points / 8;
281
282 const float* complexVectorPtr = (const float*)complexVector;
283 int16_t* magnitudeVectorPtr = magnitudeVector;
284
285 __m256 vScalar = _mm256_set1_ps(scalar);
286 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
287 __m256 cplxValue1, cplxValue2, result;
288 __m256i resultInt;
289 __m128i resultShort;
290
291 for (; number < eighthPoints; number++) {
292 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
293 complexVectorPtr += 8;
294
295 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
296 complexVectorPtr += 8;
297
298 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
299 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
300
301 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
302
303 result = _mm256_sqrt_ps(result);
304
305 result = _mm256_mul_ps(result, vScalar);
306
307 resultInt = _mm256_cvtps_epi32(result);
308 resultInt = _mm256_packs_epi32(resultInt, resultInt);
309 resultInt = _mm256_permutevar8x32_epi32(
310 resultInt, idx); // permute to compensate for shuffling in hadd and packs
311 resultShort = _mm256_extracti128_si256(resultInt, 0);
312 _mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
313 magnitudeVectorPtr += 8;
314 }
315
316 number = eighthPoints * 8;
318 magnitudeVector + number, complexVector + number, scalar, num_points - number);
319}
320#endif /* LV_HAVE_AVX2 */
321
322#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */
static float rintf(float x)
Definition: config.h:37
static void volk_32fc_s32f_magnitude_16i_generic(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:76
static void volk_32fc_s32f_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:206
static void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:156
#define __VOLK_VOLATILE
Definition: volk_common.h:64
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
float complex lv_32fc_t
Definition: volk_complex.h:65