Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_magnitude_squared_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
71#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
72#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
73
74#include <inttypes.h>
75#include <math.h>
76#include <stdio.h>
77
78#ifdef LV_HAVE_AVX
79#include <immintrin.h>
81
82static inline void volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector,
83 const lv_32fc_t* complexVector,
84 unsigned int num_points)
85{
86 unsigned int number = 0;
87 const unsigned int eighthPoints = num_points / 8;
88
89 const float* complexVectorPtr = (float*)complexVector;
90 float* magnitudeVectorPtr = magnitudeVector;
91
92 __m256 cplxValue1, cplxValue2, result;
93
94 for (; number < eighthPoints; number++) {
95 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
97 result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
98 _mm256_storeu_ps(magnitudeVectorPtr, result);
99
100 complexVectorPtr += 16;
101 magnitudeVectorPtr += 8;
102 }
103
104 number = eighthPoints * 8;
105 for (; number < num_points; number++) {
106 float val1Real = *complexVectorPtr++;
107 float val1Imag = *complexVectorPtr++;
108 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
109 }
110}
111#endif /* LV_HAVE_AVX */
112
113
114#ifdef LV_HAVE_SSE3
115#include <pmmintrin.h>
117
118static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector,
119 const lv_32fc_t* complexVector,
120 unsigned int num_points)
121{
122 unsigned int number = 0;
123 const unsigned int quarterPoints = num_points / 4;
124
125 const float* complexVectorPtr = (float*)complexVector;
126 float* magnitudeVectorPtr = magnitudeVector;
127
128 __m128 cplxValue1, cplxValue2, result;
129 for (; number < quarterPoints; number++) {
130 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
131 complexVectorPtr += 4;
132
133 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
134 complexVectorPtr += 4;
135
136 result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
137 _mm_storeu_ps(magnitudeVectorPtr, result);
138 magnitudeVectorPtr += 4;
139 }
140
141 number = quarterPoints * 4;
142 for (; number < num_points; number++) {
143 float val1Real = *complexVectorPtr++;
144 float val1Imag = *complexVectorPtr++;
145 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
146 }
147}
148#endif /* LV_HAVE_SSE3 */
149
150
151#ifdef LV_HAVE_SSE
153#include <xmmintrin.h>
154
155static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector,
156 const lv_32fc_t* complexVector,
157 unsigned int num_points)
158{
159 unsigned int number = 0;
160 const unsigned int quarterPoints = num_points / 4;
161
162 const float* complexVectorPtr = (float*)complexVector;
163 float* magnitudeVectorPtr = magnitudeVector;
164
165 __m128 cplxValue1, cplxValue2, result;
166
167 for (; number < quarterPoints; number++) {
168 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169 complexVectorPtr += 4;
170
171 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172 complexVectorPtr += 4;
173
174 result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
175 _mm_storeu_ps(magnitudeVectorPtr, result);
176 magnitudeVectorPtr += 4;
177 }
178
179 number = quarterPoints * 4;
180 for (; number < num_points; number++) {
181 float val1Real = *complexVectorPtr++;
182 float val1Imag = *complexVectorPtr++;
183 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
184 }
185}
186#endif /* LV_HAVE_SSE */
187
188
189#ifdef LV_HAVE_GENERIC
190
191static inline void volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector,
192 const lv_32fc_t* complexVector,
193 unsigned int num_points)
194{
195 const float* complexVectorPtr = (float*)complexVector;
196 float* magnitudeVectorPtr = magnitudeVector;
197 unsigned int number = 0;
198 for (number = 0; number < num_points; number++) {
199 const float real = *complexVectorPtr++;
200 const float imag = *complexVectorPtr++;
201 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
202 }
203}
204#endif /* LV_HAVE_GENERIC */
205
206
207#endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
208#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
209#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
210
211#include <inttypes.h>
212#include <math.h>
213#include <stdio.h>
214
215#ifdef LV_HAVE_AVX
216#include <immintrin.h>
218
219static inline void volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector,
220 const lv_32fc_t* complexVector,
221 unsigned int num_points)
222{
223 unsigned int number = 0;
224 const unsigned int eighthPoints = num_points / 8;
225
226 const float* complexVectorPtr = (float*)complexVector;
227 float* magnitudeVectorPtr = magnitudeVector;
228
229 __m256 cplxValue1, cplxValue2, result;
230 for (; number < eighthPoints; number++) {
231 cplxValue1 = _mm256_load_ps(complexVectorPtr);
232 complexVectorPtr += 8;
233
234 cplxValue2 = _mm256_load_ps(complexVectorPtr);
235 complexVectorPtr += 8;
236
237 result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
238 _mm256_store_ps(magnitudeVectorPtr, result);
239 magnitudeVectorPtr += 8;
240 }
241
242 number = eighthPoints * 8;
243 for (; number < num_points; number++) {
244 float val1Real = *complexVectorPtr++;
245 float val1Imag = *complexVectorPtr++;
246 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
247 }
248}
249#endif /* LV_HAVE_AVX */
250
251
252#ifdef LV_HAVE_SSE3
253#include <pmmintrin.h>
255
256static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector,
257 const lv_32fc_t* complexVector,
258 unsigned int num_points)
259{
260 unsigned int number = 0;
261 const unsigned int quarterPoints = num_points / 4;
262
263 const float* complexVectorPtr = (float*)complexVector;
264 float* magnitudeVectorPtr = magnitudeVector;
265
266 __m128 cplxValue1, cplxValue2, result;
267 for (; number < quarterPoints; number++) {
268 cplxValue1 = _mm_load_ps(complexVectorPtr);
269 complexVectorPtr += 4;
270
271 cplxValue2 = _mm_load_ps(complexVectorPtr);
272 complexVectorPtr += 4;
273
274 result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
275 _mm_store_ps(magnitudeVectorPtr, result);
276 magnitudeVectorPtr += 4;
277 }
278
279 number = quarterPoints * 4;
280 for (; number < num_points; number++) {
281 float val1Real = *complexVectorPtr++;
282 float val1Imag = *complexVectorPtr++;
283 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
284 }
285}
286#endif /* LV_HAVE_SSE3 */
287
288
289#ifdef LV_HAVE_SSE
291#include <xmmintrin.h>
292
293static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector,
294 const lv_32fc_t* complexVector,
295 unsigned int num_points)
296{
297 unsigned int number = 0;
298 const unsigned int quarterPoints = num_points / 4;
299
300 const float* complexVectorPtr = (float*)complexVector;
301 float* magnitudeVectorPtr = magnitudeVector;
302
303 __m128 cplxValue1, cplxValue2, result;
304 for (; number < quarterPoints; number++) {
305 cplxValue1 = _mm_load_ps(complexVectorPtr);
306 complexVectorPtr += 4;
307
308 cplxValue2 = _mm_load_ps(complexVectorPtr);
309 complexVectorPtr += 4;
310
311 result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
312 _mm_store_ps(magnitudeVectorPtr, result);
313 magnitudeVectorPtr += 4;
314 }
315
316 number = quarterPoints * 4;
317 for (; number < num_points; number++) {
318 float val1Real = *complexVectorPtr++;
319 float val1Imag = *complexVectorPtr++;
320 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
321 }
322}
323#endif /* LV_HAVE_SSE */
324
325
326#ifdef LV_HAVE_NEON
327#include <arm_neon.h>
328
329static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
330 const lv_32fc_t* complexVector,
331 unsigned int num_points)
332{
333 unsigned int number = 0;
334 const unsigned int quarterPoints = num_points / 4;
335
336 const float* complexVectorPtr = (float*)complexVector;
337 float* magnitudeVectorPtr = magnitudeVector;
338
339 float32x4x2_t cmplx_val;
340 float32x4_t result;
341 for (; number < quarterPoints; number++) {
342 cmplx_val = vld2q_f32(complexVectorPtr);
343 complexVectorPtr += 8;
344
345 cmplx_val.val[0] =
346 vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // Square the values
347 cmplx_val.val[1] =
348 vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // Square the values
349
350 result =
351 vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 and Q2 values
352
353 vst1q_f32(magnitudeVectorPtr, result);
354 magnitudeVectorPtr += 4;
355 }
356
357 number = quarterPoints * 4;
358 for (; number < num_points; number++) {
359 float val1Real = *complexVectorPtr++;
360 float val1Imag = *complexVectorPtr++;
361 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
362 }
363}
364#endif /* LV_HAVE_NEON */
365
366
367#ifdef LV_HAVE_GENERIC
368
370 float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
371{
372 const float* complexVectorPtr = (float*)complexVector;
373 float* magnitudeVectorPtr = magnitudeVector;
374 unsigned int number = 0;
375 for (number = 0; number < num_points; number++) {
376 const float real = *complexVectorPtr++;
377 const float imag = *complexVectorPtr++;
378 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
379 }
380}
381#endif /* LV_HAVE_GENERIC */
382
383#endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
static void volk_32fc_magnitude_squared_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:369
static void volk_32fc_magnitude_squared_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:329
static void volk_32fc_magnitude_squared_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:82
static void volk_32fc_magnitude_squared_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:256
static void volk_32fc_magnitude_squared_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:293
static void volk_32fc_magnitude_squared_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:219
static void volk_32fc_magnitude_squared_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:155
static void volk_32fc_magnitude_squared_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:191
static void volk_32fc_magnitude_squared_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:118
static __m256 _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:73
float complex lv_32fc_t
Definition: volk_complex.h:65
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:51
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:32