Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
53#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
54#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
55
56#include <inttypes.h>
57#include <math.h>
58#include <stdio.h>
59
60#ifdef LV_HAVE_GENERIC
61
62static inline void
64 const lv_32fc_t* complexFFTInput,
65 const float normalizationFactor,
66 unsigned int num_points)
67{
68 // Calculate the Power of the complex point
69 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
70
71 // Calculate dBm
72 // 50 ohm load assumption
73 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
74 // 75 ohm load assumption
75 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
76
77 /*
78 * For generic reference, the code below is a volk-optimized
79 * approach that also leverages a faster log2 calculation
80 * to calculate the log10:
81 * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
82 *
83 * Generic code:
84 *
85 * const float real = *inputPtr++ * iNormalizationFactor;
86 * const float imag = *inputPtr++ * iNormalizationFactor;
87 * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
88 * realFFTDataPointsPtr++;
89 *
90 */
91
92 // Calc mag^2
93 volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
94
95 // Finish ((real * real) + (imag * imag)) calculation:
96 volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
97
98 // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
99 // * log2(x)
100 volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
101 volk_32f_s32f_multiply_32f(
102 logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
103}
104#endif /* LV_HAVE_GENERIC */
105
106#ifdef LV_HAVE_SSE3
107#include <pmmintrin.h>
108
109#ifdef LV_HAVE_LIB_SIMDMATH
110#include <simdmath.h>
111#endif /* LV_HAVE_LIB_SIMDMATH */
112
113static inline void
115 const lv_32fc_t* complexFFTInput,
116 const float normalizationFactor,
117 unsigned int num_points)
118{
119 const float* inputPtr = (const float*)complexFFTInput;
120 float* destPtr = logPowerOutput;
121 uint64_t number = 0;
122 const float iNormalizationFactor = 1.0 / normalizationFactor;
123#ifdef LV_HAVE_LIB_SIMDMATH
124 __m128 magScalar = _mm_set_ps1(10.0);
125 magScalar = _mm_div_ps(magScalar, logf4(magScalar));
126
127 __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
128
129 __m128 power;
130 __m128 input1, input2;
131 const uint64_t quarterPoints = num_points / 4;
132 for (; number < quarterPoints; number++) {
133 // Load the complex values
134 input1 = _mm_load_ps(inputPtr);
135 inputPtr += 4;
136 input2 = _mm_load_ps(inputPtr);
137 inputPtr += 4;
138
139 // Apply the normalization factor
140 input1 = _mm_mul_ps(input1, invNormalizationFactor);
141 input2 = _mm_mul_ps(input2, invNormalizationFactor);
142
143 // Multiply each value by itself
144 // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
145 input1 = _mm_mul_ps(input1, input1);
146 // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
147 input2 = _mm_mul_ps(input2, input2);
148
149 // Horizontal add, to add (r*r) + (i*i) for each complex value
150 // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
151 power = _mm_hadd_ps(input1, input2);
152
153 // Calculate the natural log power
154 power = logf4(power);
155
156 // Convert to log10 and multiply by 10.0
157 power = _mm_mul_ps(power, magScalar);
158
159 // Store the floating point results
160 _mm_store_ps(destPtr, power);
161
162 destPtr += 4;
163 }
164
165 number = quarterPoints * 4;
166#endif /* LV_HAVE_LIB_SIMDMATH */
167 // Calculate the FFT for any remaining points
168
169 for (; number < num_points; number++) {
170 // Calculate dBm
171 // 50 ohm load assumption
172 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
173 // 75 ohm load assumption
174 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
175
176 const float real = *inputPtr++ * iNormalizationFactor;
177 const float imag = *inputPtr++ * iNormalizationFactor;
178
179 *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
180
181 destPtr++;
182 }
183}
184#endif /* LV_HAVE_SSE3 */
185
186#ifdef LV_HAVE_NEON
187#include <arm_neon.h>
189
190static inline void
192 const lv_32fc_t* complexFFTInput,
193 const float normalizationFactor,
194 unsigned int num_points)
195{
196 float* logPowerOutputPtr = logPowerOutput;
197 const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
198 const float iNormalizationFactor = 1.0 / normalizationFactor;
199 unsigned int number;
200 unsigned int quarter_points = num_points / 4;
201 float32x4x2_t fft_vec;
202 float32x4_t log_pwr_vec;
203 float32x4_t mag_squared_vec;
204
205 const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
206
207 for (number = 0; number < quarter_points; number++) {
208 // Load
209 fft_vec = vld2q_f32((float*)complexFFTInputPtr);
210 // Prefetch next 4
211 __VOLK_PREFETCH(complexFFTInputPtr + 4);
212 // Normalize
213 fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
214 fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
215 mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
216 log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
217 // Store
218 vst1q_f32(logPowerOutputPtr, log_pwr_vec);
219 // Move pointers ahead
220 complexFFTInputPtr += 4;
221 logPowerOutputPtr += 4;
222 }
223
224 // deal with the rest
225 for (number = quarter_points * 4; number < num_points; number++) {
226 const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
227 const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
228
229 *logPowerOutputPtr =
230 volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
231 complexFFTInputPtr++;
232 logPowerOutputPtr++;
233 }
234}
235
236#endif /* LV_HAVE_NEON */
237
238#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
static void volk_32fc_s32f_power_spectrum_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:114
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:63
static void volk_32fc_s32f_power_spectrum_32f_neon(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:191
#define volk_log2to10factor
Definition: volk_common.h:160
static float log2f_non_ieee(float f)
Definition: volk_common.h:150
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65
static float32x4_t _vlogq_f32(float32x4_t x)
Definition: volk_neon_intrinsics.h:156
static float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
Definition: volk_neon_intrinsics.h:86