Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_s32f_atan2_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
74#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
75#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
76
77#include <inttypes.h>
78#include <math.h>
79#include <stdio.h>
80
81#ifdef LV_HAVE_SSE4_1
82#include <smmintrin.h>
83
84#ifdef LV_HAVE_LIB_SIMDMATH
85#include <simdmath.h>
86#endif /* LV_HAVE_LIB_SIMDMATH */
87
88static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,
89 const lv_32fc_t* complexVector,
90 const float normalizeFactor,
91 unsigned int num_points)
92{
93 const float* complexVectorPtr = (float*)complexVector;
94 float* outPtr = outputVector;
95
96 unsigned int number = 0;
97 const float invNormalizeFactor = 1.0 / normalizeFactor;
98
99#ifdef LV_HAVE_LIB_SIMDMATH
100 const unsigned int quarterPoints = num_points / 4;
101 __m128 testVector = _mm_set_ps1(2 * M_PI);
102 __m128 correctVector = _mm_set_ps1(M_PI);
103 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
104 __m128 phase;
105 __m128 complex1, complex2, iValue, qValue;
106 __m128 keepMask;
107
108 for (; number < quarterPoints; number++) {
109 // Load IQ data:
110 complex1 = _mm_load_ps(complexVectorPtr);
111 complexVectorPtr += 4;
112 complex2 = _mm_load_ps(complexVectorPtr);
113 complexVectorPtr += 4;
114 // Deinterleave IQ data:
115 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
116 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
117 // Arctan to get phase:
118 phase = atan2f4(qValue, iValue);
119 // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
120 // Compare to 2pi:
121 keepMask = _mm_cmpneq_ps(phase, testVector);
122 phase = _mm_blendv_ps(correctVector, phase, keepMask);
123 // done with above correction.
124 phase = _mm_mul_ps(phase, vNormalizeFactor);
125 _mm_store_ps((float*)outPtr, phase);
126 outPtr += 4;
127 }
128 number = quarterPoints * 4;
129#endif /* LV_HAVE_LIB_SIMDMATH */
130
131 for (; number < num_points; number++) {
132 const float real = *complexVectorPtr++;
133 const float imag = *complexVectorPtr++;
134 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
135 }
136}
137#endif /* LV_HAVE_SSE4_1 */
138
139
140#ifdef LV_HAVE_SSE
141#include <xmmintrin.h>
142
143#ifdef LV_HAVE_LIB_SIMDMATH
144#include <simdmath.h>
145#endif /* LV_HAVE_LIB_SIMDMATH */
146
147static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
148 const lv_32fc_t* complexVector,
149 const float normalizeFactor,
150 unsigned int num_points)
151{
152 const float* complexVectorPtr = (float*)complexVector;
153 float* outPtr = outputVector;
154
155 unsigned int number = 0;
156 const float invNormalizeFactor = 1.0 / normalizeFactor;
157
158#ifdef LV_HAVE_LIB_SIMDMATH
159 const unsigned int quarterPoints = num_points / 4;
160 __m128 testVector = _mm_set_ps1(2 * M_PI);
161 __m128 correctVector = _mm_set_ps1(M_PI);
162 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
163 __m128 phase;
164 __m128 complex1, complex2, iValue, qValue;
165 __m128 mask;
166 __m128 keepMask;
167
168 for (; number < quarterPoints; number++) {
169 // Load IQ data:
170 complex1 = _mm_load_ps(complexVectorPtr);
171 complexVectorPtr += 4;
172 complex2 = _mm_load_ps(complexVectorPtr);
173 complexVectorPtr += 4;
174 // Deinterleave IQ data:
175 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
176 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
177 // Arctan to get phase:
178 phase = atan2f4(qValue, iValue);
179 // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
180 // Compare to 2pi:
181 keepMask = _mm_cmpneq_ps(phase, testVector);
182 phase = _mm_and_ps(phase, keepMask);
183 mask = _mm_andnot_ps(keepMask, correctVector);
184 phase = _mm_or_ps(phase, mask);
185 // done with above correction.
186 phase = _mm_mul_ps(phase, vNormalizeFactor);
187 _mm_store_ps((float*)outPtr, phase);
188 outPtr += 4;
189 }
190 number = quarterPoints * 4;
191#endif /* LV_HAVE_LIB_SIMDMATH */
192
193 for (; number < num_points; number++) {
194 const float real = *complexVectorPtr++;
195 const float imag = *complexVectorPtr++;
196 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
197 }
198}
199#endif /* LV_HAVE_SSE */
200
201#ifdef LV_HAVE_GENERIC
202
203static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector,
204 const lv_32fc_t* inputVector,
205 const float normalizeFactor,
206 unsigned int num_points)
207{
208 float* outPtr = outputVector;
209 const float* inPtr = (float*)inputVector;
210 const float invNormalizeFactor = 1.0 / normalizeFactor;
211 unsigned int number;
212 for (number = 0; number < num_points; number++) {
213 const float real = *inPtr++;
214 const float imag = *inPtr++;
215 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
216 }
217}
218#endif /* LV_HAVE_GENERIC */
219
220
221#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
static void volk_32fc_s32f_atan2_32f_generic(float *outputVector, const lv_32fc_t *inputVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:203
static void volk_32fc_s32f_atan2_32f_a_sse(float *outputVector, const lv_32fc_t *complexVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:147
float complex lv_32fc_t
Definition: volk_complex.h:65