Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_conjugate_32fc.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
68#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
69#define INCLUDED_volk_32fc_conjugate_32fc_u_H
70
71#include <float.h>
72#include <inttypes.h>
73#include <stdio.h>
74#include <volk/volk_complex.h>
75
76#ifdef LV_HAVE_AVX
77#include <immintrin.h>
78
79static inline void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector,
80 const lv_32fc_t* aVector,
81 unsigned int num_points)
82{
83 unsigned int number = 0;
84 const unsigned int quarterPoints = num_points / 4;
85
86 __m256 x;
87 lv_32fc_t* c = cVector;
88 const lv_32fc_t* a = aVector;
89
90 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
91
92 for (; number < quarterPoints; number++) {
93
94 x = _mm256_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
95
96 x = _mm256_xor_ps(x, conjugator); // conjugate register
97
98 _mm256_storeu_ps((float*)c, x); // Store the results back into the C container
99
100 a += 4;
101 c += 4;
102 }
103
104 number = quarterPoints * 4;
105
106 for (; number < num_points; number++) {
107 *c++ = lv_conj(*a++);
108 }
109}
110#endif /* LV_HAVE_AVX */
111
112#ifdef LV_HAVE_SSE3
113#include <pmmintrin.h>
114
115static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector,
116 const lv_32fc_t* aVector,
117 unsigned int num_points)
118{
119 unsigned int number = 0;
120 const unsigned int halfPoints = num_points / 2;
121
122 __m128 x;
123 lv_32fc_t* c = cVector;
124 const lv_32fc_t* a = aVector;
125
126 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
127
128 for (; number < halfPoints; number++) {
129
130 x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
131
132 x = _mm_xor_ps(x, conjugator); // conjugate register
133
134 _mm_storeu_ps((float*)c, x); // Store the results back into the C container
135
136 a += 2;
137 c += 2;
138 }
139
140 if ((num_points % 2) != 0) {
141 *c = lv_conj(*a);
142 }
143}
144#endif /* LV_HAVE_SSE3 */
145
146#ifdef LV_HAVE_GENERIC
147
148static inline void volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector,
149 const lv_32fc_t* aVector,
150 unsigned int num_points)
151{
152 lv_32fc_t* cPtr = cVector;
153 const lv_32fc_t* aPtr = aVector;
154 unsigned int number = 0;
155
156 for (number = 0; number < num_points; number++) {
157 *cPtr++ = lv_conj(*aPtr++);
158 }
159}
160#endif /* LV_HAVE_GENERIC */
161
162
163#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
164#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
165#define INCLUDED_volk_32fc_conjugate_32fc_a_H
166
167#include <float.h>
168#include <inttypes.h>
169#include <stdio.h>
170#include <volk/volk_complex.h>
171
172#ifdef LV_HAVE_AVX
173#include <immintrin.h>
174
175static inline void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector,
176 const lv_32fc_t* aVector,
177 unsigned int num_points)
178{
179 unsigned int number = 0;
180 const unsigned int quarterPoints = num_points / 4;
181
182 __m256 x;
183 lv_32fc_t* c = cVector;
184 const lv_32fc_t* a = aVector;
185
186 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
187
188 for (; number < quarterPoints; number++) {
189
190 x = _mm256_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
191
192 x = _mm256_xor_ps(x, conjugator); // conjugate register
193
194 _mm256_store_ps((float*)c, x); // Store the results back into the C container
195
196 a += 4;
197 c += 4;
198 }
199
200 number = quarterPoints * 4;
201
202 for (; number < num_points; number++) {
203 *c++ = lv_conj(*a++);
204 }
205}
206#endif /* LV_HAVE_AVX */
207
208#ifdef LV_HAVE_SSE3
209#include <pmmintrin.h>
210
211static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector,
212 const lv_32fc_t* aVector,
213 unsigned int num_points)
214{
215 unsigned int number = 0;
216 const unsigned int halfPoints = num_points / 2;
217
218 __m128 x;
219 lv_32fc_t* c = cVector;
220 const lv_32fc_t* a = aVector;
221
222 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
223
224 for (; number < halfPoints; number++) {
225
226 x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
227
228 x = _mm_xor_ps(x, conjugator); // conjugate register
229
230 _mm_store_ps((float*)c, x); // Store the results back into the C container
231
232 a += 2;
233 c += 2;
234 }
235
236 if ((num_points % 2) != 0) {
237 *c = lv_conj(*a);
238 }
239}
240#endif /* LV_HAVE_SSE3 */
241
242#ifdef LV_HAVE_NEON
243#include <arm_neon.h>
244
245static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector,
246 const lv_32fc_t* aVector,
247 unsigned int num_points)
248{
249 unsigned int number;
250 const unsigned int quarterPoints = num_points / 4;
251
252 float32x4x2_t x;
253 lv_32fc_t* c = cVector;
254 const lv_32fc_t* a = aVector;
255
256 for (number = 0; number < quarterPoints; number++) {
257 __VOLK_PREFETCH(a + 4);
258 x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; ai,bi,ci,di
259
260 // xor the imaginary lane
261 x.val[1] = vnegq_f32(x.val[1]);
262
263 vst2q_f32((float*)c, x); // Store the results back into the C container
264
265 a += 4;
266 c += 4;
267 }
268
269 for (number = quarterPoints * 4; number < num_points; number++) {
270 *c++ = lv_conj(*a++);
271 }
272}
273#endif /* LV_HAVE_NEON */
274
275
276#ifdef LV_HAVE_GENERIC
277
279 const lv_32fc_t* aVector,
280 unsigned int num_points)
281{
282 lv_32fc_t* cPtr = cVector;
283 const lv_32fc_t* aPtr = aVector;
284 unsigned int number = 0;
285
286 for (number = 0; number < num_points; number++) {
287 *cPtr++ = lv_conj(*aPtr++);
288 }
289}
290#endif /* LV_HAVE_GENERIC */
291
292
293#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
static void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:175
static void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:115
static void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:211
static void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:245
static void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:79
static void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:278
static void volk_32fc_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition: volk_32fc_conjugate_32fc.h:148
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
#define lv_conj(x)
Definition: volk_complex.h:91
float complex lv_32fc_t
Definition: volk_complex.h:65