Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_deinterleave_64f_x2.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
73#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
74#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
75
76#include <inttypes.h>
77#include <stdio.h>
78
79#ifdef LV_HAVE_AVX
80#include <immintrin.h>
81
82static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer,
83 double* qBuffer,
84 const lv_32fc_t* complexVector,
85 unsigned int num_points)
86{
87 unsigned int number = 0;
88
89 const float* complexVectorPtr = (float*)complexVector;
90 double* iBufferPtr = iBuffer;
91 double* qBufferPtr = qBuffer;
92
93 const unsigned int quarterPoints = num_points / 4;
94 __m256 cplxValue;
95 __m128 complexH, complexL, fVal;
96 __m256d dVal;
97
98 for (; number < quarterPoints; number++) {
99
100 cplxValue = _mm256_loadu_ps(complexVectorPtr);
101 complexVectorPtr += 8;
102
103 complexH = _mm256_extractf128_ps(cplxValue, 1);
104 complexL = _mm256_extractf128_ps(cplxValue, 0);
105
106 // Arrange in i1i2i1i2 format
107 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
108 dVal = _mm256_cvtps_pd(fVal);
109 _mm256_storeu_pd(iBufferPtr, dVal);
110
111 // Arrange in q1q2q1q2 format
112 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
113 dVal = _mm256_cvtps_pd(fVal);
114 _mm256_storeu_pd(qBufferPtr, dVal);
115
116 iBufferPtr += 4;
117 qBufferPtr += 4;
118 }
119
120 number = quarterPoints * 4;
121 for (; number < num_points; number++) {
122 *iBufferPtr++ = *complexVectorPtr++;
123 *qBufferPtr++ = *complexVectorPtr++;
124 }
125}
126#endif /* LV_HAVE_AVX */
127
128#ifdef LV_HAVE_SSE2
129#include <emmintrin.h>
130
131static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer,
132 double* qBuffer,
133 const lv_32fc_t* complexVector,
134 unsigned int num_points)
135{
136 unsigned int number = 0;
137
138 const float* complexVectorPtr = (float*)complexVector;
139 double* iBufferPtr = iBuffer;
140 double* qBufferPtr = qBuffer;
141
142 const unsigned int halfPoints = num_points / 2;
143 __m128 cplxValue, fVal;
144 __m128d dVal;
145
146 for (; number < halfPoints; number++) {
147
148 cplxValue = _mm_loadu_ps(complexVectorPtr);
149 complexVectorPtr += 4;
150
151 // Arrange in i1i2i1i2 format
152 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
153 dVal = _mm_cvtps_pd(fVal);
154 _mm_storeu_pd(iBufferPtr, dVal);
155
156 // Arrange in q1q2q1q2 format
157 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
158 dVal = _mm_cvtps_pd(fVal);
159 _mm_storeu_pd(qBufferPtr, dVal);
160
161 iBufferPtr += 2;
162 qBufferPtr += 2;
163 }
164
165 number = halfPoints * 2;
166 for (; number < num_points; number++) {
167 *iBufferPtr++ = *complexVectorPtr++;
168 *qBufferPtr++ = *complexVectorPtr++;
169 }
170}
171#endif /* LV_HAVE_SSE2 */
172
173#ifdef LV_HAVE_GENERIC
174
175static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer,
176 double* qBuffer,
177 const lv_32fc_t* complexVector,
178 unsigned int num_points)
179{
180 unsigned int number = 0;
181 const float* complexVectorPtr = (float*)complexVector;
182 double* iBufferPtr = iBuffer;
183 double* qBufferPtr = qBuffer;
184
185 for (number = 0; number < num_points; number++) {
186 *iBufferPtr++ = (double)*complexVectorPtr++;
187 *qBufferPtr++ = (double)*complexVectorPtr++;
188 }
189}
190#endif /* LV_HAVE_GENERIC */
191
192#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
193#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
194#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
195
196#include <inttypes.h>
197#include <stdio.h>
198
199#ifdef LV_HAVE_AVX
200#include <immintrin.h>
201
202static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer,
203 double* qBuffer,
204 const lv_32fc_t* complexVector,
205 unsigned int num_points)
206{
207 unsigned int number = 0;
208
209 const float* complexVectorPtr = (float*)complexVector;
210 double* iBufferPtr = iBuffer;
211 double* qBufferPtr = qBuffer;
212
213 const unsigned int quarterPoints = num_points / 4;
214 __m256 cplxValue;
215 __m128 complexH, complexL, fVal;
216 __m256d dVal;
217
218 for (; number < quarterPoints; number++) {
219
220 cplxValue = _mm256_load_ps(complexVectorPtr);
221 complexVectorPtr += 8;
222
223 complexH = _mm256_extractf128_ps(cplxValue, 1);
224 complexL = _mm256_extractf128_ps(cplxValue, 0);
225
226 // Arrange in i1i2i1i2 format
227 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
228 dVal = _mm256_cvtps_pd(fVal);
229 _mm256_store_pd(iBufferPtr, dVal);
230
231 // Arrange in q1q2q1q2 format
232 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
233 dVal = _mm256_cvtps_pd(fVal);
234 _mm256_store_pd(qBufferPtr, dVal);
235
236 iBufferPtr += 4;
237 qBufferPtr += 4;
238 }
239
240 number = quarterPoints * 4;
241 for (; number < num_points; number++) {
242 *iBufferPtr++ = *complexVectorPtr++;
243 *qBufferPtr++ = *complexVectorPtr++;
244 }
245}
246#endif /* LV_HAVE_AVX */
247
248#ifdef LV_HAVE_SSE2
249#include <emmintrin.h>
250
251static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
252 double* qBuffer,
253 const lv_32fc_t* complexVector,
254 unsigned int num_points)
255{
256 unsigned int number = 0;
257
258 const float* complexVectorPtr = (float*)complexVector;
259 double* iBufferPtr = iBuffer;
260 double* qBufferPtr = qBuffer;
261
262 const unsigned int halfPoints = num_points / 2;
263 __m128 cplxValue, fVal;
264 __m128d dVal;
265
266 for (; number < halfPoints; number++) {
267
268 cplxValue = _mm_load_ps(complexVectorPtr);
269 complexVectorPtr += 4;
270
271 // Arrange in i1i2i1i2 format
272 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
273 dVal = _mm_cvtps_pd(fVal);
274 _mm_store_pd(iBufferPtr, dVal);
275
276 // Arrange in q1q2q1q2 format
277 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
278 dVal = _mm_cvtps_pd(fVal);
279 _mm_store_pd(qBufferPtr, dVal);
280
281 iBufferPtr += 2;
282 qBufferPtr += 2;
283 }
284
285 number = halfPoints * 2;
286 for (; number < num_points; number++) {
287 *iBufferPtr++ = *complexVectorPtr++;
288 *qBufferPtr++ = *complexVectorPtr++;
289 }
290}
291#endif /* LV_HAVE_SSE2 */
292
293#ifdef LV_HAVE_GENERIC
294
295static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer,
296 double* qBuffer,
297 const lv_32fc_t* complexVector,
298 unsigned int num_points)
299{
300 unsigned int number = 0;
301 const float* complexVectorPtr = (float*)complexVector;
302 double* iBufferPtr = iBuffer;
303 double* qBufferPtr = qBuffer;
304
305 for (number = 0; number < num_points; number++) {
306 *iBufferPtr++ = (double)*complexVectorPtr++;
307 *qBufferPtr++ = (double)*complexVectorPtr++;
308 }
309}
310#endif /* LV_HAVE_GENERIC */
311
312#ifdef LV_HAVE_NEONV8
313#include <arm_neon.h>
314
/*!
 * \brief Split interleaved 32-bit float complex samples into separate
 *        double-precision I and Q buffers (ARMv8 NEON).
 *
 * \param iBuffer       Output: real parts widened to double, num_points values.
 * \param qBuffer       Output: imaginary parts widened to double, num_points values.
 * \param complexVector Input: num_points complex samples, read as interleaved
 *                      floats (re, im, re, im, ...).
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer,
                                                      double* qBuffer,
                                                      const lv_32fc_t* complexVector,
                                                      unsigned int num_points)
{
    const float* in = (const float*)complexVector;
    double* iOut = iBuffer;
    double* qOut = qBuffer;

    /* Vector part: every pass consumes four floats, i.e. two complex samples. */
    const unsigned int numPairs = num_points / 2;
    for (unsigned int pair = 0; pair < numPairs; pair++) {
        /* vld2 de-interleaves on load: val[0] = real parts, val[1] = imaginary parts. */
        const float32x2x2_t samples = vld2_f32(in);

        const float64x2_t re = vcvt_f64_f32(samples.val[0]);
        const float64x2_t im = vcvt_f64_f32(samples.val[1]);

        /* Write out, I first and then Q. */
        vst1q_f64(iOut, re);
        vst1q_f64(qOut, im);

        in += 4;
        iOut += 2;
        qOut += 2;
    }

    /* Scalar tail: an odd num_points leaves exactly one sample over. */
    if ((num_points & 1u) != 0) {
        *iOut = (double)in[0];
        *qOut = (double)in[1];
    }
}
347#endif /* LV_HAVE_NEONV8 */
348
349#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:202
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:131
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:175
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:251
static void volk_32fc_deinterleave_64f_x2_a_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:295
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:82
float complex lv_32fc_t
Definition: volk_complex.h:65