Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_deinterleave_32f_x2.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
73#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
74#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
75
76#include <inttypes.h>
77#include <stdio.h>
78
79#ifdef LV_HAVE_AVX
80#include <immintrin.h>
81static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
82 float* qBuffer,
83 const lv_32fc_t* complexVector,
84 unsigned int num_points)
85{
86 const float* complexVectorPtr = (float*)complexVector;
87 float* iBufferPtr = iBuffer;
88 float* qBufferPtr = qBuffer;
89
90 unsigned int number = 0;
91 // Mask for real and imaginary parts
92 const unsigned int eighthPoints = num_points / 8;
93 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
94 for (; number < eighthPoints; number++) {
95 cplxValue1 = _mm256_load_ps(complexVectorPtr);
96 complexVectorPtr += 8;
97
98 cplxValue2 = _mm256_load_ps(complexVectorPtr);
99 complexVectorPtr += 8;
100
101 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
102 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
103
104 // Arrange in i1i2i3i4 format
105 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
106 // Arrange in q1q2q3q4 format
107 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
108
109 _mm256_store_ps(iBufferPtr, iValue);
110 _mm256_store_ps(qBufferPtr, qValue);
111
112 iBufferPtr += 8;
113 qBufferPtr += 8;
114 }
115
116 number = eighthPoints * 8;
117 for (; number < num_points; number++) {
118 *iBufferPtr++ = *complexVectorPtr++;
119 *qBufferPtr++ = *complexVectorPtr++;
120 }
121}
122#endif /* LV_HAVE_AVX */
123
124#ifdef LV_HAVE_SSE
125#include <xmmintrin.h>
126
127static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
128 float* qBuffer,
129 const lv_32fc_t* complexVector,
130 unsigned int num_points)
131{
132 const float* complexVectorPtr = (float*)complexVector;
133 float* iBufferPtr = iBuffer;
134 float* qBufferPtr = qBuffer;
135
136 unsigned int number = 0;
137 const unsigned int quarterPoints = num_points / 4;
138 __m128 cplxValue1, cplxValue2, iValue, qValue;
139 for (; number < quarterPoints; number++) {
140 cplxValue1 = _mm_load_ps(complexVectorPtr);
141 complexVectorPtr += 4;
142
143 cplxValue2 = _mm_load_ps(complexVectorPtr);
144 complexVectorPtr += 4;
145
146 // Arrange in i1i2i3i4 format
147 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
148 // Arrange in q1q2q3q4 format
149 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
150
151 _mm_store_ps(iBufferPtr, iValue);
152 _mm_store_ps(qBufferPtr, qValue);
153
154 iBufferPtr += 4;
155 qBufferPtr += 4;
156 }
157
158 number = quarterPoints * 4;
159 for (; number < num_points; number++) {
160 *iBufferPtr++ = *complexVectorPtr++;
161 *qBufferPtr++ = *complexVectorPtr++;
162 }
163}
164#endif /* LV_HAVE_SSE */
165
166
167#ifdef LV_HAVE_NEON
168#include <arm_neon.h>
169
/*
 * Split an interleaved complex vector into separate real (I) and imaginary (Q)
 * float buffers, using ARM NEON.
 *
 * iBuffer       - output; receives num_points real parts
 * qBuffer       - output; receives num_points imaginary parts
 * complexVector - input; read as 2 * num_points floats laid out I, Q, I, Q, ...
 * num_points    - number of complex samples to process
 */
static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
                                                      float* qBuffer,
                                                      const lv_32fc_t* complexVector,
                                                      unsigned int num_points)
{
    unsigned int number;
    // Each vector iteration consumes 4 complex samples (8 floats).
    const unsigned int quarter_points = num_points / 4;
    // View the complex input as a flat float array without dropping const.
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        // vld2q_f32 de-interleaves while loading: val[0] gets the even
        // (real) elements, val[1] the odd (imaginary) elements.
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;
        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    // Scalar tail for the remaining (num_points % 4) samples.
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
196#endif /* LV_HAVE_NEON */
197
198
199#ifdef LV_HAVE_GENERIC
200
201static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
202 float* qBuffer,
203 const lv_32fc_t* complexVector,
204 unsigned int num_points)
205{
206 const float* complexVectorPtr = (float*)complexVector;
207 float* iBufferPtr = iBuffer;
208 float* qBufferPtr = qBuffer;
209 unsigned int number;
210 for (number = 0; number < num_points; number++) {
211 *iBufferPtr++ = *complexVectorPtr++;
212 *qBufferPtr++ = *complexVectorPtr++;
213 }
214}
215#endif /* LV_HAVE_GENERIC */
216
217#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
218
219
220#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
221#define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
222
223#include <inttypes.h>
224#include <stdio.h>
225
226#ifdef LV_HAVE_AVX
227#include <immintrin.h>
228static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
229 float* qBuffer,
230 const lv_32fc_t* complexVector,
231 unsigned int num_points)
232{
233 const float* complexVectorPtr = (float*)complexVector;
234 float* iBufferPtr = iBuffer;
235 float* qBufferPtr = qBuffer;
236
237 unsigned int number = 0;
238 // Mask for real and imaginary parts
239 const unsigned int eighthPoints = num_points / 8;
240 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
241 for (; number < eighthPoints; number++) {
242 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
243 complexVectorPtr += 8;
244
245 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
246 complexVectorPtr += 8;
247
248 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
249 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
250
251 // Arrange in i1i2i3i4 format
252 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
253 // Arrange in q1q2q3q4 format
254 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
255
256 _mm256_storeu_ps(iBufferPtr, iValue);
257 _mm256_storeu_ps(qBufferPtr, qValue);
258
259 iBufferPtr += 8;
260 qBufferPtr += 8;
261 }
262
263 number = eighthPoints * 8;
264 for (; number < num_points; number++) {
265 *iBufferPtr++ = *complexVectorPtr++;
266 *qBufferPtr++ = *complexVectorPtr++;
267 }
268}
269#endif /* LV_HAVE_AVX */
270#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:201
static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:81
static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:127
static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:170
static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:228
float complex lv_32fc_t
Definition: volk_complex.h:65