Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32f_x2_interleave_32fc.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
73#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
74#define INCLUDED_volk_32f_x2_interleave_32fc_a_H
75
76#include <inttypes.h>
77#include <stdio.h>
78
79#ifdef LV_HAVE_AVX
80#include <immintrin.h>
81
82static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
83 const float* iBuffer,
84 const float* qBuffer,
85 unsigned int num_points)
86{
87 unsigned int number = 0;
88 float* complexVectorPtr = (float*)complexVector;
89 const float* iBufferPtr = iBuffer;
90 const float* qBufferPtr = qBuffer;
91
92 const uint64_t eighthPoints = num_points / 8;
93
94 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
95 for (; number < eighthPoints; number++) {
96 iValue = _mm256_load_ps(iBufferPtr);
97 qValue = _mm256_load_ps(qBufferPtr);
98
99 // Interleaves the lower two values in the i and q variables into one buffer
100 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
101 // Interleaves the upper two values in the i and q variables into one buffer
102 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
103
104 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
105 _mm256_store_ps(complexVectorPtr, cplxValue);
106 complexVectorPtr += 8;
107
108 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
109 _mm256_store_ps(complexVectorPtr, cplxValue);
110 complexVectorPtr += 8;
111
112 iBufferPtr += 8;
113 qBufferPtr += 8;
114 }
115
116 number = eighthPoints * 8;
117 for (; number < num_points; number++) {
118 *complexVectorPtr++ = *iBufferPtr++;
119 *complexVectorPtr++ = *qBufferPtr++;
120 }
121}
122
123#endif /* LV_HAVE_AVX */
124
125#ifdef LV_HAVE_SSE
126#include <xmmintrin.h>
127
128static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
129 const float* iBuffer,
130 const float* qBuffer,
131 unsigned int num_points)
132{
133 unsigned int number = 0;
134 float* complexVectorPtr = (float*)complexVector;
135 const float* iBufferPtr = iBuffer;
136 const float* qBufferPtr = qBuffer;
137
138 const uint64_t quarterPoints = num_points / 4;
139
140 __m128 iValue, qValue, cplxValue;
141 for (; number < quarterPoints; number++) {
142 iValue = _mm_load_ps(iBufferPtr);
143 qValue = _mm_load_ps(qBufferPtr);
144
145 // Interleaves the lower two values in the i and q variables into one buffer
146 cplxValue = _mm_unpacklo_ps(iValue, qValue);
147 _mm_store_ps(complexVectorPtr, cplxValue);
148 complexVectorPtr += 4;
149
150 // Interleaves the upper two values in the i and q variables into one buffer
151 cplxValue = _mm_unpackhi_ps(iValue, qValue);
152 _mm_store_ps(complexVectorPtr, cplxValue);
153 complexVectorPtr += 4;
154
155 iBufferPtr += 4;
156 qBufferPtr += 4;
157 }
158
159 number = quarterPoints * 4;
160 for (; number < num_points; number++) {
161 *complexVectorPtr++ = *iBufferPtr++;
162 *complexVectorPtr++ = *qBufferPtr++;
163 }
164}
165#endif /* LV_HAVE_SSE */
166
167
168#ifdef LV_HAVE_NEON
169#include <arm_neon.h>
170
/*
 * Interleave two float buffers into one complex buffer (ARM NEON).
 *
 * For each of the num_points indices n, iBuffer[n] is written as the first
 * float and qBuffer[n] as the second float of output point n in complexVector.
 */
static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
                                                    const float* iBuffer,
                                                    const float* qBuffer,
                                                    unsigned int num_points)
{
    const unsigned int vector_iterations = num_points / 4;
    // The complex output is addressed as a flat run of floats: I, Q, I, Q, ...
    float* out = (float*)complexVector;
    const float* in_i = iBuffer;
    const float* in_q = qBuffer;
    unsigned int block;
    unsigned int tail;

    // Vector part: 4 input points per iteration. vst2q_f32 stores the two
    // registers element-interleaved (val[0][0], val[1][0], val[0][1], ...),
    // so no explicit shuffle is needed.
    for (block = 0; block < vector_iterations; ++block) {
        float32x4x2_t pairs;
        pairs.val[0] = vld1q_f32(in_i);
        pairs.val[1] = vld1q_f32(in_q);
        vst2q_f32(out, pairs);

        in_i += 4;
        in_q += 4;
        out += 8;
    }

    // Scalar part: the num_points % 4 points the vector loop did not cover.
    for (tail = num_points % 4; tail > 0; --tail) {
        *out++ = *in_i++;
        *out++ = *in_q++;
    }
}
195#endif /* LV_HAVE_NEON */
196
197
198#ifdef LV_HAVE_GENERIC
199
200static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
201 const float* iBuffer,
202 const float* qBuffer,
203 unsigned int num_points)
204{
205 float* complexVectorPtr = (float*)complexVector;
206 const float* iBufferPtr = iBuffer;
207 const float* qBufferPtr = qBuffer;
208 unsigned int number;
209
210 for (number = 0; number < num_points; number++) {
211 *complexVectorPtr++ = *iBufferPtr++;
212 *complexVectorPtr++ = *qBufferPtr++;
213 }
214}
215#endif /* LV_HAVE_GENERIC */
216
217
218#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
219
220#ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
221#define INCLUDED_volk_32f_x2_interleave_32fc_u_H
222
223#include <inttypes.h>
224#include <stdio.h>
225
226#ifdef LV_HAVE_AVX
227#include <immintrin.h>
228
229static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
230 const float* iBuffer,
231 const float* qBuffer,
232 unsigned int num_points)
233{
234 unsigned int number = 0;
235 float* complexVectorPtr = (float*)complexVector;
236 const float* iBufferPtr = iBuffer;
237 const float* qBufferPtr = qBuffer;
238
239 const uint64_t eighthPoints = num_points / 8;
240
241 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
242 for (; number < eighthPoints; number++) {
243 iValue = _mm256_loadu_ps(iBufferPtr);
244 qValue = _mm256_loadu_ps(qBufferPtr);
245
246 // Interleaves the lower two values in the i and q variables into one buffer
247 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
248 // Interleaves the upper two values in the i and q variables into one buffer
249 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
250
251 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
252 _mm256_storeu_ps(complexVectorPtr, cplxValue);
253 complexVectorPtr += 8;
254
255 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
256 _mm256_storeu_ps(complexVectorPtr, cplxValue);
257 complexVectorPtr += 8;
258
259 iBufferPtr += 8;
260 qBufferPtr += 8;
261 }
262
263 number = eighthPoints * 8;
264 for (; number < num_points; number++) {
265 *complexVectorPtr++ = *iBufferPtr++;
266 *complexVectorPtr++ = *qBufferPtr++;
267 }
268}
269#endif /* LV_HAVE_AVX */
270
271#endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */
static void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:82
static void volk_32f_x2_interleave_32fc_generic(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:200
static void volk_32f_x2_interleave_32fc_neon(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:171
static void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:229
static void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:128
float complex lv_32fc_t
Definition: volk_complex.h:65