Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_16ic_convert_32fc.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2016 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
58#ifndef INCLUDED_volk_16ic_convert_32fc_a_H
59#define INCLUDED_volk_16ic_convert_32fc_a_H
60
61#include <volk/volk_complex.h>
62
63#ifdef LV_HAVE_AVX2
64#include <immintrin.h>
65
66static inline void volk_16ic_convert_32fc_a_avx2(lv_32fc_t* outputVector,
67 const lv_16sc_t* inputVector,
68 unsigned int num_points)
69{
70 const unsigned int avx_iters = num_points / 8;
71 unsigned int number = 0;
72 const int16_t* complexVectorPtr = (int16_t*)inputVector;
73 float* outputVectorPtr = (float*)outputVector;
74 __m256 outVal;
75 __m256i outValInt;
76 __m128i cplxValue;
77
78 for (number = 0; number < avx_iters; number++) {
79 cplxValue = _mm_load_si128((__m128i*)complexVectorPtr);
80 complexVectorPtr += 8;
81
82 outValInt = _mm256_cvtepi16_epi32(cplxValue);
83 outVal = _mm256_cvtepi32_ps(outValInt);
84 _mm256_store_ps((float*)outputVectorPtr, outVal);
85
86 outputVectorPtr += 8;
87 }
88
89 number = avx_iters * 8;
90 for (; number < num_points * 2; number++) {
91 *outputVectorPtr++ = (float)*complexVectorPtr++;
92 }
93}
94
95#endif /* LV_HAVE_AVX2 */
96
97#ifdef LV_HAVE_GENERIC
98
99static inline void volk_16ic_convert_32fc_generic(lv_32fc_t* outputVector,
100 const lv_16sc_t* inputVector,
101 unsigned int num_points)
102{
103 unsigned int i;
104 for (i = 0; i < num_points; i++) {
105 outputVector[i] =
106 lv_cmake((float)lv_creal(inputVector[i]), (float)lv_cimag(inputVector[i]));
107 }
108}
109
110#endif /* LV_HAVE_GENERIC */
111
112
113#ifdef LV_HAVE_SSE2
114#include <emmintrin.h>
115
116static inline void volk_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector,
117 const lv_16sc_t* inputVector,
118 unsigned int num_points)
119{
120 const unsigned int sse_iters = num_points / 2;
121
122 const lv_16sc_t* _in = inputVector;
123 lv_32fc_t* _out = outputVector;
124 __m128 a;
125 unsigned int number;
126
127 for (number = 0; number < sse_iters; number++) {
128 a = _mm_set_ps(
129 (float)(lv_cimag(_in[1])),
130 (float)(lv_creal(_in[1])),
131 (float)(lv_cimag(_in[0])),
132 (float)(lv_creal(
133 _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
134 _mm_store_ps((float*)_out, a);
135 _in += 2;
136 _out += 2;
137 }
138 if (num_points & 1) {
139 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
140 _in++;
141 }
142}
143
144#endif /* LV_HAVE_SSE2 */
145
146#ifdef LV_HAVE_AVX
147#include <immintrin.h>
148
149static inline void volk_16ic_convert_32fc_a_avx(lv_32fc_t* outputVector,
150 const lv_16sc_t* inputVector,
151 unsigned int num_points)
152{
153 const unsigned int sse_iters = num_points / 4;
154
155 const lv_16sc_t* _in = inputVector;
156 lv_32fc_t* _out = outputVector;
157 __m256 a;
158 unsigned int i, number;
159
160 for (number = 0; number < sse_iters; number++) {
161 a = _mm256_set_ps(
162 (float)(lv_cimag(_in[3])),
163 (float)(lv_creal(_in[3])),
164 (float)(lv_cimag(_in[2])),
165 (float)(lv_creal(_in[2])),
166 (float)(lv_cimag(_in[1])),
167 (float)(lv_creal(_in[1])),
168 (float)(lv_cimag(_in[0])),
169 (float)(lv_creal(
170 _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
171 _mm256_store_ps((float*)_out, a);
172 _in += 4;
173 _out += 4;
174 }
175
176 for (i = 0; i < (num_points % 4); ++i) {
177 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
178 _in++;
179 }
180}
181
182#endif /* LV_HAVE_AVX */
183
184
185#ifdef LV_HAVE_NEON
186#include <arm_neon.h>
187
188static inline void volk_16ic_convert_32fc_neon(lv_32fc_t* outputVector,
189 const lv_16sc_t* inputVector,
190 unsigned int num_points)
191{
192 const unsigned int sse_iters = num_points / 2;
193
194 const lv_16sc_t* _in = inputVector;
195 lv_32fc_t* _out = outputVector;
196
197 int16x4_t a16x4;
198 int32x4_t a32x4;
199 float32x4_t f32x4;
200 unsigned int i, number;
201
202 for (number = 0; number < sse_iters; number++) {
203 a16x4 = vld1_s16((const int16_t*)_in);
204 __VOLK_PREFETCH(_in + 4);
205 a32x4 = vmovl_s16(a16x4);
206 f32x4 = vcvtq_f32_s32(a32x4);
207 vst1q_f32((float32_t*)_out, f32x4);
208 _in += 2;
209 _out += 2;
210 }
211 for (i = 0; i < (num_points % 2); ++i) {
212 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
213 _in++;
214 }
215}
216#endif /* LV_HAVE_NEON */
217
218#endif /* INCLUDED_volk_16ic_convert_32fc_a_H */
219
220#ifndef INCLUDED_volk_16ic_convert_32fc_u_H
221#define INCLUDED_volk_16ic_convert_32fc_u_H
222
223#include <volk/volk_complex.h>
224
225
226#ifdef LV_HAVE_AVX2
227#include <immintrin.h>
228
229static inline void volk_16ic_convert_32fc_u_avx2(lv_32fc_t* outputVector,
230 const lv_16sc_t* inputVector,
231 unsigned int num_points)
232{
233 const unsigned int avx_iters = num_points / 8;
234 unsigned int number = 0;
235 const int16_t* complexVectorPtr = (int16_t*)inputVector;
236 float* outputVectorPtr = (float*)outputVector;
237 __m256 outVal;
238 __m256i outValInt;
239 __m128i cplxValue;
240
241 for (number = 0; number < avx_iters; number++) {
242 cplxValue = _mm_loadu_si128((__m128i*)complexVectorPtr);
243 complexVectorPtr += 8;
244
245 outValInt = _mm256_cvtepi16_epi32(cplxValue);
246 outVal = _mm256_cvtepi32_ps(outValInt);
247 _mm256_storeu_ps((float*)outputVectorPtr, outVal);
248
249 outputVectorPtr += 8;
250 }
251
252 number = avx_iters * 8;
253 for (; number < num_points * 2; number++) {
254 *outputVectorPtr++ = (float)*complexVectorPtr++;
255 }
256}
257
258#endif /* LV_HAVE_AVX2 */
259
260#ifdef LV_HAVE_SSE2
261#include <emmintrin.h>
262
263static inline void volk_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector,
264 const lv_16sc_t* inputVector,
265 unsigned int num_points)
266{
267 const unsigned int sse_iters = num_points / 2;
268
269 const lv_16sc_t* _in = inputVector;
270 lv_32fc_t* _out = outputVector;
271 __m128 a;
272 unsigned int number;
273
274 for (number = 0; number < sse_iters; number++) {
275 a = _mm_set_ps(
276 (float)(lv_cimag(_in[1])),
277 (float)(lv_creal(_in[1])),
278 (float)(lv_cimag(_in[0])),
279 (float)(lv_creal(
280 _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
281 _mm_storeu_ps((float*)_out, a);
282 _in += 2;
283 _out += 2;
284 }
285 if (num_points & 1) {
286 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
287 _in++;
288 }
289}
290
291#endif /* LV_HAVE_SSE2 */
292
293
294#ifdef LV_HAVE_AVX
295#include <immintrin.h>
296
297static inline void volk_16ic_convert_32fc_u_avx(lv_32fc_t* outputVector,
298 const lv_16sc_t* inputVector,
299 unsigned int num_points)
300{
301 const unsigned int sse_iters = num_points / 4;
302
303 const lv_16sc_t* _in = inputVector;
304 lv_32fc_t* _out = outputVector;
305 __m256 a;
306 unsigned int i, number;
307
308 for (number = 0; number < sse_iters; number++) {
309 a = _mm256_set_ps(
310 (float)(lv_cimag(_in[3])),
311 (float)(lv_creal(_in[3])),
312 (float)(lv_cimag(_in[2])),
313 (float)(lv_creal(_in[2])),
314 (float)(lv_cimag(_in[1])),
315 (float)(lv_creal(_in[1])),
316 (float)(lv_cimag(_in[0])),
317 (float)(lv_creal(
318 _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
319 _mm256_storeu_ps((float*)_out, a);
320 _in += 4;
321 _out += 4;
322 }
323
324 for (i = 0; i < (num_points % 4); ++i) {
325 *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
326 _in++;
327 }
328}
329
330#endif /* LV_HAVE_AVX */
331#endif /* INCLUDED_volk_16ic_convert_32fc_u_H */
static void volk_16ic_convert_32fc_generic(lv_32fc_t *outputVector, const lv_16sc_t *inputVector, unsigned int num_points)
Definition: volk_16ic_convert_32fc.h:99
static void volk_16ic_convert_32fc_u_avx(lv_32fc_t *outputVector, const lv_16sc_t *inputVector, unsigned int num_points)
Definition: volk_16ic_convert_32fc.h:297
static void volk_16ic_convert_32fc_a_avx(lv_32fc_t *outputVector, const lv_16sc_t *inputVector, unsigned int num_points)
Definition: volk_16ic_convert_32fc.h:149
static void volk_16ic_convert_32fc_u_sse2(lv_32fc_t *outputVector, const lv_16sc_t *inputVector, unsigned int num_points)
Definition: volk_16ic_convert_32fc.h:263
static void volk_16ic_convert_32fc_neon(lv_32fc_t *outputVector, const lv_16sc_t *inputVector, unsigned int num_points)
Definition: volk_16ic_convert_32fc.h:188
static void volk_16ic_convert_32fc_a_sse2(lv_32fc_t *outputVector, const lv_16sc_t *inputVector, unsigned int num_points)
Definition: volk_16ic_convert_32fc.h:116
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_cmake(r, i)
Definition: volk_complex.h:68
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65
short complex lv_16sc_t
Definition: volk_complex.h:62
for i
Definition: volk_config_fixed.tmpl.h:25