Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_8ic_x2_s32f_multiply_conjugate_32fc.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
57#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
58#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
59
60#include <inttypes.h>
61#include <stdio.h>
62#include <volk/volk_complex.h>
63
64#ifdef LV_HAVE_AVX2
65#include <immintrin.h>
66
67static inline void
68volk_8ic_x2_s32f_multiply_conjugate_32fc_a_avx2(lv_32fc_t* cVector,
69 const lv_8sc_t* aVector,
70 const lv_8sc_t* bVector,
71 const float scalar,
72 unsigned int num_points)
73{
74 unsigned int number = 0;
75 const unsigned int oneEigthPoints = num_points / 8;
76
77 __m256i x, y, realz, imagz;
78 __m256 ret, retlo, rethi;
79 lv_32fc_t* c = cVector;
80 const lv_8sc_t* a = aVector;
81 const lv_8sc_t* b = bVector;
82 __m256i conjugateSign =
83 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
84
85 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
86
87 for (; number < oneEigthPoints; number++) {
88 // Convert 8 bit values into 16 bit values
89 x = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)a));
90 y = _mm256_cvtepi8_epi16(_mm_load_si128((__m128i*)b));
91
92 // Calculate the ar*cr - ai*(-ci) portions
93 realz = _mm256_madd_epi16(x, y);
94
95 // Calculate the complex conjugate of the cr + ci j values
96 y = _mm256_sign_epi16(y, conjugateSign);
97
98 // Shift the order of the cr and ci values
99 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
100 _MM_SHUFFLE(2, 3, 0, 1));
101
102 // Calculate the ar*(-ci) + cr*(ai)
103 imagz = _mm256_madd_epi16(x, y);
104
105 // Interleave real and imaginary and then convert to float values
106 retlo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi32(realz, imagz));
107
108 // Normalize the floating point values
109 retlo = _mm256_mul_ps(retlo, invScalar);
110
111 // Interleave real and imaginary and then convert to float values
112 rethi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi32(realz, imagz));
113
114 // Normalize the floating point values
115 rethi = _mm256_mul_ps(rethi, invScalar);
116
117 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00100000);
118 _mm256_store_ps((float*)c, ret);
119 c += 4;
120
121 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00110001);
122 _mm256_store_ps((float*)c, ret);
123 c += 4;
124
125 a += 8;
126 b += 8;
127 }
128
129 number = oneEigthPoints * 8;
130 float* cFloatPtr = (float*)&cVector[number];
131 int8_t* a8Ptr = (int8_t*)&aVector[number];
132 int8_t* b8Ptr = (int8_t*)&bVector[number];
133 for (; number < num_points; number++) {
134 float aReal = (float)*a8Ptr++;
135 float aImag = (float)*a8Ptr++;
136 lv_32fc_t aVal = lv_cmake(aReal, aImag);
137 float bReal = (float)*b8Ptr++;
138 float bImag = (float)*b8Ptr++;
139 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
140 lv_32fc_t temp = aVal * bVal;
141
142 *cFloatPtr++ = lv_creal(temp) / scalar;
143 *cFloatPtr++ = lv_cimag(temp) / scalar;
144 }
145}
146#endif /* LV_HAVE_AVX2*/
147
148
149#ifdef LV_HAVE_SSE4_1
150#include <smmintrin.h>
151
152static inline void
153volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector,
154 const lv_8sc_t* aVector,
155 const lv_8sc_t* bVector,
156 const float scalar,
157 unsigned int num_points)
158{
159 unsigned int number = 0;
160 const unsigned int quarterPoints = num_points / 4;
161
162 __m128i x, y, realz, imagz;
163 __m128 ret;
164 lv_32fc_t* c = cVector;
165 const lv_8sc_t* a = aVector;
166 const lv_8sc_t* b = bVector;
167 __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
168
169 __m128 invScalar = _mm_set_ps1(1.0 / scalar);
170
171 for (; number < quarterPoints; number++) {
172 // Convert into 8 bit values into 16 bit values
173 x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
174 y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
175
176 // Calculate the ar*cr - ai*(-ci) portions
177 realz = _mm_madd_epi16(x, y);
178
179 // Calculate the complex conjugate of the cr + ci j values
180 y = _mm_sign_epi16(y, conjugateSign);
181
182 // Shift the order of the cr and ci values
183 y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
184 _MM_SHUFFLE(2, 3, 0, 1));
185
186 // Calculate the ar*(-ci) + cr*(ai)
187 imagz = _mm_madd_epi16(x, y);
188
189 // Interleave real and imaginary and then convert to float values
190 ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
191
192 // Normalize the floating point values
193 ret = _mm_mul_ps(ret, invScalar);
194
195 // Store the floating point values
196 _mm_store_ps((float*)c, ret);
197 c += 2;
198
199 // Interleave real and imaginary and then convert to float values
200 ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
201
202 // Normalize the floating point values
203 ret = _mm_mul_ps(ret, invScalar);
204
205 // Store the floating point values
206 _mm_store_ps((float*)c, ret);
207 c += 2;
208
209 a += 4;
210 b += 4;
211 }
212
213 number = quarterPoints * 4;
214 float* cFloatPtr = (float*)&cVector[number];
215 int8_t* a8Ptr = (int8_t*)&aVector[number];
216 int8_t* b8Ptr = (int8_t*)&bVector[number];
217 for (; number < num_points; number++) {
218 float aReal = (float)*a8Ptr++;
219 float aImag = (float)*a8Ptr++;
220 lv_32fc_t aVal = lv_cmake(aReal, aImag);
221 float bReal = (float)*b8Ptr++;
222 float bImag = (float)*b8Ptr++;
223 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
224 lv_32fc_t temp = aVal * bVal;
225
226 *cFloatPtr++ = lv_creal(temp) / scalar;
227 *cFloatPtr++ = lv_cimag(temp) / scalar;
228 }
229}
230#endif /* LV_HAVE_SSE4_1 */
231
232
233#ifdef LV_HAVE_GENERIC
234
235static inline void
237 const lv_8sc_t* aVector,
238 const lv_8sc_t* bVector,
239 const float scalar,
240 unsigned int num_points)
241{
242 unsigned int number = 0;
243 float* cPtr = (float*)cVector;
244 const float invScalar = 1.0 / scalar;
245 int8_t* a8Ptr = (int8_t*)aVector;
246 int8_t* b8Ptr = (int8_t*)bVector;
247 for (number = 0; number < num_points; number++) {
248 float aReal = (float)*a8Ptr++;
249 float aImag = (float)*a8Ptr++;
250 lv_32fc_t aVal = lv_cmake(aReal, aImag);
251 float bReal = (float)*b8Ptr++;
252 float bImag = (float)*b8Ptr++;
253 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
254 lv_32fc_t temp = aVal * bVal;
255
256 *cPtr++ = (lv_creal(temp) * invScalar);
257 *cPtr++ = (lv_cimag(temp) * invScalar);
258 }
259}
260#endif /* LV_HAVE_GENERIC */
261
262
263#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */
264
265#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H
266#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H
267
268#include <inttypes.h>
269#include <stdio.h>
270#include <volk/volk_complex.h>
271
272#ifdef LV_HAVE_AVX2
273#include <immintrin.h>
274
275static inline void
276volk_8ic_x2_s32f_multiply_conjugate_32fc_u_avx2(lv_32fc_t* cVector,
277 const lv_8sc_t* aVector,
278 const lv_8sc_t* bVector,
279 const float scalar,
280 unsigned int num_points)
281{
282 unsigned int number = 0;
283 const unsigned int oneEigthPoints = num_points / 8;
284
285 __m256i x, y, realz, imagz;
286 __m256 ret, retlo, rethi;
287 lv_32fc_t* c = cVector;
288 const lv_8sc_t* a = aVector;
289 const lv_8sc_t* b = bVector;
290 __m256i conjugateSign =
291 _mm256_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1);
292
293 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
294
295 for (; number < oneEigthPoints; number++) {
296 // Convert 8 bit values into 16 bit values
297 x = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)a));
298 y = _mm256_cvtepi8_epi16(_mm_loadu_si128((__m128i*)b));
299
300 // Calculate the ar*cr - ai*(-ci) portions
301 realz = _mm256_madd_epi16(x, y);
302
303 // Calculate the complex conjugate of the cr + ci j values
304 y = _mm256_sign_epi16(y, conjugateSign);
305
306 // Shift the order of the cr and ci values
307 y = _mm256_shufflehi_epi16(_mm256_shufflelo_epi16(y, _MM_SHUFFLE(2, 3, 0, 1)),
308 _MM_SHUFFLE(2, 3, 0, 1));
309
310 // Calculate the ar*(-ci) + cr*(ai)
311 imagz = _mm256_madd_epi16(x, y);
312
313 // Interleave real and imaginary and then convert to float values
314 retlo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi32(realz, imagz));
315
316 // Normalize the floating point values
317 retlo = _mm256_mul_ps(retlo, invScalar);
318
319 // Interleave real and imaginary and then convert to float values
320 rethi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi32(realz, imagz));
321
322 // Normalize the floating point values
323 rethi = _mm256_mul_ps(rethi, invScalar);
324
325 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00100000);
326 _mm256_storeu_ps((float*)c, ret);
327 c += 4;
328
329 ret = _mm256_permute2f128_ps(retlo, rethi, 0b00110001);
330 _mm256_storeu_ps((float*)c, ret);
331 c += 4;
332
333 a += 8;
334 b += 8;
335 }
336
337 number = oneEigthPoints * 8;
338 float* cFloatPtr = (float*)&cVector[number];
339 int8_t* a8Ptr = (int8_t*)&aVector[number];
340 int8_t* b8Ptr = (int8_t*)&bVector[number];
341 for (; number < num_points; number++) {
342 float aReal = (float)*a8Ptr++;
343 float aImag = (float)*a8Ptr++;
344 lv_32fc_t aVal = lv_cmake(aReal, aImag);
345 float bReal = (float)*b8Ptr++;
346 float bImag = (float)*b8Ptr++;
347 lv_32fc_t bVal = lv_cmake(bReal, -bImag);
348 lv_32fc_t temp = aVal * bVal;
349
350 *cFloatPtr++ = lv_creal(temp) / scalar;
351 *cFloatPtr++ = lv_cimag(temp) / scalar;
352 }
353}
354#endif /* LV_HAVE_AVX2*/
355
356
357#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_u_H */
static void volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_8sc_t *aVector, const lv_8sc_t *bVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_x2_s32f_multiply_conjugate_32fc.h:236
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_cmake(r, i)
Definition: volk_complex.h:68
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65