Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_sse3_intrinsics.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2015 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
23/*
24 * This file is intended to hold composite helpers built from SSE3 intrinsics.
25 * They should be used in VOLK kernels to avoid copy-pasta.
26 */
27
28#ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
29#define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
30#include <pmmintrin.h>
31
32static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y)
33{
34 __m128 yl, yh, tmp1, tmp2;
35 yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
36 yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
37 tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
38 x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br
39 tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
40 return _mm_addsub_ps(tmp1,
41 tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
42}
43
44static inline __m128 _mm_complexconjugatemul_ps(__m128 x, __m128 y)
45{
46 const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
47 y = _mm_xor_ps(y, conjugator); // conjugate y
48 return _mm_complexmul_ps(x, y);
49}
50
51static inline __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
52{
53 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
54 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
55 return _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
56}
57
58static inline __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
59{
60 return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2));
61}
62
63static inline __m128 _mm_scaled_norm_dist_ps_sse3(const __m128 symbols0,
64 const __m128 symbols1,
65 const __m128 points0,
66 const __m128 points1,
67 const __m128 scalar)
68{
69 /*
70 * Calculate: |y - x|^2 * SNR_lin
71 * Consider 'symbolsX' and 'pointsX' to be complex float
72 * 'symbolsX' are 'y' and 'pointsX' are 'x'
73 */
74 const __m128 diff0 = _mm_sub_ps(symbols0, points0);
75 const __m128 diff1 = _mm_sub_ps(symbols1, points1);
76 const __m128 norms = _mm_magnitudesquared_ps_sse3(diff0, diff1);
77 return _mm_mul_ps(norms, scalar);
78}
79
80#endif /* INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ */
static __m128 _mm_complexmul_ps(__m128 x, __m128 y)
Definition: volk_sse3_intrinsics.h:32
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:58
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:51
static __m128 _mm_scaled_norm_dist_ps_sse3(const __m128 symbols0, const __m128 symbols1, const __m128 points0, const __m128 points1, const __m128 scalar)
Definition: volk_sse3_intrinsics.h:63
static __m128 _mm_complexconjugatemul_ps(__m128 x, __m128 y)
Definition: volk_sse3_intrinsics.h:44