Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_sse_intrinsics.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2015 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
23/*
24 * This file is intended to hold helper functions composed of SSE intrinsics.
25 * They should be used in VOLK kernels to avoid copy-and-paste duplication.
26 */
27
28#ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
29#define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
30#include <xmmintrin.h>
31
/*
 * Squared magnitude of four complex values held in two SSE registers.
 *
 * The two inputs are treated as interleaved (I, Q) pairs: lanes 0 and 2 of
 * each register are taken as the I components and lanes 1 and 3 as the Q
 * components. cplxValue1 supplies the first two complex values and
 * cplxValue2 the last two.
 *
 * Returns a register whose four lanes hold I*I + Q*Q for each complex value,
 * in input order.
 */
static inline __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
{
    /* De-interleave: gather the even lanes (I parts) of both inputs. */
    const __m128 in_phase =
        _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    /* De-interleave: gather the odd lanes (Q parts) of both inputs. */
    const __m128 quadrature =
        _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

    /* Square each component, then sum the squares lane by lane. */
    return _mm_add_ps(_mm_mul_ps(in_phase, in_phase),
                      _mm_mul_ps(quadrature, quadrature));
}
43
44static inline __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
45{
46 return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
47}
48
49static inline __m128 _mm_scaled_norm_dist_ps_sse(const __m128 symbols0,
50 const __m128 symbols1,
51 const __m128 points0,
52 const __m128 points1,
53 const __m128 scalar)
54{
55 // calculate scalar * |x - y|^2
56 const __m128 diff0 = _mm_sub_ps(symbols0, points0);
57 const __m128 diff1 = _mm_sub_ps(symbols1, points1);
58 const __m128 norms = _mm_magnitudesquared_ps(diff0, diff1);
59 return _mm_mul_ps(norms, scalar);
60}
61
/*
 * Lane-wise update of a running sum of squares.
 *
 * Returns sq_acc + rec * (aux * val - acc)^2, evaluated per lane in that
 * order: multiply, subtract, square, scale by rec, then add to sq_acc.
 */
static inline __m128 _mm_accumulate_square_sum_ps(
    __m128 sq_acc, __m128 acc, __m128 val, __m128 rec, __m128 aux)
{
    const __m128 scaled_val = _mm_mul_ps(aux, val);
    const __m128 deviation = _mm_sub_ps(scaled_val, acc);
    const __m128 deviation_sq = _mm_mul_ps(deviation, deviation);
    const __m128 weighted = _mm_mul_ps(deviation_sq, rec);
    return _mm_add_ps(sq_acc, weighted);
}
71
72#endif /* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:32
static __m128 _mm_accumulate_square_sum_ps(__m128 sq_acc, __m128 acc, __m128 val, __m128 rec, __m128 aux)
Definition: volk_sse_intrinsics.h:62
static __m128 _mm_scaled_norm_dist_ps_sse(const __m128 symbols0, const __m128 symbols1, const __m128 points0, const __m128 points1, const __m128 scalar)
Definition: volk_sse_intrinsics.h:49
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:44