Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32f_s32f_power_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
71#ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
72#define INCLUDED_volk_32f_s32f_power_32f_a_H
73
74#include <inttypes.h>
75#include <math.h>
76#include <stdio.h>
77
78#ifdef LV_HAVE_SSE4_1
79#include <tmmintrin.h>
80
81#ifdef LV_HAVE_LIB_SIMDMATH
82#include <simdmath.h>
83#endif /* LV_HAVE_LIB_SIMDMATH */
84
85static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector,
86 const float* aVector,
87 const float power,
88 unsigned int num_points)
89{
90 unsigned int number = 0;
91
92 float* cPtr = cVector;
93 const float* aPtr = aVector;
94
95#ifdef LV_HAVE_LIB_SIMDMATH
96 const unsigned int quarterPoints = num_points / 4;
97 __m128 vPower = _mm_set_ps1(power);
98 __m128 zeroValue = _mm_setzero_ps();
99 __m128 signMask;
100 __m128 negatedValues;
101 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
102 __m128 onesMask = _mm_set_ps1(1);
103
104 __m128 aVal, cVal;
105 for (; number < quarterPoints; number++) {
106
107 aVal = _mm_load_ps(aPtr);
108 signMask = _mm_cmplt_ps(aVal, zeroValue);
109 negatedValues = _mm_sub_ps(zeroValue, aVal);
110 aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
111
112 // powf4 doesn't support negative values in the base, so we mask them off and then
113 // apply the negative after
114 cVal = powf4(aVal, vPower); // Takes each input value to the specified power
115
116 cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
117
118 _mm_store_ps(cPtr, cVal); // Store the results back into the C container
119
120 aPtr += 4;
121 cPtr += 4;
122 }
123
124 number = quarterPoints * 4;
125#endif /* LV_HAVE_LIB_SIMDMATH */
126
127 for (; number < num_points; number++) {
128 *cPtr++ = powf((*aPtr++), power);
129 }
130}
131
132#endif /* LV_HAVE_SSE4_1 */
133
134
135#ifdef LV_HAVE_SSE
136#include <xmmintrin.h>
137
138#ifdef LV_HAVE_LIB_SIMDMATH
139#include <simdmath.h>
140#endif /* LV_HAVE_LIB_SIMDMATH */
141
142static inline void volk_32f_s32f_power_32f_a_sse(float* cVector,
143 const float* aVector,
144 const float power,
145 unsigned int num_points)
146{
147 unsigned int number = 0;
148
149 float* cPtr = cVector;
150 const float* aPtr = aVector;
151
152#ifdef LV_HAVE_LIB_SIMDMATH
153 const unsigned int quarterPoints = num_points / 4;
154 __m128 vPower = _mm_set_ps1(power);
155 __m128 zeroValue = _mm_setzero_ps();
156 __m128 signMask;
157 __m128 negatedValues;
158 __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
159 __m128 onesMask = _mm_set_ps1(1);
160
161 __m128 aVal, cVal;
162 for (; number < quarterPoints; number++) {
163
164 aVal = _mm_load_ps(aPtr);
165 signMask = _mm_cmplt_ps(aVal, zeroValue);
166 negatedValues = _mm_sub_ps(zeroValue, aVal);
167 aVal =
168 _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));
169
170 // powf4 doesn't support negative values in the base, so we mask them off and then
171 // apply the negative after
172 cVal = powf4(aVal, vPower); // Takes each input value to the specified power
173
174 cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
175 _mm_and_ps(signMask, negativeOneToPower)),
176 cVal);
177
178 _mm_store_ps(cPtr, cVal); // Store the results back into the C container
179
180 aPtr += 4;
181 cPtr += 4;
182 }
183
184 number = quarterPoints * 4;
185#endif /* LV_HAVE_LIB_SIMDMATH */
186
187 for (; number < num_points; number++) {
188 *cPtr++ = powf((*aPtr++), power);
189 }
190}
191
192#endif /* LV_HAVE_SSE */
193
194
195#ifdef LV_HAVE_GENERIC
196
197static inline void volk_32f_s32f_power_32f_generic(float* cVector,
198 const float* aVector,
199 const float power,
200 unsigned int num_points)
201{
202 float* cPtr = cVector;
203 const float* aPtr = aVector;
204 unsigned int number = 0;
205
206 for (number = 0; number < num_points; number++) {
207 *cPtr++ = powf((*aPtr++), power);
208 }
209}
210#endif /* LV_HAVE_GENERIC */
211
212
213#endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */
static void volk_32f_s32f_power_32f_a_sse(float *cVector, const float *aVector, const float power, unsigned int num_points)
Definition: volk_32f_s32f_power_32f.h:142
static void volk_32f_s32f_power_32f_generic(float *cVector, const float *aVector, const float power, unsigned int num_points)
Definition: volk_32f_s32f_power_32f.h:197