Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32f_s32f_normalize.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
70#ifndef INCLUDED_volk_32f_s32f_normalize_a_H
71#define INCLUDED_volk_32f_s32f_normalize_a_H
72
73#include <inttypes.h>
74#include <stdio.h>
75
76#ifdef LV_HAVE_AVX
77#include <immintrin.h>
78
/*!
 * \brief In-place normalisation of a float buffer by \p scalar (aligned AVX).
 *
 * Each of the first \p num_points elements of \p vecBuffer is overwritten
 * with its value multiplied by (1 / scalar).  Full groups of eight floats go
 * through 256-bit aligned loads and stores; the remaining 0..7 elements are
 * handled one at a time.
 *
 * \param vecBuffer  Buffer that is read and written.  Because the aligned
 *                   load/store intrinsics are used, it must be 32-byte
 *                   aligned.
 * \param scalar     Divisor.  It is not checked against zero.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_normalize_a_avx(float* vecBuffer,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    float* inputPtr = vecBuffer;

    /* Divide once up front so both loops only have to multiply. */
    const float invScalar = 1.0f / scalar;
    const __m256 vecScalar = _mm256_set1_ps(invScalar);

    /* Same type as the loop index: num_points / 8 always fits. */
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    for (number = 0; number < eighthPoints; number++) {
        __m256 input1 = _mm256_load_ps(inputPtr);
        input1 = _mm256_mul_ps(input1, vecScalar);
        _mm256_store_ps(inputPtr, input1);
        inputPtr += 8;
    }

    /* Scalar tail for the elements that did not fill a whole vector. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *inputPtr *= invScalar;
        inputPtr++;
    }
}
109#endif /* LV_HAVE_AVX */
110
111#ifdef LV_HAVE_SSE
112#include <xmmintrin.h>
113
/*!
 * \brief In-place normalisation of a float buffer by \p scalar (aligned SSE).
 *
 * Each of the first \p num_points elements of \p vecBuffer is overwritten
 * with its value multiplied by (1 / scalar).  Full groups of four floats go
 * through 128-bit aligned loads and stores; the remaining 0..3 elements are
 * handled one at a time.
 *
 * \param vecBuffer  Buffer that is read and written.  Because the aligned
 *                   load/store intrinsics are used, it must be 16-byte
 *                   aligned.
 * \param scalar     Divisor.  It is not checked against zero.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    float* inputPtr = vecBuffer;

    /* Divide once up front so both loops only have to multiply. */
    const float invScalar = 1.0f / scalar;
    const __m128 vecScalar = _mm_set_ps1(invScalar);

    /* Same type as the loop index: num_points / 4 always fits. */
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    for (number = 0; number < quarterPoints; number++) {
        __m128 input1 = _mm_load_ps(inputPtr);
        input1 = _mm_mul_ps(input1, vecScalar);
        _mm_store_ps(inputPtr, input1);
        inputPtr += 4;
    }

    /* Scalar tail for the elements that did not fill a whole vector. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        *inputPtr *= invScalar;
        inputPtr++;
    }
}
144#endif /* LV_HAVE_SSE */
145
146#ifdef LV_HAVE_GENERIC
147
/*!
 * \brief In-place normalisation of a float buffer by \p scalar (portable C).
 *
 * Each of the first \p num_points elements of \p vecBuffer is overwritten
 * with its value multiplied by (1 / scalar).
 *
 * \param vecBuffer  Buffer that is read and written.
 * \param scalar     Divisor.  It is not checked against zero.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_normalize_generic(float* vecBuffer,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    float* inputPtr = vecBuffer;

    /* Divide once up front so the loop only has to multiply. */
    const float invScalar = 1.0f / scalar;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *inputPtr *= invScalar;
        inputPtr++;
    }
}
160#endif /* LV_HAVE_GENERIC */
161
162#ifdef LV_HAVE_ORC
163
/*
 * Kernel implemented outside this header.
 * NOTE(review): presumably it writes src[i] * scalar to dst[i] for
 * num_points elements -- confirm against the ORC source.
 */
extern void volk_32f_s32f_normalize_a_orc_impl(float* dst,
                                               float* src,
                                               const float scalar,
                                               unsigned int num_points);

/*!
 * \brief Normalisation of a float buffer by \p scalar via the ORC kernel.
 *
 * Computes 1 / scalar and forwards it to
 * volk_32f_s32f_normalize_a_orc_impl() with \p vecBuffer passed as both the
 * destination and the source, so the buffer is processed in place.
 *
 * \param vecBuffer  Buffer handed to the kernel as both input and output.
 * \param scalar     Divisor.  It is not checked against zero.
 * \param num_points Element count forwarded to the kernel.
 */
static inline void volk_32f_s32f_normalize_u_orc(float* vecBuffer,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    const float invscalar = 1.0f / scalar;
    volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, num_points);
}
175#endif /* LV_HAVE_ORC */
176
177#endif /* INCLUDED_volk_32f_s32f_normalize_a_H */
178
179#ifndef INCLUDED_volk_32f_s32f_normalize_u_H
180#define INCLUDED_volk_32f_s32f_normalize_u_H
181
182#include <inttypes.h>
183#include <stdio.h>
184#ifdef LV_HAVE_AVX
185#include <immintrin.h>
186
/*!
 * \brief In-place normalisation of a float buffer by \p scalar (unaligned AVX).
 *
 * Each of the first \p num_points elements of \p vecBuffer is overwritten
 * with its value multiplied by (1 / scalar).  Full groups of eight floats go
 * through 256-bit unaligned loads and stores; the remaining 0..7 elements
 * are handled one at a time.
 *
 * \param vecBuffer  Buffer that is read and written.  The unaligned
 *                   load/store intrinsics are used, so no particular
 *                   alignment is required.
 * \param scalar     Divisor.  It is not checked against zero.
 * \param num_points Number of floats to process.
 */
static inline void volk_32f_s32f_normalize_u_avx(float* vecBuffer,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    float* inputPtr = vecBuffer;

    /* Divide once up front so both loops only have to multiply. */
    const float invScalar = 1.0f / scalar;
    const __m256 vecScalar = _mm256_set1_ps(invScalar);

    /* Same type as the loop index: num_points / 8 always fits. */
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    for (number = 0; number < eighthPoints; number++) {
        __m256 input1 = _mm256_loadu_ps(inputPtr);
        input1 = _mm256_mul_ps(input1, vecScalar);
        _mm256_storeu_ps(inputPtr, input1);
        inputPtr += 8;
    }

    /* Scalar tail for the elements that did not fill a whole vector. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *inputPtr *= invScalar;
        inputPtr++;
    }
}
217#endif /* LV_HAVE_AVX */
218
219
220#endif /* INCLUDED_volk_32f_s32f_normalize_u_H */
static void volk_32f_s32f_normalize_a_avx(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:79
static void volk_32f_s32f_normalize_u_avx(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:187
static void volk_32f_s32f_normalize_generic(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:148
static void volk_32f_s32f_normalize_a_sse(float *vecBuffer, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_normalize.h:114