Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32i_x2_and_32i.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of GNU Radio
 *
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */
22
81#ifndef INCLUDED_volk_32i_x2_and_32i_a_H
82#define INCLUDED_volk_32i_x2_and_32i_a_H
83
84#include <inttypes.h>
85#include <stdio.h>
86
87#ifdef LV_HAVE_AVX512F
88#include <immintrin.h>
89
90static inline void volk_32i_x2_and_32i_a_avx512f(int32_t* cVector,
91 const int32_t* aVector,
92 const int32_t* bVector,
93 unsigned int num_points)
94{
95 unsigned int number = 0;
96 const unsigned int sixteenthPoints = num_points / 16;
97
98 int32_t* cPtr = (int32_t*)cVector;
99 const int32_t* aPtr = (int32_t*)aVector;
100 const int32_t* bPtr = (int32_t*)bVector;
101
102 __m512i aVal, bVal, cVal;
103 for (; number < sixteenthPoints; number++) {
104
105 aVal = _mm512_load_si512(aPtr);
106 bVal = _mm512_load_si512(bPtr);
107
108 cVal = _mm512_and_si512(aVal, bVal);
109
110 _mm512_store_si512(cPtr, cVal); // Store the results back into the C container
111
112 aPtr += 16;
113 bPtr += 16;
114 cPtr += 16;
115 }
116
117 number = sixteenthPoints * 16;
118 for (; number < num_points; number++) {
119 cVector[number] = aVector[number] & bVector[number];
120 }
121}
122#endif /* LV_HAVE_AVX512F */
123
124#ifdef LV_HAVE_AVX2
125#include <immintrin.h>
126
127static inline void volk_32i_x2_and_32i_a_avx2(int32_t* cVector,
128 const int32_t* aVector,
129 const int32_t* bVector,
130 unsigned int num_points)
131{
132 unsigned int number = 0;
133 const unsigned int oneEightPoints = num_points / 8;
134
135 int32_t* cPtr = cVector;
136 const int32_t* aPtr = aVector;
137 const int32_t* bPtr = bVector;
138
139 __m256i aVal, bVal, cVal;
140 for (; number < oneEightPoints; number++) {
141
142 aVal = _mm256_load_si256((__m256i*)aPtr);
143 bVal = _mm256_load_si256((__m256i*)bPtr);
144
145 cVal = _mm256_and_si256(aVal, bVal);
146
147 _mm256_store_si256((__m256i*)cPtr,
148 cVal); // Store the results back into the C container
149
150 aPtr += 8;
151 bPtr += 8;
152 cPtr += 8;
153 }
154
155 number = oneEightPoints * 8;
156 for (; number < num_points; number++) {
157 cVector[number] = aVector[number] & bVector[number];
158 }
159}
160#endif /* LV_HAVE_AVX2 */
161
162
163#ifdef LV_HAVE_SSE
164#include <xmmintrin.h>
165
/*
 * Element-wise bitwise AND of two int32 vectors (SSE, aligned variant):
 *     cVector[i] = aVector[i] & bVector[i]   for i in [0, num_points)
 *
 * cVector:    output buffer, receives num_points results
 * aVector:    first input buffer
 * bVector:    second input buffer
 * num_points: number of 32-bit elements to process
 *
 * Only the single-precision intrinsics from <xmmintrin.h> are used: the
 * integer data is moved through __m128 registers and combined with
 * _mm_and_ps, which is a pure bitwise operation, so the integer bit patterns
 * are preserved. _mm_load_ps / _mm_store_ps require 16-byte-aligned
 * addresses, so all three buffers must be 16-byte aligned.
 */
static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector,
                                             const int32_t* aVector,
                                             const int32_t* bVector,
                                             unsigned int num_points)
{
    unsigned int number;
    const unsigned int quarterPoints = num_points / 4;

    /* Reinterpret the buffers as float only to feed the SSE load/store
     * intrinsics; no floating-point arithmetic is performed on the data. */
    float* cPtr = (float*)cVector;
    const float* aPtr = (const float*)aVector;
    const float* bPtr = (const float*)bVector;

    __m128 aVal, bVal, cVal;

    /* Main loop: 4 x 32-bit lanes (one 128-bit register) per iteration. */
    for (number = 0; number < quarterPoints; number++) {
        aVal = _mm_load_ps(aPtr);
        bVal = _mm_load_ps(bPtr);

        cVal = _mm_and_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal); /* write 4 results to the output */

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop did not cover. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        cVector[number] = aVector[number] & bVector[number];
    }
}
198#endif /* LV_HAVE_SSE */
199
200
201#ifdef LV_HAVE_NEON
202#include <arm_neon.h>
203
/*
 * Element-wise bitwise AND of two int32 vectors (ARM NEON):
 *     cVector[i] = aVector[i] & bVector[i]   for i in [0, num_points)
 *
 * cVector:    output buffer, receives num_points results
 * aVector:    first input buffer
 * bVector:    second input buffer
 * num_points: number of 32-bit elements to process
 */
static inline void volk_32i_x2_and_32i_neon(int32_t* cVector,
                                            const int32_t* aVector,
                                            const int32_t* bVector,
                                            unsigned int num_points)
{
    int32_t* cPtr = cVector;
    const int32_t* aPtr = aVector;
    const int32_t* bPtr = bVector;
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;

    int32x4_t a_val, b_val, c_val;

    /* Main loop: 4 int32 lanes (one 128-bit register) per iteration. */
    for (number = 0; number < quarter_points; number++) {
        a_val = vld1q_s32(aPtr);
        b_val = vld1q_s32(bPtr);
        c_val = vandq_s32(a_val, b_val);
        vst1q_s32(cPtr, c_val);
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the pointers already sit just past the last full vector,
     * so continue from them for the remaining num_points % 4 elements. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) & (*bPtr++);
    }
}
231#endif /* LV_HAVE_NEON */
232
233
234#ifdef LV_HAVE_GENERIC
235
/*
 * Element-wise bitwise AND of two int32 vectors (portable scalar version):
 *     cVector[i] = aVector[i] & bVector[i]   for i in [0, num_points)
 *
 * cVector:    output buffer, receives num_points results
 * aVector:    first input buffer
 * bVector:    second input buffer
 * num_points: number of 32-bit elements to process; 0 leaves cVector untouched
 */
static inline void volk_32i_x2_and_32i_generic(int32_t* cVector,
                                               const int32_t* aVector,
                                               const int32_t* bVector,
                                               unsigned int num_points)
{
    int32_t* cPtr = cVector;
    const int32_t* aPtr = aVector;
    const int32_t* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) & (*bPtr++);
    }
}
250#endif /* LV_HAVE_GENERIC */
251
252
253#ifdef LV_HAVE_ORC
/* Defined outside this header; presumably the ORC-generated kernel
 * (NOTE(review): implementation not visible here - confirm). */
extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector,
                                           const int32_t* aVector,
                                           const int32_t* bVector,
                                           unsigned int num_points);

/*
 * Thin wrapper that forwards all four arguments, unchanged, to
 * volk_32i_x2_and_32i_a_orc_impl.
 *
 * cVector:    output buffer
 * aVector:    first input buffer
 * bVector:    second input buffer
 * num_points: number of 32-bit elements to process
 */
static inline void volk_32i_x2_and_32i_u_orc(int32_t* cVector,
                                             const int32_t* aVector,
                                             const int32_t* bVector,
                                             unsigned int num_points)
{
    volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, num_points);
}
266#endif /* LV_HAVE_ORC */
267
268
269#endif /* INCLUDED_volk_32i_x2_and_32i_a_H */
270
271
272#ifndef INCLUDED_volk_32i_x2_and_32i_u_H
273#define INCLUDED_volk_32i_x2_and_32i_u_H
274
275#include <inttypes.h>
276#include <stdio.h>
277
278#ifdef LV_HAVE_AVX512F
279#include <immintrin.h>
280
281static inline void volk_32i_x2_and_32i_u_avx512f(int32_t* cVector,
282 const int32_t* aVector,
283 const int32_t* bVector,
284 unsigned int num_points)
285{
286 unsigned int number = 0;
287 const unsigned int sixteenthPoints = num_points / 16;
288
289 int32_t* cPtr = (int32_t*)cVector;
290 const int32_t* aPtr = (int32_t*)aVector;
291 const int32_t* bPtr = (int32_t*)bVector;
292
293 __m512i aVal, bVal, cVal;
294 for (; number < sixteenthPoints; number++) {
295
296 aVal = _mm512_loadu_si512(aPtr);
297 bVal = _mm512_loadu_si512(bPtr);
298
299 cVal = _mm512_and_si512(aVal, bVal);
300
301 _mm512_storeu_si512(cPtr, cVal); // Store the results back into the C container
302
303 aPtr += 16;
304 bPtr += 16;
305 cPtr += 16;
306 }
307
308 number = sixteenthPoints * 16;
309 for (; number < num_points; number++) {
310 cVector[number] = aVector[number] & bVector[number];
311 }
312}
313#endif /* LV_HAVE_AVX512F */
314
315#ifdef LV_HAVE_AVX2
316#include <immintrin.h>
317
318static inline void volk_32i_x2_and_32i_u_avx2(int32_t* cVector,
319 const int32_t* aVector,
320 const int32_t* bVector,
321 unsigned int num_points)
322{
323 unsigned int number = 0;
324 const unsigned int oneEightPoints = num_points / 8;
325
326 int32_t* cPtr = cVector;
327 const int32_t* aPtr = aVector;
328 const int32_t* bPtr = bVector;
329
330 __m256i aVal, bVal, cVal;
331 for (; number < oneEightPoints; number++) {
332
333 aVal = _mm256_loadu_si256((__m256i*)aPtr);
334 bVal = _mm256_loadu_si256((__m256i*)bPtr);
335
336 cVal = _mm256_and_si256(aVal, bVal);
337
338 _mm256_storeu_si256((__m256i*)cPtr,
339 cVal); // Store the results back into the C container
340
341 aPtr += 8;
342 bPtr += 8;
343 cPtr += 8;
344 }
345
346 number = oneEightPoints * 8;
347 for (; number < num_points; number++) {
348 cVector[number] = aVector[number] & bVector[number];
349 }
350}
351#endif /* LV_HAVE_AVX2 */
352
353
354#endif /* INCLUDED_volk_32i_x2_and_32i_u_H */
static void volk_32i_x2_and_32i_a_sse(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
Definition: volk_32i_x2_and_32i.h:166
static void volk_32i_x2_and_32i_generic(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
Definition: volk_32i_x2_and_32i.h:236
static void volk_32i_x2_and_32i_neon(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
Definition: volk_32i_x2_and_32i.h:204