Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_16i_convert_8i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
53#ifndef INCLUDED_volk_16i_convert_8i_u_H
54#define INCLUDED_volk_16i_convert_8i_u_H
55
56#include <inttypes.h>
57#include <stdio.h>
58
59#ifdef LV_HAVE_AVX2
60#include <immintrin.h>
61
/*
 * Converts 16-bit samples to 8-bit samples by keeping the upper byte of each
 * input value (arithmetic shift right by 8).
 *
 * Unaligned AVX2 variant: the vector loop uses the unaligned load/store
 * intrinsics, so neither buffer needs any particular alignment.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values (never written)
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_u_avx2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int thirtysecondPoints = num_points / 32;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m256i inputVal1;
    __m256i inputVal2;
    __m256i ret;

    for (; number < thirtysecondPoints; number++) {

        // Load 32 values (two registers of 16 samples each)
        inputVal1 = _mm256_loadu_si256((const __m256i*)inputPtr);
        inputPtr += 16;
        inputVal2 = _mm256_loadu_si256((const __m256i*)inputPtr);
        inputPtr += 16;

        // After the shift every lane is within [-128, 127], so the saturating
        // pack below never actually clamps.
        inputVal1 = _mm256_srai_epi16(inputVal1, 8);
        inputVal2 = _mm256_srai_epi16(inputVal2, 8);

        // The pack operates on each 128-bit half separately, leaving the
        // 64-bit groups ordered [val1.lo, val2.lo, val1.hi, val2.hi].
        // 0xD8 selects quadwords 0, 2, 1, 3 to restore sample order.
        ret = _mm256_packs_epi16(inputVal1, inputVal2);
        ret = _mm256_permute4x64_epi64(ret, 0xD8);

        _mm256_storeu_si256((__m256i*)outputVectorPtr, ret);

        outputVectorPtr += 32;
    }

    // Scalar tail for the samples that do not fill a whole 32-sample block
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
99#endif /* LV_HAVE_AVX2 */
100
101
102#ifdef LV_HAVE_SSE2
103#include <emmintrin.h>
104
/*
 * Converts 16-bit samples to 8-bit samples by keeping the upper byte of each
 * input value (arithmetic shift right by 8).
 *
 * Unaligned SSE2 variant: the vector loop uses the unaligned load/store
 * intrinsics, so neither buffer needs any particular alignment.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values (never written)
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m128i inputVal1;
    __m128i inputVal2;
    __m128i ret;

    for (; number < sixteenthPoints; number++) {

        // Load the 16 values (two registers of 8 samples each)
        inputVal1 = _mm_loadu_si128((const __m128i*)inputPtr);
        inputPtr += 8;
        inputVal2 = _mm_loadu_si128((const __m128i*)inputPtr);
        inputPtr += 8;

        // After the shift every lane is within [-128, 127], so the saturating
        // pack below never actually clamps.
        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        ret = _mm_packs_epi16(inputVal1, inputVal2);

        _mm_storeu_si128((__m128i*)outputVectorPtr, ret);

        outputVectorPtr += 16;
    }

    // Scalar tail for the samples that do not fill a whole 16-sample block
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
141#endif /* LV_HAVE_SSE2 */
142
143
144#ifdef LV_HAVE_GENERIC
145
/*
 * Portable reference implementation: writes the upper byte of each 16-bit
 * input sample (value shifted right by 8) to the matching output slot.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_generic(int8_t* outputVector,
                                               const int16_t* inputVector,
                                               unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; ++idx) {
        outputVector[idx] = (int8_t)(inputVector[idx] >> 8);
    }
}
158#endif /* LV_HAVE_GENERIC */
159
160
161#endif /* INCLUDED_volk_16i_convert_8i_u_H */
162#ifndef INCLUDED_volk_16i_convert_8i_a_H
163#define INCLUDED_volk_16i_convert_8i_a_H
164
165#include <inttypes.h>
166#include <stdio.h>
167
168#ifdef LV_HAVE_AVX2
169#include <immintrin.h>
170
/*
 * Converts 16-bit samples to 8-bit samples by keeping the upper byte of each
 * input value (arithmetic shift right by 8).
 *
 * Aligned AVX2 variant: the vector loop uses the aligned load/store
 * intrinsics, so both buffers must be 32-byte aligned.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values (never written)
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_avx2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int thirtysecondPoints = num_points / 32;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m256i inputVal1;
    __m256i inputVal2;
    __m256i ret;

    for (; number < thirtysecondPoints; number++) {

        // Load 32 values (two registers of 16 samples each)
        inputVal1 = _mm256_load_si256((const __m256i*)inputPtr);
        inputPtr += 16;
        inputVal2 = _mm256_load_si256((const __m256i*)inputPtr);
        inputPtr += 16;

        // After the shift every lane is within [-128, 127], so the saturating
        // pack below never actually clamps.
        inputVal1 = _mm256_srai_epi16(inputVal1, 8);
        inputVal2 = _mm256_srai_epi16(inputVal2, 8);

        // The pack operates on each 128-bit half separately, leaving the
        // 64-bit groups ordered [val1.lo, val2.lo, val1.hi, val2.hi].
        // 0xD8 selects quadwords 0, 2, 1, 3 to restore sample order.
        ret = _mm256_packs_epi16(inputVal1, inputVal2);
        ret = _mm256_permute4x64_epi64(ret, 0xD8);

        _mm256_store_si256((__m256i*)outputVectorPtr, ret);

        outputVectorPtr += 32;
    }

    // Scalar tail for the samples that do not fill a whole 32-sample block
    number = thirtysecondPoints * 32;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
208#endif /* LV_HAVE_AVX2 */
209
210
211#ifdef LV_HAVE_SSE2
212#include <emmintrin.h>
213
/*
 * Converts 16-bit samples to 8-bit samples by keeping the upper byte of each
 * input value (arithmetic shift right by 8).
 *
 * Aligned SSE2 variant: the vector loop uses the aligned load/store
 * intrinsics, so both buffers must be 16-byte aligned.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values (never written)
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector,
                                              const int16_t* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputPtr = inputVector;
    __m128i inputVal1;
    __m128i inputVal2;
    __m128i ret;

    for (; number < sixteenthPoints; number++) {

        // Load the 16 values (two registers of 8 samples each)
        inputVal1 = _mm_load_si128((const __m128i*)inputPtr);
        inputPtr += 8;
        inputVal2 = _mm_load_si128((const __m128i*)inputPtr);
        inputPtr += 8;

        // After the shift every lane is within [-128, 127], so the saturating
        // pack below never actually clamps.
        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        ret = _mm_packs_epi16(inputVal1, inputVal2);

        _mm_store_si128((__m128i*)outputVectorPtr, ret);

        outputVectorPtr += 16;
    }

    // Scalar tail for the samples that do not fill a whole 16-sample block
    number = sixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
250#endif /* LV_HAVE_SSE2 */
251
252
253#ifdef LV_HAVE_NEON
254#include <arm_neon.h>
255
/*
 * NEON implementation: converts 16-bit samples to 8-bit samples by shifting
 * each value right by 8 and narrowing it to a byte. Full blocks of 16 samples
 * go through the vector path; leftover samples are handled one at a time.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_neon(int8_t* outputVector,
                                            const int16_t* inputVector,
                                            unsigned int num_points)
{
    const int16_t* src = inputVector;
    int8_t* dst = outputVector;
    const unsigned int blocks = num_points / 16;
    unsigned int i;

    for (i = 0; i < blocks; ++i, src += 16, dst += 16) {
        // shift-and-narrow each half of the 16-sample block
        const int8x8_t low_half = vshrn_n_s16(vld1q_s16(src), 8);
        const int8x8_t high_half = vshrn_n_s16(vld1q_s16(src + 8), 8);
        // join both halves and store all 16 bytes at once
        vst1q_s8(dst, vcombine_s8(low_half, high_half));
    }

    // scalar tail; src/dst already point just past the vectorized part
    for (i = blocks * 16; i < num_points; ++i) {
        *dst++ = (int8_t)(*src++ >> 8);
    }
}
289#endif /* LV_HAVE_NEON */
290
291
292#ifdef LV_HAVE_GENERIC
293
/*
 * Portable implementation registered under the aligned ("_a") name: writes
 * the upper byte of each 16-bit input sample (value shifted right by 8) to
 * the matching output slot. The code itself places no alignment requirement
 * on either buffer.
 *
 * outputVector: destination, receives num_points int8_t values
 * inputVector:  source, num_points int16_t values
 * num_points:   number of samples to convert
 */
static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector,
                                                 const int16_t* inputVector,
                                                 unsigned int num_points)
{
    const int16_t* src = inputVector;
    int8_t* dst = outputVector;
    unsigned int remaining = num_points;

    while (remaining > 0) {
        *dst = (int8_t)(*src >> 8);
        ++dst;
        ++src;
        --remaining;
    }
}
306#endif /* LV_HAVE_GENERIC */
307
308#endif /* INCLUDED_volk_16i_convert_8i_a_H */
static void volk_16i_convert_8i_a_sse2(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:214
static void volk_16i_convert_8i_u_sse2(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:105
static void volk_16i_convert_8i_a_generic(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:294
static void volk_16i_convert_8i_neon(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:256
static void volk_16i_convert_8i_generic(int8_t *outputVector, const int16_t *inputVector, unsigned int num_points)
Definition: volk_16i_convert_8i.h:146