Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
23#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
24#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
25
#include <string.h>
#include <volk/volk.h>
#include <volk/volk_8u_x4_conv_k7_r2_8u.h>
29
/*
 * Pointer pair for addressing one row of decision bits either byte-wise (t)
 * or as unsigned int words (w).
 */
typedef union {
    // decision_t is a BIT vector
    unsigned char* t;
    unsigned int* w;
} p_decision_t;
35
/*
 * Return the parity (0 for even, 1 for odd number of set bits) of the low
 * 32 bits of x.
 *
 * x      - value whose bits are counted modulo 2.
 * Partab - 256-entry table where Partab[b] is the parity of byte b.
 *
 * The value is folded down to one byte with XORs and then looked up. The
 * fold is done on an unsigned copy and masked to 8 bits so that inputs
 * larger than 255 (or negative) can never index outside the table.
 */
static inline int parity(int x, unsigned char* Partab)
{
    unsigned int folded = (unsigned int)x;

    folded ^= folded >> 16;
    folded ^= folded >> 8;
    return Partab[folded & 0xffu];
}
42
/*
 * Trace back through stored Viterbi decision bits (K = 7, 64 states) and
 * write one decoded bit per output byte.
 *
 * data      - output buffer; the bit found for step i is stored as 0 or 1 in
 *             data[(i + tailsize - 6) % nbits].
 * nbits     - number of traceback steps (and the modulus used for data[]).
 * endstate  - state to start the traceback from; reduced modulo 64.
 * tailsize  - number of 8-byte decision rows skipped at the start of
 *             decisions before row 0 of the frame.
 * decisions - decision rows, 8 bytes per step, read as unsigned int words:
 *             the bit for state s is bit (s % 32) of word (s / 32).
 *
 * Returns the state reached after the first 6 (K - 1) traceback steps. When
 * nbits is smaller than 6 the starting state (modulo 64) is returned, so the
 * result is always defined.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int num_states = 1u << 6;
    const unsigned int row_bytes = num_states / 8;
    const unsigned int constraint_len = 7;
    const unsigned int memory = constraint_len - 1;
    const unsigned int framebits = nbits;
    /* Unsigned on purpose: wraps exactly like the mixed int/unsigned
     * arithmetic it replaces when tailsize < 6. */
    const unsigned int dif = tailsize - memory;
    /* Look past the tail rows. */
    const unsigned char* rows = decisions + tailsize * row_bytes;
    /* Steps left until the return value is captured (0 = never). */
    unsigned int capture = (framebits >= memory) ? memory : 0u;
    int retval;

    endstate %= num_states;
    retval = (int)endstate;

    while (nbits-- != 0) {
        unsigned int word;
        unsigned int k;

        /* Load the decision word with memcpy: the rows are a byte buffer,
         * so this avoids aliasing/alignment problems of an int* read. */
        memcpy(&word,
               rows + nbits * row_bytes + (endstate / 32) * sizeof(word),
               sizeof(word));
        k = (word >> (endstate % 32)) & 1u;

        /* Shift the decided bit into the top of the 6-bit state. */
        endstate = (endstate >> 1) | (k << (constraint_len - 2));
        data[(nbits + dif) % framebits] = (unsigned char)k;

        if (capture != 0 && --capture == 0) {
            retval = (int)endstate;
        }
    }

    return retval;
}
103
104
105#if LV_HAVE_SSE3
106
107#include <emmintrin.h>
108#include <mmintrin.h>
109#include <pmmintrin.h>
110#include <stdio.h>
111#include <xmmintrin.h>
112
/*
 * Puppet (test-harness) wrapper that drives volk_8u_x4_conv_k7_r2_8u_spiral
 * through the simple (syms, dec, framebits) signature.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives one decoded bit per byte from
 *             chainback_viterbi().
 * framebits - frame size; the kernel and the traceback are run for
 *             framebits / 2 - 6 steps plus 6 excess steps.
 *
 * Work buffers and lookup tables live in function-local statics: the metric
 * buffers and tables are created on the first call, and the decision buffer
 * is re-created whenever a call needs more room than any previous one.
 * Calls with framebits / 2 < 6, or calls where an allocation fails, return
 * without touching dec. The static state makes this function non-reentrant.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,
                                                      unsigned char* dec,
                                                      unsigned int framebits)
{
    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* D;
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];
    /* framebits value the current D buffer was sized for. */
    static unsigned int d_capacity = 0;

    int d_polys[2] = { 79, 109 };
    size_t d_bytes;
    unsigned int min;
    int i, state;

    /* framebits / 2 - excess is unsigned and would wrap around below. */
    if (framebits / 2 < excess) {
        return;
    }

    if (once) {
        int cnt, ti;

        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so the next call retries the setup. */
            if (X != NULL) {
                volk_free(X);
            }
            if (Branchtab != NULL) {
                volk_free(Branchtab);
            }
            X = NULL;
            Branchtab = NULL;
            return;
        }
        /* Y is the second half of the X allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The decision buffer must fit this call, not just the first one. */
    d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (D == NULL || framebits > d_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (D == NULL) {
            d_capacity = 0;
            return;
        }
        d_capacity = framebits;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    min = X[0];
    state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
187
188#endif /*LV_HAVE_SSE3*/
189
190
191//#if LV_HAVE_AVX2
192//
193//#include <immintrin.h>
194//#include <stdio.h>
195//
196// static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms,
197// unsigned char* dec,
198// unsigned int framebits)
199//{
200//
201//
202// static int once = 1;
203// int d_numstates = (1 << 6);
204// int rate = 2;
205// static unsigned char* D;
206// static unsigned char* Y;
207// static unsigned char* X;
208// static unsigned int excess = 6;
209// static unsigned char* Branchtab;
210// static unsigned char Partab[256];
211//
212// int d_polys[2] = { 79, 109 };
213//
214//
215// if (once) {
216//
217// X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
218// Y = X + d_numstates;
219// Branchtab =
220// (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
221// D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6),
222// volk_get_alignment());
223// int state, i;
224// int cnt, ti;
225//
226// /* Initialize parity lookup table */
227// for (i = 0; i < 256; i++) {
228// cnt = 0;
229// ti = i;
230// while (ti) {
231// if (ti & 1)
232// cnt++;
233// ti >>= 1;
234// }
235// Partab[i] = cnt & 1;
236// }
237// /* Initialize the branch table */
238// for (state = 0; state < d_numstates / 2; state++) {
239// for (i = 0; i < rate; i++) {
240// Branchtab[i * d_numstates / 2 + state] =
241// parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
242// }
243// }
244//
245// once = 0;
246// }
247//
248// // unbias the old_metrics
249// memset(X, 31, d_numstates);
250//
251// // initialize decisions
252// memset(D, 0, (d_numstates / 8) * (framebits + 6));
253//
254// volk_8u_x4_conv_k7_r2_8u_avx2(
255// Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);
256//
257// unsigned int min = X[0];
258// int i = 0, state = 0;
259// for (i = 0; i < (d_numstates); ++i) {
260// if (X[i] < min) {
261// min = X[i];
262// state = i;
263// }
264// }
265//
266// chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
267//
268// return;
269//}
270//
271//#endif /*LV_HAVE_AVX2*/
272
273
274#if LV_HAVE_GENERIC
275
276
/*
 * Puppet (test-harness) wrapper that drives volk_8u_x4_conv_k7_r2_8u_generic
 * through the simple (syms, dec, framebits) signature.
 *
 * syms      - input symbols handed unchanged to the kernel.
 * dec       - output; receives one decoded bit per byte from
 *             chainback_viterbi().
 * framebits - frame size; the kernel and the traceback are run for
 *             framebits / 2 - 6 steps plus 6 excess steps.
 *
 * Work buffers and lookup tables live in function-local statics: the metric
 * buffers and tables are created on the first call, and the decision buffer
 * is re-created whenever a call needs more room than any previous one.
 * Calls with framebits / 2 < 6, or calls where an allocation fails, return
 * without touching dec. The static state makes this function non-reentrant.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms,
                                                       unsigned char* dec,
                                                       unsigned int framebits)
{
    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned char* D;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];
    /* framebits value the current D buffer was sized for. */
    static unsigned int d_capacity = 0;

    int d_polys[2] = { 79, 109 };
    size_t d_bytes;
    unsigned int min;
    int i, state;

    /* framebits / 2 - excess is unsigned and would wrap around below. */
    if (framebits / 2 < excess) {
        return;
    }

    if (once) {
        int cnt, ti;

        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        Branchtab =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (X == NULL || Branchtab == NULL) {
            /* Leave `once` set so the next call retries the setup. */
            if (X != NULL) {
                volk_free(X);
            }
            if (Branchtab != NULL) {
                volk_free(Branchtab);
            }
            X = NULL;
            Branchtab = NULL;
            return;
        }
        /* Y is the second half of the X allocation. */
        Y = X + d_numstates;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* The decision buffer must fit this call, not just the first one. */
    d_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (D == NULL || framebits > d_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D = (unsigned char*)volk_malloc(d_bytes, volk_get_alignment());
        if (D == NULL) {
            d_capacity = 0;
            return;
        }
        d_capacity = framebits;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, d_bytes);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    min = X[0];
    state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
352
353#endif /* LV_HAVE_GENERIC */
354
355#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
Definition: volk_8u_conv_k7_r2puppet_8u.h:30
unsigned int * w
Definition: volk_8u_conv_k7_r2puppet_8u.h:33
unsigned char * t
Definition: volk_8u_conv_k7_r2puppet_8u.h:32
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition: volk.tmpl.c:102
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:277
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *syms, unsigned char *dec, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:113
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition: volk_8u_conv_k7_r2puppet_8u.h:43
static int parity(int x, unsigned char *Partab)
Definition: volk_8u_conv_k7_r2puppet_8u.h:36
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:343
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:638
for i
Definition: volk_config_fixed.tmpl.h:25
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition: volk_malloc.c:51