Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_64u_popcnt.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
60#ifndef INCLUDED_volk_64u_popcnt_a_H
61#define INCLUDED_volk_64u_popcnt_a_H
62
63#include <inttypes.h>
64#include <stdio.h>
65
66
67#ifdef LV_HAVE_GENERIC
68
69
/*
 * Count the set bits of a single 32-bit word.
 *
 * Branch-free parallel reduction: neighbouring bit fields are summed in
 * place, doubling the field width at every step (1 -> 2 -> 4 -> 8 bits).
 * The per-byte sums are then folded down into the lowest byte.
 *
 * bits: word whose set bits are counted.
 * Returns the number of set bits, in the range 0..32.
 */
static inline uint32_t volk_64u_popcnt_generic_word(uint32_t bits)
{
    bits = (bits & 0x55555555u) + ((bits >> 1) & 0x55555555u); /* 2-bit sums */
    bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u); /* 4-bit sums */
    bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;                 /* 8-bit sums */
    bits += bits >> 8;
    bits += bits >> 16;
    /* Only the low byte holds the full total; it is at most 32, so six
     * bits are enough and everything above them is partial-sum residue. */
    return bits & 0x0000003Fu;
}

/*
 * Count the set bits of a 64-bit word without relying on any SIMD or
 * hardware population-count instruction.
 *
 * The word is split into its two 32-bit halves, each half is counted with
 * 32-bit arithmetic only, and the two counts are added.
 *
 * ret:   output; receives the number of set bits in value (0..64).
 * value: the 64-bit word to examine.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    const uint32_t low_half = (uint32_t)(value & 0x00000000FFFFFFFFull);
    const uint32_t high_half = (uint32_t)((value & 0xFFFFFFFF00000000ull) >> 32);

    uint64_t total = volk_64u_popcnt_generic_word(low_half);
    total += volk_64u_popcnt_generic_word(high_half);

    *ret = total;
}
96
97#endif /*LV_HAVE_GENERIC*/
98
99
100#if LV_HAVE_SSE4_2 && LV_HAVE_64
101
102#include <nmmintrin.h>
103
/*
 * Count the set bits of a 64-bit word using the _mm_popcnt_u64 intrinsic.
 *
 * ret:   output; receives the number of set bits in value.
 * value: the 64-bit word to examine.
 */
static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
{
    /* The intrinsic hands back a signed 64-bit integer; a bit count is never
     * negative, so the explicit conversion to uint64_t is lossless. */
    *ret = (uint64_t)_mm_popcnt_u64(value);
}
108
109#endif /*LV_HAVE_SSE4_2*/
110
111
112#if LV_HAVE_NEON
113#include <arm_neon.h>
114static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value)
115{
116 uint8x8_t input_val, count8x8_val;
117 uint16x4_t count16x4_val;
118 uint32x2_t count32x2_val;
119 uint64x1_t count64x1_val;
120
121 input_val = vld1_u8((unsigned char*)&value);
122 count8x8_val = vcnt_u8(input_val);
123 count16x4_val = vpaddl_u8(count8x8_val);
124 count32x2_val = vpaddl_u16(count16x4_val);
125 count64x1_val = vpaddl_u32(count32x2_val);
126 vst1_u64(ret, count64x1_val);
127
128 //*ret = _mm_popcnt_u64(value);
129}
130#endif /*LV_HAVE_NEON*/
131
132
133#endif /*INCLUDED_volk_64u_popcnt_a_H*/
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:114
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:70