Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_common.h
Go to the documentation of this file.
1#ifndef INCLUDED_LIBVOLK_COMMON_H
2#define INCLUDED_LIBVOLK_COMMON_H
3
// Cross-platform attribute macros
//
// Every branch below defines the same set of names so the rest of the
// library can use them without caring which compiler is in use:
//   __VOLK_ATTR_ALIGNED(x)  alignment request for a declaration
//   __VOLK_ATTR_UNUSED      marks an entity as intentionally unused
//   __VOLK_ATTR_INLINE      forced-inline request
//   __VOLK_ATTR_DEPRECATED  deprecation marker
//   __VOLK_ATTR_EXPORT      symbol export decoration
//   __VOLK_ATTR_IMPORT      symbol import decoration
//   __VOLK_PREFETCH(addr)   prefetch hint (expands to nothing if unsupported)
//   __VOLK_ASM              inline-assembly keyword
//   __VOLK_VOLATILE         volatile qualifier for inline assembly
//
// The MSVC check comes first; it is the only MSVC branch (a second,
// unreachable `#elif _MSC_VER` copy further down has been removed).
#if defined(_MSC_VER)
#define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
#define __VOLK_ATTR_UNUSED
#define __VOLK_ATTR_INLINE __forceinline
#define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
#define __VOLK_ATTR_EXPORT __declspec(dllexport)
#define __VOLK_ATTR_IMPORT __declspec(dllimport)
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm
#define __VOLK_VOLATILE
#elif defined(__clang__)
// AppleClang also defines __GNUC__, so do this check first. These
// will probably be the same as for __GNUC__, but let's keep them
// separate just to be safe.
#define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#define __VOLK_ATTR_UNUSED __attribute__((unused))
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
#elif defined(__GNUC__)
#define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#define __VOLK_ATTR_UNUSED __attribute__((unused))
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
// The visibility attribute is only used from GCC 4 onwards.
#if __GNUC__ >= 4
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#else
#define __VOLK_ATTR_EXPORT
#define __VOLK_ATTR_IMPORT
#endif
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
#else
// Unknown compiler: all attributes expand to nothing.
#define __VOLK_ATTR_ALIGNED(x)
#define __VOLK_ATTR_UNUSED
#define __VOLK_ATTR_INLINE
#define __VOLK_ATTR_DEPRECATED
#define __VOLK_ATTR_EXPORT
#define __VOLK_ATTR_IMPORT
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#endif
66
// Ignore annoying warnings in MSVC.
// Only active when _MSC_VER is defined; other compilers skip this block.
#if defined(_MSC_VER)
#pragma warning(disable : 4244) // 'conversion' conversion from 'type1' to 'type2',
                                // possible loss of data
#pragma warning(disable : 4305) // 'identifier' : truncation from 'type1' to 'type2'
#endif
75
// C-linkage declaration macros
// FIXME: due to the usage of complex.h, require gcc for c-linkage
//
// When compiled as C++ by a compiler that defines __GNUC__, the pair opens
// and closes an `extern "C"` block. In every other case both macros expand
// to nothing.
#if defined(__cplusplus) && defined(__GNUC__)
#define __VOLK_DECL_BEGIN extern "C" {
#define __VOLK_DECL_END }
#else
#define __VOLK_DECL_BEGIN
#define __VOLK_DECL_END
#endif
87
// Define VOLK_API for library symbols
// http://gcc.gnu.org/wiki/Visibility
//
// VOLK_API expands to the export decoration when volk_EXPORTS is defined
// and to the import decoration otherwise.
#ifdef volk_EXPORTS
#define VOLK_API __VOLK_ATTR_EXPORT
#else
#define VOLK_API __VOLK_ATTR_IMPORT
#endif
97
// The bit128 and bit256 unions used by some kernels
101#include <stdint.h>
102
103#ifdef LV_HAVE_SSE
104#ifdef _WIN32
105#include <intrin.h>
106#else
107#include <x86intrin.h>
108#endif
109#endif
110
/*
 * Overlay of several element views on the same storage: sixteen 8-bit,
 * eight 16-bit or four 32-bit unsigned integers, four floats, or two
 * doubles. The SIMD vector members are only present when the matching
 * LV_HAVE_* macro is defined.
 */
union bit128 {
    uint8_t i8[16];
    uint16_t i16[8];
    uint32_t i[4];
    float f[4];
    double d[2];

#ifdef LV_HAVE_SSE
    __m128 float_vec;
#endif

#ifdef LV_HAVE_SSE2
    __m128i int_vec;
    __m128d double_vec;
#endif
};
127
/*
 * Overlay of several element views on the same storage: thirty-two 8-bit,
 * sixteen 16-bit or eight 32-bit unsigned integers, eight floats, or four
 * doubles. The SIMD vector members are only present when LV_HAVE_AVX is
 * defined.
 */
union bit256 {
    uint8_t i8[32];
    uint16_t i16[16];
    uint32_t i[8];
    float f[8];
    double d[4];

#ifdef LV_HAVE_AVX
    __m256 float_vec;
    __m256i int_vec;
    __m256d double_vec;
#endif
};
141
// Cast an arbitrary pointer expression to a pointer to the corresponding
// union. These are plain casts: nothing is checked or copied.
#define bit128_p(x) ((union bit128*)(x))
#define bit256_p(x) ((union bit256*)(x))
144
146// log2f
148#include <math.h>
149// +-Inf -> +-127.0f in order to match the behaviour of the SIMD kernels
150static inline float log2f_non_ieee(float f)
151{
152 float const result = log2f(f);
153 return isinf(result) ? copysignf(127.0f, result) : result;
154}
155
// Constant used to do log10 calculations as faster log2
// precalculated 10.0 / log2f_non_ieee(10.0) to allow for constexpr
#define volk_log2to10factor 3.01029995663981209120
161
162#endif /*INCLUDED_LIBVOLK_COMMON_H*/
bit128
Definition: volk_common.h:111
float f[4]
Definition: volk_common.h:115
__m128i int_vec
Definition: volk_common.h:123
__m128d double_vec
Definition: volk_common.h:124
uint8_t i8[16]
Definition: volk_common.h:112
uint32_t i[4]
Definition: volk_common.h:114
double d[2]
Definition: volk_common.h:116
uint16_t i16[8]
Definition: volk_common.h:113
__m128 float_vec
Definition: volk_common.h:119
bit256
Definition: volk_common.h:128
float f[8]
Definition: volk_common.h:132
__m256d double_vec
Definition: volk_common.h:138
uint8_t i8[32]
Definition: volk_common.h:129
uint16_t i16[16]
Definition: volk_common.h:130
double d[4]
Definition: volk_common.h:133
uint32_t i[8]
Definition: volk_common.h:131
__m256 float_vec
Definition: volk_common.h:136
__m256i int_vec
Definition: volk_common.h:137
static float log2f_non_ieee(float f)
Definition: volk_common.h:150