Only advertise AVX and FMA4 to the ifunc selectors when they are usable.

__init_cpu_features now sets two new feature bits:
  AVX_Usable   CPUID reports AVX and OSXSAVE, and XGETBV (XCR0) shows both
               XMM and YMM state enabled.
  FMA4_Usable  CPUID reports FMA4 and AVX_Usable is set.
HAS_AVX and HAS_FMA4 are redefined in terms of these bits (the raw CPUID
tests remain available as CPUID_AVX and CPUID_FMA4), HAS_ARCH_FEATURE now
takes a feature name, and the strcasecmp/strncasecmp selectors in strcmp.S
test AVX_Usable instead of the CPUID AVX bit.  The libm_ifunc selector
expressions are only re-wrapped and parenthesized, and the FSF postal
address in the license headers is replaced by the licenses URL.

diff -rup c/sysdeps/x86_64/fpu/multiarch/e_log.c d/sysdeps/x86_64/fpu/multiarch/e_log.c
--- c/sysdeps/x86_64/fpu/multiarch/e_log.c	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/fpu/multiarch/e_log.c	2012-05-11 12:53:39.491061476 -0600
@@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double
 
 libm_ifunc (__ieee754_log,
 	    HAS_FMA4 ? __ieee754_log_fma4
-	    : (HAS_AVX ? __ieee754_log_avx
-	       : __ieee754_log_sse2));
+	    : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2));
 strong_alias (__ieee754_log, __log_finite)
 
 # define __ieee754_log __ieee754_log_sse2
diff -rup c/sysdeps/x86_64/fpu/multiarch/s_atan.c d/sysdeps/x86_64/fpu/multiarch/s_atan.c
--- c/sysdeps/x86_64/fpu/multiarch/s_atan.c	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/fpu/multiarch/s_atan.c	2012-05-11 12:53:39.491061476 -0600
@@ -12,7 +12,8 @@ extern double __atan_fma4 (double);
 #  define __atan_fma4 ((void *) 0)
 # endif
 
-libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : HAS_AVX ? __atan_avx : __atan_sse2);
+libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 :
+		   HAS_AVX ? __atan_avx : __atan_sse2));
 
 # define atan __atan_sse2
 #endif
diff -rup c/sysdeps/x86_64/fpu/multiarch/s_sin.c d/sysdeps/x86_64/fpu/multiarch/s_sin.c
--- c/sysdeps/x86_64/fpu/multiarch/s_sin.c	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/fpu/multiarch/s_sin.c	2012-05-11 12:53:39.491061476 -0600
@@ -17,10 +17,12 @@ extern double __sin_fma4 (double);
 #  define __sin_fma4 ((void *) 0)
 # endif
 
-libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : HAS_AVX ? __cos_avx : __cos_sse2);
+libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 :
+		    HAS_AVX ? __cos_avx : __cos_sse2));
 weak_alias (__cos, cos)
 
-libm_ifunc (__sin, HAS_FMA4 ? __sin_fma4 : HAS_AVX ? __sin_avx : __sin_sse2);
+libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 :
+		    HAS_AVX ? __sin_avx : __sin_sse2));
 weak_alias (__sin, sin)
 
 # define __cos __cos_sse2
diff -rup c/sysdeps/x86_64/fpu/multiarch/s_tan.c d/sysdeps/x86_64/fpu/multiarch/s_tan.c
--- c/sysdeps/x86_64/fpu/multiarch/s_tan.c	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/fpu/multiarch/s_tan.c	2012-05-11 12:53:39.491061476 -0600
@@ -12,7 +12,8 @@ extern double __tan_fma4 (double);
 #  define __tan_fma4 ((void *) 0)
 # endif
 
-libm_ifunc (tan, HAS_FMA4 ? __tan_fma4 : HAS_AVX ? __tan_avx : __tan_sse2);
+libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 :
+		  HAS_AVX ? __tan_avx : __tan_sse2));
 
 # define tan __tan_sse2
 #endif
diff -rup c/sysdeps/x86_64/multiarch/init-arch.c d/sysdeps/x86_64/multiarch/init-arch.c
--- c/sysdeps/x86_64/multiarch/init-arch.c	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/multiarch/init-arch.c	2012-05-11 12:55:29.169490958 -0600
@@ -1,6 +1,6 @@
 /* Initialize CPU feature data.
    This file is part of the GNU C Library.
-   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2008-2012 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -14,9 +14,8 @@
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 #include <atomic.h>
 #include <cpuid.h>
@@ -144,6 +143,26 @@ __init_cpu_features (void)
   else
     kind = arch_kind_other;
 
+  if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
+    {
+      /* Determine if AVX is usable.  The OS must have enabled XSAVE
+	 and XCR0 must report both XMM and YMM state as enabled.  */
+      if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
+	  && ({ unsigned int xcrlow;
+		unsigned int xcrhigh;
+		asm ("xgetbv"
+		     : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
+		(xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+		(bit_YMM_state | bit_XMM_state); }))
+	__cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+    }
+
+  /* FMA4 depends on AVX support.  Test the AVX_Usable bit itself; the
+     feature word it lives in also holds unrelated flags.  */
+  if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4) != 0
+      && (__cpu_features.feature[index_AVX_Usable] & bit_AVX_Usable) != 0)
+    __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable;
+
   __cpu_features.family = family;
   __cpu_features.model = model;
   atomic_write_barrier ();
diff -rup c/sysdeps/x86_64/multiarch/init-arch.h d/sysdeps/x86_64/multiarch/init-arch.h
--- c/sysdeps/x86_64/multiarch/init-arch.h	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/multiarch/init-arch.h	2012-05-11 12:55:29.170490953 -0600
@@ -1,5 +1,5 @@
 /* This file is part of the GNU C Library.
-   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2008-2012 Free Software Foundation, Inc.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -12,9 +12,8 @@
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 #define bit_Fast_Rep_String		(1 << 0)
 #define bit_Fast_Copy_Backward		(1 << 1)
@@ -22,16 +21,24 @@
 #define bit_Prefer_SSE_for_memop	(1 << 3)
 #define bit_Fast_Unaligned_Load		(1 << 4)
 #define bit_Prefer_PMINUB_for_stringop	(1 << 5)
+#define bit_AVX_Usable			(1 << 6)
+#define bit_FMA4_Usable			(1 << 7)
 
+/* CPUID Feature flags.  */
 #define bit_SSE2	(1 << 26)
 #define bit_SSSE3	(1 << 9)
 #define bit_SSE4_1	(1 << 19)
 #define bit_SSE4_2	(1 << 20)
+#define bit_OSXSAVE	(1 << 27)
 #define bit_AVX		(1 << 28)
 #define bit_POPCOUNT	(1 << 23)
 #define bit_FMA		(1 << 12)
 #define bit_FMA4	(1 << 16)
 
+/* XCR0 Feature flags.  */
+#define bit_XMM_state	(1 << 1)
+#define bit_YMM_state	(1 << 2)
+
 #ifdef	__ASSEMBLER__
 
 # include <ifunc-defines.h>
@@ -48,6 +55,8 @@
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -92,7 +101,7 @@ extern struct cpu_features
 
 
 extern void __init_cpu_features (void) attribute_hidden;
-#define INIT_ARCH()\
+# define INIT_ARCH() \
   do							\
     if (__cpu_features.kind == arch_kind_unknown)	\
       __init_cpu_features ();				\
@@ -111,37 +120,36 @@ extern const struct cpu_features *__get_
 
 /* Following are the feature tests used throughout libc.  */
 
+/* CPUID_* evaluates to true if the feature flag is enabled.  */
+# define CPUID_OSXSAVE	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE)
+# define CPUID_AVX	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
+# define CPUID_FMA4	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
+
+/* HAS_* evaluates to true if we may use the feature at runtime.  */
 # define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
 # define HAS_POPCOUNT	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT)
 # define HAS_SSSE3	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define HAS_FMA4	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
+# define index_AVX_Usable		FEATURE_INDEX_1
+# define index_FMA4_Usable		FEATURE_INDEX_1
 
-#define HAS_ARCH_FEATURE(idx, bit) \
-  ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
-
-#define HAS_FAST_REP_STRING \
-  HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
-
-#define HAS_FAST_COPY_BACKWARD \
-  HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
-
-#define HAS_SLOW_BSF \
-  HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
-
-#define HAS_PREFER_SSE_FOR_MEMOP \
-  HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
+# define HAS_ARCH_FEATURE(name) \
+  ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
 
-#define HAS_FAST_UNALIGNED_LOAD \
-  HAS_ARCH_FEATURE (index_Fast_Unaligned_Load, bit_Fast_Unaligned_Load)
+# define HAS_FAST_REP_STRING		HAS_ARCH_FEATURE (Fast_Rep_String)
+# define HAS_FAST_COPY_BACKWARD		HAS_ARCH_FEATURE (Fast_Copy_Backward)
+# define HAS_SLOW_BSF			HAS_ARCH_FEATURE (Slow_BSF)
+# define HAS_PREFER_SSE_FOR_MEMOP	HAS_ARCH_FEATURE (Prefer_SSE_for_memop)
+# define HAS_FAST_UNALIGNED_LOAD	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+# define HAS_AVX			HAS_ARCH_FEATURE (AVX_Usable)
+# define HAS_FMA4			HAS_ARCH_FEATURE (FMA4_Usable)
 
 #endif	/* __ASSEMBLER__ */
diff -rup c/sysdeps/x86_64/multiarch/strcmp.S d/sysdeps/x86_64/multiarch/strcmp.S
--- c/sysdeps/x86_64/multiarch/strcmp.S	2012-01-01 05:16:32.000000000 -0700
+++ d/sysdeps/x86_64/multiarch/strcmp.S	2012-05-11 12:55:29.172490943 -0600
@@ -1,5 +1,5 @@
 /* strcmp with SSE4.2
-   Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 2009-2012 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -107,7 +107,7 @@ ENTRY(__strcasecmp)
 1:
 # ifdef HAVE_AVX_SUPPORT
 	leaq	__strcasecmp_avx(%rip), %rax
-	testl	$bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
+	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
 	jnz	2f
 # endif
 	leaq	__strcasecmp_sse42(%rip), %rax
@@ -130,7 +130,7 @@ ENTRY(__strncasecmp)
 1:
 # ifdef HAVE_AVX_SUPPORT
 	leaq	__strncasecmp_avx(%rip), %rax
-	testl	$bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip)
+	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
 	jnz	2f
 # endif
 	leaq	__strncasecmp_sse42(%rip), %rax
Only in d/sysdeps/x86_64/multiarch: strcmp.S.orig