diff --git a/.cvsignore b/.cvsignore index 04150eb..d742735 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1 @@ -gcc-4.1.2-20070925.tar.bz2 +gcc-4.1.2-20071124.tar.bz2 diff --git a/gcc41-c++-guard-visibility.patch b/gcc41-c++-guard-visibility.patch index b9ed079..971100b 100644 --- a/gcc41-c++-guard-visibility.patch +++ b/gcc41-c++-guard-visibility.patch @@ -1,18 +1,7 @@ 2007-09-06 Jason Merrill - * decl2.c (get_guard): Copy visibility from the guarded variable. + * g++.dg/ext/visibility/guard1.C: New test. ---- gcc/cp/decl2.c (revision 128225) -+++ gcc/cp/decl2.c (revision 128226) -@@ -2215,6 +2215,8 @@ get_guard (tree decl) - DECL_ONE_ONLY (guard) = DECL_ONE_ONLY (decl); - if (TREE_PUBLIC (decl)) - DECL_WEAK (guard) = DECL_WEAK (decl); -+ DECL_VISIBILITY (guard) = DECL_VISIBILITY (decl); -+ DECL_VISIBILITY_SPECIFIED (guard) = DECL_VISIBILITY_SPECIFIED (decl); - - DECL_ARTIFICIAL (guard) = 1; - DECL_IGNORED_P (guard) = 1; --- gcc/testsuite/g++.dg/ext/visibility/guard1.C (revision 0) +++ gcc/testsuite/g++.dg/ext/visibility/guard1.C (revision 128226) @@ -0,0 +1,29 @@ diff --git a/gcc41-debug-fortran-array.patch b/gcc41-debug-fortran-array.patch new file mode 100644 index 0000000..69764cb --- /dev/null +++ b/gcc41-debug-fortran-array.patch @@ -0,0 +1,33 @@ +2007-12-10 Jakub Jelinek + + * dwarf2out.c (gen_array_type_die, gen_descr_array_type_die): For + Fortran multi-dimensional arrays use DW_AT_ordering + DW_ORD_col_major. + +--- gcc/dwarf2out.c (revision 130741) ++++ gcc/dwarf2out.c (revision 130742) +@@ -11636,6 +11636,12 @@ gen_array_type_die (tree type, dw_die_re + add_AT_flag (array_die, DW_AT_GNU_vector, 1); + } + ++ /* For Fortran multidimensional arrays use DW_ORD_col_major ordering. */ ++ if (is_fortran () ++ && TREE_CODE (type) == ARRAY_TYPE ++ && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE) ++ add_AT_unsigned (array_die, DW_AT_ordering, DW_ORD_col_major); ++ + #if 0 + /* We default the array ordering. 
SDB will probably do + the right things even if DW_AT_ordering is not present. It's not even +@@ -11787,6 +11793,11 @@ gen_descr_array_type_die (tree type, str + add_name_attribute (array_die, type_tag (type)); + equate_type_number_to_die (type, array_die); + ++ /* For Fortran multidimensional arrays use DW_ORD_col_major ordering. */ ++ if (is_fortran () ++ && info->ndimensions >= 2) ++ add_AT_unsigned (array_die, DW_AT_ordering, DW_ORD_col_major); ++ + if (info->data_location) + add_descr_info_field (array_die, DW_AT_data_location, info->data_location, + info->base_decl); diff --git a/gcc41-java-arm1.patch b/gcc41-java-arm1.patch new file mode 100644 index 0000000..54858eb --- /dev/null +++ b/gcc41-java-arm1.patch @@ -0,0 +1,957 @@ +2007-05-18 Aurelien Jarno + + * src/arm/ffi.c (ffi_prep_closure_loc): Renamed and ajusted + from (ffi_prep_closure): ... this. + (FFI_INIT_TRAMPOLINE): Adjust. + +2005-12-31 Phil Blundell + + * src/arm/ffi.c (ffi_prep_incoming_args_SYSV, + ffi_closure_SYSV_inner, ffi_prep_closure): New, add closure support. + * src/arm/sysv.S(ffi_closure_SYSV): Likewise. + * src/arm/ffitarget.h (FFI_TRAMPOLINE_SIZE): Likewise. + (FFI_CLOSURES): Enable closure support. 
+ +2007-07-03 Andrew Haley + + * testsuite/libffi.call/cls_multi_ushort.c, + testsuite/libffi.call/cls_align_uint16.c, + testsuite/libffi.call/nested_struct1.c, + testsuite/libffi.call/nested_struct3.c, + testsuite/libffi.call/cls_7_1_byte.c, + testsuite/libffi.call/cls_double.c, + testsuite/libffi.call/cls_sint.c, + testsuite/libffi.call/cls_20byte1.c, + testsuite/libffi.call/cls_multi_sshortchar.c, + testsuite/libffi.call/cls_align_sint64.c, + testsuite/libffi.call/cls_3byte2.c, + testsuite/libffi.call/cls_multi_schar.c, + testsuite/libffi.call/cls_multi_uchar.c, + testsuite/libffi.call/cls_19byte.c, + testsuite/libffi.call/cls_9byte1.c, + testsuite/libffi.call/cls_align_float.c, + testsuite/libffi.call/closure_fn1.c, + testsuite/libffi.call/problem1.c, + testsuite/libffi.call/closure_fn3.c, + testsuite/libffi.call/cls_sshort.c, + testsuite/libffi.call/closure_fn5.c, + testsuite/libffi.call/cls_align_double.c, + testsuite/libffi.call/cls_2byte.c, + testsuite/libffi.call/nested_struct.c, + testsuite/libffi.call/cls_4byte.c, + testsuite/libffi.call/cls_6byte.c, + testsuite/libffi.call/cls_8byte.c, + testsuite/libffi.call/cls_multi_sshort.c, + testsuite/libffi.call/cls_align_uint32.c, + testsuite/libffi.call/cls_align_sint16.c, + testsuite/libffi.call/cls_float.c, + testsuite/libffi.call/cls_20byte.c, + testsuite/libffi.call/cls_5_1_byte.c, + testsuite/libffi.call/nested_struct2.c, + testsuite/libffi.call/cls_24byte.c, + testsuite/libffi.call/cls_64byte.c, + testsuite/libffi.call/cls_uint.c, + testsuite/libffi.call/cls_multi_ushortchar.c, + testsuite/libffi.call/cls_schar.c, + testsuite/libffi.call/cls_uchar.c, + testsuite/libffi.call/cls_align_uint64.c, + testsuite/libffi.call/cls_ulonglong.c, + testsuite/libffi.call/cls_align_longdouble.c, + testsuite/libffi.call/cls_1_1byte.c, + testsuite/libffi.call/cls_12byte.c, + testsuite/libffi.call/cls_3_1byte.c, + testsuite/libffi.call/cls_3byte1.c, + testsuite/libffi.call/cls_4_1byte.c, + 
testsuite/libffi.call/cls_6_1_byte.c, + testsuite/libffi.call/cls_16byte.c, + testsuite/libffi.call/cls_18byte.c, + testsuite/libffi.call/closure_fn0.c, + testsuite/libffi.call/cls_9byte2.c, + testsuite/libffi.call/closure_fn2.c, + testsuite/libffi.call/closure_fn4.c, + testsuite/libffi.call/cls_ushort.c, + testsuite/libffi.call/closure_fn6.c, + testsuite/libffi.call/cls_5byte.c, + testsuite/libffi.call/cls_align_pointer.c, + testsuite/libffi.call/cls_7byte.c, + testsuite/libffi.call/cls_align_sint32.c, + testsuite/libffi.special/unwindtest.cc: Enable for ARM. + +--- libffi/src/arm/ffitarget.h.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/src/arm/ffitarget.h 2007-09-06 14:07:30.000000000 +0200 +@@ -40,7 +40,8 @@ typedef enum ffi_abi { + + /* ---- Definitions for closures ----------------------------------------- */ + +-#define FFI_CLOSURES 0 ++#define FFI_CLOSURES 1 ++#define FFI_TRAMPOLINE_SIZE 20 + #define FFI_NATIVE_RAW_API 0 + + #endif +--- libffi/src/arm/ffi.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/src/arm/ffi.c 2007-09-06 14:07:30.000000000 +0200 +@@ -168,3 +168,122 @@ void ffi_call(ffi_cif *cif, void (*fn)() + break; + } + } ++ ++/** private members **/ ++ ++static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, ++ void** args, ffi_cif* cif); ++ ++void ffi_closure_SYSV (ffi_closure *); ++ ++/* This function is jumped to by the trampoline */ ++ ++unsigned int ++ffi_closure_SYSV_inner (closure, respp, args) ++ ffi_closure *closure; ++ void **respp; ++ void *args; ++{ ++ // our various things... ++ ffi_cif *cif; ++ void **arg_area; ++ ++ cif = closure->cif; ++ arg_area = (void**) alloca (cif->nargs * sizeof (void*)); ++ ++ /* this call will initialize ARG_AREA, such that each ++ * element in that array points to the corresponding ++ * value on the stack; and if the function returns ++ * a structure, it will re-set RESP to point to the ++ * structure return address. 
*/ ++ ++ ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif); ++ ++ (closure->fun) (cif, *respp, arg_area, closure->user_data); ++ ++ return cif->flags; ++} ++ ++/*@-exportheader@*/ ++static void ++ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, ++ void **avalue, ffi_cif *cif) ++/*@=exportheader@*/ ++{ ++ register unsigned int i; ++ register void **p_argv; ++ register char *argp; ++ register ffi_type **p_arg; ++ ++ argp = stack; ++ ++ if ( cif->flags == FFI_TYPE_STRUCT ) { ++ *rvalue = *(void **) argp; ++ argp += 4; ++ } ++ ++ p_argv = avalue; ++ ++ for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) ++ { ++ size_t z; ++ ++ /* Align if necessary */ ++ if ((sizeof(int) - 1) & (unsigned) argp) { ++ argp = (char *) ALIGN(argp, sizeof(int)); ++ } ++ ++ z = (*p_arg)->size; ++ ++ /* because we're little endian, this is what it turns into. */ ++ ++ *p_argv = (void*) argp; ++ ++ p_argv++; ++ argp += z; ++ } ++ ++ return; ++} ++ ++/* How to make a trampoline. */ ++ ++#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ ++({ unsigned char *__tramp = (unsigned char*)(TRAMP); \ ++ unsigned int __fun = (unsigned int)(FUN); \ ++ unsigned int __ctx = (unsigned int)(CTX); \ ++ *(unsigned int*) &__tramp[0] = 0xe92d000f; /* stmfd sp!, {r0-r3} */ \ ++ *(unsigned int*) &__tramp[4] = 0xe59f0000; /* ldr r0, [pc] */ \ ++ *(unsigned int*) &__tramp[8] = 0xe59ff000; /* ldr pc, [pc] */ \ ++ *(unsigned int*) &__tramp[12] = __ctx; \ ++ *(unsigned int*) &__tramp[16] = __fun; \ ++ register unsigned long _beg __asm ("a1") = (unsigned long) (&__tramp[0]); \ ++ register unsigned long _end __asm ("a2") = (unsigned long) (&__tramp[19]); \ ++ register unsigned long _flg __asm ("a3") = 0; \ ++ __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" \ ++ : "=r" (_beg) \ ++ : "0" (_beg), "r" (_end), "r" (_flg)); \ ++ }) ++ ++ ++/* the cif must already be prep'ed */ ++ ++ffi_status ++ffi_prep_closure_loc (ffi_closure* closure, ++ ffi_cif* cif, ++ void (*fun)(ffi_cif*,void*,void**,void*), ++ 
void *user_data, ++ void *codeloc) ++{ ++ FFI_ASSERT (cif->abi == FFI_SYSV); ++ ++ FFI_INIT_TRAMPOLINE (&closure->tramp[0], \ ++ &ffi_closure_SYSV, \ ++ codeloc); ++ ++ closure->cif = cif; ++ closure->user_data = user_data; ++ closure->fun = fun; ++ ++ return FFI_OK; ++} +--- libffi/src/arm/sysv.S.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/src/arm/sysv.S 2007-09-06 14:08:26.000000000 +0200 +@@ -207,6 +207,54 @@ LSYM(Lepilogue): + .ffi_call_SYSV_end: + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + ++/* ++ unsigned int FFI_HIDDEN ++ ffi_closure_SYSV_inner (closure, respp, args) ++ ffi_closure *closure; ++ void **respp; ++ void *args; ++*/ ++ ++ARM_FUNC_START ffi_closure_SYSV ++ add ip, sp, #16 ++ stmfd sp!, {ip, lr} ++ add r2, sp, #8 ++ sub sp, sp, #16 ++ str sp, [sp, #8] ++ add r1, sp, #8 ++ bl ffi_closure_SYSV_inner ++ cmp r0, #FFI_TYPE_INT ++ beq .Lretint ++ cmp r0, #FFI_TYPE_FLOAT ++ beq .Lretfloat ++ cmp r0, #FFI_TYPE_DOUBLE ++ beq .Lretdouble ++ cmp r0, #FFI_TYPE_LONGDOUBLE ++ beq .Lretlongdouble ++ cmp r0, #FFI_TYPE_SINT64 ++ beq .Lretlonglong ++.Lclosure_epilogue: ++ add sp, sp, #16 ++ ldmfd sp, {sp, pc} ++.Lretint: ++ ldr r0, [sp] ++ b .Lclosure_epilogue ++.Lretlonglong: ++ ldr r0, [sp] ++ ldr r1, [sp, #4] ++ b .Lclosure_epilogue ++.Lretfloat: ++ ldfs f0, [sp] ++ b .Lclosure_epilogue ++.Lretdouble: ++ ldfd f0, [sp] ++ b .Lclosure_epilogue ++.Lretlongdouble: ++ ldfd f0, [sp] ++ b .Lclosure_epilogue ++.ffi_closure_SYSV_end: ++ .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) ++ + #if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits + #endif +--- libffi/testsuite/libffi.call/cls_multi_ushort.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_multi_ushort.c 2007-09-06 14:07:30.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: PR13221. 
+ Originator: 20031129 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + unsigned short test_func_fn(unsigned short a1, unsigned short a2) +--- libffi/testsuite/libffi.call/cls_align_uint16.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_uint16.c 2007-09-06 14:07:30.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/nested_struct1.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/nested_struct1.c 2007-09-06 14:07:30.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_16byte1 { +--- libffi/testsuite/libffi.call/nested_struct3.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/nested_struct3.c 2007-09-06 14:07:30.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030911 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct A { +--- libffi/testsuite/libffi.call/cls_7_1_byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_7_1_byte.c 2007-09-06 14:07:30.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. 
+ Originator: 20050708 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_7byte { +--- libffi/testsuite/libffi.call/cls_double.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_double.c 2007-09-06 14:07:30.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_double_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_sint.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_sint.c 2007-09-06 14:07:33.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031108 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_sint_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_20byte1.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_20byte1.c 2007-09-06 14:07:34.000000000 +0200 +@@ -5,7 +5,9 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_20byte { +--- libffi/testsuite/libffi.call/cls_multi_sshortchar.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_multi_sshortchar.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: PR13221. 
+ Originator: 20031129 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + signed short test_func_fn(signed char a1, signed short a2, +--- libffi/testsuite/libffi.call/cls_align_sint64.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_sint64.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/cls_3byte2.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_3byte2.c 2007-09-06 14:07:34.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_3byte_1 { +--- libffi/testsuite/libffi.call/cls_multi_schar.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_multi_schar.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: PR13221. + Originator: 20031129 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + signed char test_func_fn(signed char a1, signed char a2) +--- libffi/testsuite/libffi.call/cls_multi_uchar.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_multi_uchar.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: PR13221. 
+ Originator: 20031129 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + unsigned char test_func_fn(unsigned char a1, unsigned char a2, +--- libffi/testsuite/libffi.call/cls_19byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_19byte.c 2007-09-06 14:07:34.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030915 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_19byte { +--- libffi/testsuite/libffi.call/cls_9byte1.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_9byte1.c 2007-09-06 14:07:34.000000000 +0200 +@@ -7,7 +7,7 @@ + PR: none. + Originator: 20030914 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_9byte { +--- libffi/testsuite/libffi.call/cls_align_float.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_float.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/closure_fn1.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/closure_fn1.c 2007-09-06 14:07:34.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. 
+ Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + +--- libffi/testsuite/libffi.call/problem1.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/problem1.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct my_ffi_struct { +--- libffi/testsuite/libffi.call/closure_fn3.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/closure_fn3.c 2007-09-06 14:07:34.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void closure_test_fn3(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_sshort.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_sshort.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031108 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_sshort_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/closure_fn5.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/closure_fn5.c 2007-09-06 14:07:34.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. 
+ Originator: 20031026 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void +--- libffi/testsuite/libffi.call/cls_align_double.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_double.c 2007-09-06 14:07:34.000000000 +0200 +@@ -4,7 +4,9 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/cls_2byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_2byte.c 2007-09-06 14:07:34.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_2byte { +--- libffi/testsuite/libffi.call/nested_struct.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/nested_struct.c 2007-09-06 14:07:34.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_16byte1 { +--- libffi/testsuite/libffi.call/cls_4byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_4byte.c 2007-09-06 14:07:35.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. 
+ Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + + #include "ffitest.h" + +--- libffi/testsuite/libffi.call/cls_6byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_6byte.c 2007-09-06 14:07:35.000000000 +0200 +@@ -5,7 +5,8 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_6byte { +--- libffi/testsuite/libffi.call/cls_8byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_8byte.c 2007-09-06 14:07:35.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_8byte { +--- libffi/testsuite/libffi.call/cls_multi_sshort.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_multi_sshort.c 2007-09-06 14:07:35.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: PR13221. + Originator: 20031129 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + signed short test_func_fn(signed short a1, signed short a2) +--- libffi/testsuite/libffi.call/cls_align_uint32.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_uint32.c 2007-09-06 14:07:35.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. 
+ Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/cls_align_sint16.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_sint16.c 2007-09-06 14:07:35.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/cls_float.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_float.c 2007-09-06 14:07:35.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_float_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_20byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_20byte.c 2007-09-06 14:07:35.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_20byte { +--- libffi/testsuite/libffi.call/cls_5_1_byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_5_1_byte.c 2007-09-06 14:07:35.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. 
+ Originator: 20050708 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_5byte { +--- libffi/testsuite/libffi.call/nested_struct2.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/nested_struct2.c 2007-09-06 14:07:35.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030911 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct A { +--- libffi/testsuite/libffi.call/cls_24byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_24byte.c 2007-09-06 14:07:35.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_24byte { +--- libffi/testsuite/libffi.call/cls_64byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_64byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_64byte { +--- libffi/testsuite/libffi.call/cls_uint.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_uint.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. 
+ Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_uint_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_multi_ushortchar.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_multi_ushortchar.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: PR13221. + Originator: 20031129 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + unsigned short test_func_fn(unsigned char a1, unsigned short a2, +--- libffi/testsuite/libffi.call/cls_schar.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_schar.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,9 @@ + PR: none. + Originator: 20031108 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_schar_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_uchar.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_uchar.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_uchar_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_align_uint64.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_uint64.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,8 @@ + PR: none. 
+ Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/cls_ulonglong.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_ulonglong.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_ulonglong_fn(ffi_cif* cif __UNUSED__, void* resp, +--- libffi/testsuite/libffi.call/cls_align_longdouble.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_longdouble.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + + #include "ffitest.h" + +--- libffi/testsuite/libffi.call/cls_1_1byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_1_1byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,9 @@ + PR: none. + Originator: 20030902 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_1_1byte { +--- libffi/testsuite/libffi.call/cls_12byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_12byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. 
+ Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_12byte { +--- libffi/testsuite/libffi.call/cls_3_1byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_3_1byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030902 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_3_1byte { +--- libffi/testsuite/libffi.call/cls_3byte1.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_3byte1.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_3byte { +--- libffi/testsuite/libffi.call/cls_4_1byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_4_1byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030902 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_4_1byte { +--- libffi/testsuite/libffi.call/cls_6_1_byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_6_1_byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. 
+ Originator: 20050708 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_6byte { +--- libffi/testsuite/libffi.call/cls_16byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_16byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_16byte { +--- libffi/testsuite/libffi.call/cls_18byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_18byte.c 2007-09-06 14:07:37.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030915 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_18byte { +--- libffi/testsuite/libffi.call/closure_fn0.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/closure_fn0.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,10 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++ ++ ++ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void +--- libffi/testsuite/libffi.call/cls_9byte2.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_9byte2.c 2007-09-06 14:07:37.000000000 +0200 +@@ -7,7 +7,7 @@ + PR: none. 
+ Originator: 20030914 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_9byte { +--- libffi/testsuite/libffi.call/closure_fn2.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/closure_fn2.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void closure_test_fn2(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/closure_fn4.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/closure_fn4.c 2007-09-06 14:07:37.000000000 +0200 +@@ -6,7 +6,7 @@ + PR: none. + Originator: 20031026 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + + #include "ffitest.h" + +--- libffi/testsuite/libffi.call/cls_ushort.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_ushort.c 2007-09-06 14:07:37.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + static void cls_ret_ushort_fn(ffi_cif* cif __UNUSED__, void* resp, void** args, +--- libffi/testsuite/libffi.call/cls_5byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_5byte.c 2007-09-06 14:07:38.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. 
+ Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_5byte { +--- libffi/testsuite/libffi.call/cls_align_pointer.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_pointer.c 2007-09-06 14:07:38.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.call/cls_7byte.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_7byte.c 2007-09-06 14:07:38.000000000 +0200 +@@ -5,7 +5,7 @@ + PR: none. + Originator: 20030828 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_7byte { +--- libffi/testsuite/libffi.call/cls_align_sint32.c.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.call/cls_align_sint32.c 2007-09-06 14:07:38.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. + Originator: 20031203 */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitest.h" + + typedef struct cls_struct_align { +--- libffi/testsuite/libffi.special/unwindtest.cc.jj 2007-09-06 14:05:09.000000000 +0200 ++++ libffi/testsuite/libffi.special/unwindtest.cc 2007-09-06 14:07:39.000000000 +0200 +@@ -4,7 +4,7 @@ + PR: none. 
+ Originator: Jeff Sturm */ + +-/* { dg-do run { xfail mips64*-*-* arm*-*-* strongarm*-*-* xscale*-*-* } } */ ++/* { dg-do run { xfail mips64*-*-* } } */ + #include "ffitestcxx.h" + + void diff --git a/gcc41-java-arm2.patch b/gcc41-java-arm2.patch new file mode 100644 index 0000000..16ffccc --- /dev/null +++ b/gcc41-java-arm2.patch @@ -0,0 +1,114 @@ +2007-07-13 Andrew Haley + + * libgcj.ver: Add __gcj_personality_sj0. + +2007-07-11 Andrew Haley + + * configure.host (arm*-linux-gnu): New. + * sysdep/arm/locks.h: New. + +--- libjava/configure.host (revision 126621) ++++ libjava/configure.host (revision 126623) +@@ -82,6 +82,10 @@ case "${host}" in + enable_getenv_properties_default=no + enable_main_args_default=no + ;; ++ arm*-linux-gnu) ++ libgcj_interpreter=yes ++ sysdeps_dir=arm ++ ;; + mips-tx39-*|mipstx39-unknown-*) + libgcj_flags="${libgcj_flags} -G 0" + LDFLAGS="$LDFLAGS -Tjmr3904dram.ld" +--- libjava/libgcj.ver (revision 126621) ++++ libjava/libgcj.ver (revision 126623) +@@ -2,6 +2,6 @@ + # symbols in libgcj.so. + + { +- global: Jv*; _Jv_*; __gcj_personality_v0; _Z*; ++ global: Jv*; _Jv_*; __gcj_personality_v0; __gcj_personality_sj0; _Z*; + local: *; + }; +--- libjava/sysdep/arm/locks.h (revision 0) ++++ libjava/sysdep/arm/locks.h (revision 126623) +@@ -0,0 +1,79 @@ ++// locks.h - Thread synchronization primitives. ARM implementation. ++ ++/* Copyright (C) 2007 Free Software Foundation ++ ++ This file is part of libgcj. ++ ++This software is copyrighted work licensed under the terms of the ++Libgcj License. Please consult the file "LIBGCJ_LICENSE" for ++details. */ ++ ++#ifndef __SYSDEP_LOCKS_H__ ++#define __SYSDEP_LOCKS_H__ ++ ++typedef size_t obj_addr_t; /* Integer type big enough for object */ ++ /* address. */ ++ ++/* Atomic compare and exchange. These sequences are not actually ++ atomic; there is a race if *ADDR != OLD_VAL and we are preempted ++ between the two swaps. 
However, they are very close to atomic, and ++ are the best that a pre-ARMv6 implementation can do without ++ operating system support. LinuxThreads has been using these ++ sequences for many years. */ ++ ++inline static bool ++compare_and_swap(volatile obj_addr_t *addr, ++ obj_addr_t old_val, ++ obj_addr_t new_val) ++{ ++ volatile obj_addr_t result, tmp; ++ __asm__ ("\n" ++ "0: ldr %[tmp],[%[addr]]\n" ++ " cmp %[tmp],%[old_val]\n" ++ " movne %[result],#0\n" ++ " bne 1f\n" ++ " swp %[result],%[new_val],[%[addr]]\n" ++ " cmp %[tmp],%[result]\n" ++ " swpne %[tmp],%[result],[%[addr]]\n" ++ " bne 0b\n" ++ " mov %[result],#1\n" ++ "1:" ++ : [result] "=&r" (result), [tmp] "=&r" (tmp) ++ : [addr] "r" (addr), [new_val] "r" (new_val), [old_val] "r" (old_val) ++ : "cc", "memory"); ++ ++ return result; ++} ++ ++inline static void ++release_set(volatile obj_addr_t *addr, obj_addr_t new_val) ++{ ++ __asm__ __volatile__("" : : : "memory"); ++ *(addr) = new_val; ++} ++ ++inline static bool ++compare_and_swap_release(volatile obj_addr_t *addr, ++ obj_addr_t old, ++ obj_addr_t new_val) ++{ ++ return compare_and_swap(addr, old, new_val); ++} ++ ++// Ensure that subsequent instructions do not execute on stale ++// data that was loaded from memory before the barrier. ++inline static void ++read_barrier() ++{ ++ __asm__ __volatile__("" : : : "memory"); ++} ++ ++// Ensure that prior stores to memory are completed with respect to other ++// processors. ++inline static void ++write_barrier() ++{ ++ __asm__ __volatile__("" : : : "memory"); ++} ++ ++#endif diff --git a/gcc41-java-arm3.patch b/gcc41-java-arm3.patch new file mode 100644 index 0000000..ac12ca1 --- /dev/null +++ b/gcc41-java-arm3.patch @@ -0,0 +1,33 @@ +2007-07-15 Andrew Haley + + * unwind-sjlj.c (_Unwind_GetIPInfo): Check for context->fc != NULL + before looking in the context. + + * configure.host: + arm*-linux-gnu -> arm*-linux*. 
+ +--- gcc/unwind-sjlj.c (revision 126657) ++++ gcc/unwind-sjlj.c (revision 126659) +@@ -222,7 +222,10 @@ _Unwind_Ptr + _Unwind_GetIPInfo (struct _Unwind_Context *context, int *ip_before_insn) + { + *ip_before_insn = 0; +- return context->fc->call_site + 1; ++ if (context->fc != NULL) ++ return context->fc->call_site + 1; ++ else ++ return 0; + } + + /* Set the return landing pad index in CONTEXT. */ +--- libjava/configure.host (revision 126657) ++++ libjava/configure.host (revision 126659) +@@ -82,7 +82,7 @@ case "${host}" in + enable_getenv_properties_default=no + enable_main_args_default=no + ;; +- arm*-linux-gnu) ++ arm*-linux*) + libgcj_interpreter=yes + sysdeps_dir=arm + ;; diff --git a/gcc41-java-arm4.patch b/gcc41-java-arm4.patch new file mode 100644 index 0000000..e6e2947 --- /dev/null +++ b/gcc41-java-arm4.patch @@ -0,0 +1,138 @@ +2007-09-04 Andrew Haley + + * decl.c (java_init_decl_processing): Call "__cxa_end_cleanup" + when using the ARM EABI. + +2007-08-29 Andrew Haley + + * config/arm/libgcc-bpabi.ver: Add _Unwind_Backtrace as GCC_4.3.0. + +2007-08-08 Andrew Haley + + * config/arm/libunwind.S (UNWIND_WRAPPER _Unwind_Backtrace): New. + * config/arm/unwind-arm.h (__gnu_Unwind_Backtrace): New. + * config/arm/unwind-arm.c (__gnu_Unwind_Backtrace): New. + +--- gcc/java/decl.c (revision 128085) ++++ gcc/java/decl.c (revision 128088) +@@ -1075,7 +1075,10 @@ java_init_decl_processing (void) + eh_personality_libfunc = init_one_libfunc (USING_SJLJ_EXCEPTIONS + ? 
"__gcj_personality_sj0" + : "__gcj_personality_v0"); +- default_init_unwind_resume_libfunc (); ++ if (targetm.arm_eabi_unwinder) ++ unwind_resume_libfunc = init_one_libfunc ("__cxa_end_cleanup"); ++ else ++ default_init_unwind_resume_libfunc (); + + lang_eh_runtime_type = do_nothing; + +--- gcc/config/arm/libunwind.S (revision 128085) ++++ gcc/config/arm/libunwind.S (revision 128088) +@@ -237,5 +237,6 @@ UNWIND_WRAPPER _Unwind_RaiseException 1 + UNWIND_WRAPPER _Unwind_Resume 1 + UNWIND_WRAPPER _Unwind_Resume_or_Rethrow 1 + UNWIND_WRAPPER _Unwind_ForcedUnwind 3 ++UNWIND_WRAPPER _Unwind_Backtrace 2 + +-#endif /* __symbian__ */ ++#endif /* ndef __symbian__ */ +--- gcc/config/arm/libgcc-bpabi.ver (revision 128085) ++++ gcc/config/arm/libgcc-bpabi.ver (revision 128088) +@@ -81,3 +81,11 @@ GCC_3.5 { + # GNU-specific entry point. + __gnu_unwind_frame + } ++ ++%exclude { ++ _Unwind_Backtrace ++} ++%inherit GCC_4.3.0 GCC_4.2.0 ++GCC_4.3.0 { ++ _Unwind_Backtrace ++} +--- gcc/config/arm/unwind-arm.c (revision 128085) ++++ gcc/config/arm/unwind-arm.c (revision 128088) +@@ -950,6 +950,66 @@ _Unwind_DeleteException (_Unwind_Excepti + } + + ++/* Perform stack backtrace through unwind data. */ ++_Unwind_Reason_Code ++__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument, ++ phase2_vrs * entry_vrs); ++_Unwind_Reason_Code ++__gnu_Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument, ++ phase2_vrs * entry_vrs) ++{ ++ phase1_vrs saved_vrs; ++ _Unwind_Reason_Code code; ++ ++ _Unwind_Control_Block ucb; ++ _Unwind_Control_Block *ucbp = &ucb; ++ ++ /* Set the pc to the call site. */ ++ entry_vrs->core.r[R_PC] = entry_vrs->core.r[R_LR]; ++ ++ /* Save the core registers. */ ++ saved_vrs.core = entry_vrs->core; ++ /* Set demand-save flags. */ ++ saved_vrs.demand_save_flags = ~(_uw) 0; ++ ++ do ++ { ++ /* Find the entry for this routine. 
*/ ++ if (get_eit_entry (ucbp, saved_vrs.core.r[R_PC]) != _URC_OK) ++ { ++ code = _URC_FAILURE; ++ break; ++ } ++ ++ /* The dwarf unwinder assumes the context structure holds things ++ like the function and LSDA pointers. The ARM implementation ++ caches these in the exception header (UCB). To avoid ++ rewriting everything we make the virtual IP register point at ++ the UCB. */ ++ _Unwind_SetGR((_Unwind_Context *)&saved_vrs, 12, (_Unwind_Ptr) ucbp); ++ ++ /* Call trace function. */ ++ if ((*trace) ((_Unwind_Context *) &saved_vrs, trace_argument) ++ != _URC_NO_REASON) ++ { ++ code = _URC_FAILURE; ++ break; ++ } ++ ++ /* Call the pr to decide what to do. */ ++ code = ((personality_routine) UCB_PR_ADDR (ucbp)) ++ (_US_VIRTUAL_UNWIND_FRAME | _US_FORCE_UNWIND, ++ ucbp, (void *) &saved_vrs); ++ } ++ while (code != _URC_END_OF_STACK ++ && code != _URC_FAILURE); ++ ++ finish: ++ restore_non_core_regs (&saved_vrs); ++ return code; ++} ++ ++ + /* Common implementation for ARM ABI defined personality routines. + ID is the index of the personality routine, other arguments are as defined + by __aeabi_unwind_cpp_pr{0,1,2}. */ +--- gcc/config/arm/unwind-arm.h (revision 128085) ++++ gcc/config/arm/unwind-arm.h (revision 128088) +@@ -205,6 +205,13 @@ extern "C" { + _Unwind_Control_Block *, struct _Unwind_Context *, void *); + _Unwind_Reason_Code _Unwind_ForcedUnwind (_Unwind_Control_Block *, + _Unwind_Stop_Fn, void *); ++ /* @@@ Use unwind data to perform a stack backtrace. The trace callback ++ is called for every stack frame in the call chain, but no cleanup ++ actions are performed. 
*/ ++ typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn) (_Unwind_Context *, void *); ++ _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn, ++ void*); ++ + _Unwind_Word _Unwind_GetCFA (struct _Unwind_Context *); + void _Unwind_Complete(_Unwind_Control_Block *ucbp); + void _Unwind_DeleteException (_Unwind_Exception *); diff --git a/gcc41-java-arm5.patch b/gcc41-java-arm5.patch new file mode 100644 index 0000000..c5c5b73 --- /dev/null +++ b/gcc41-java-arm5.patch @@ -0,0 +1,280 @@ +2007-09-04 Andrew Haley + + * src/arm/sysv.S (UNWIND): New. + (Whole file): Conditionally compile unwinder directives. + * src/arm/sysv.S: Add unwinder directives. + + * src/arm/ffi.c (ffi_prep_args): Align structs by at least 4 bytes. + Only treat r0 as a struct address if we're actually returning a + struct by address. + Only copy the bytes that are actually within a struct. + (ffi_prep_cif_machdep): A Composite Type not larger than 4 bytes + is returned in r0, not passed by address. + (ffi_call): Allocate a word-sized temporary for the case where + a composite is returned in r0. + (ffi_prep_incoming_args_SYSV): Align as necessary. + +2007-08-05 Steven Newbury + + * src/arm/ffi.c (FFI_INIT_TRAMPOLINE): Use __clear_cache instead of + directly using the sys_cacheflush syscall. + +2007-07-27 Andrew Haley + + * src/arm/sysv.S (ffi_closure_SYSV): Add soft-float. 
+ +--- libffi/src/arm/ffi.c (revision 128092) ++++ libffi/src/arm/ffi.c (revision 128093) +@@ -40,7 +40,7 @@ void ffi_prep_args(char *stack, extended + + argp = stack; + +- if ( ecif->cif->rtype->type == FFI_TYPE_STRUCT ) { ++ if ( ecif->cif->flags == FFI_TYPE_STRUCT ) { + *(void **) argp = ecif->rvalue; + argp += 4; + } +@@ -58,6 +58,9 @@ void ffi_prep_args(char *stack, extended + argp = (char *) ALIGN(argp, (*p_arg)->alignment); + } + ++ if ((*p_arg)->type == FFI_TYPE_STRUCT) ++ argp = (char *) ALIGN(argp, 4); ++ + z = (*p_arg)->size; + if (z < sizeof(int)) + { +@@ -81,7 +84,7 @@ void ffi_prep_args(char *stack, extended + break; + + case FFI_TYPE_STRUCT: +- *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); ++ memcpy(argp, *p_argv, (*p_arg)->size); + break; + + default: +@@ -115,7 +118,6 @@ ffi_status ffi_prep_cif_machdep(ffi_cif + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: +- case FFI_TYPE_STRUCT: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags = (unsigned) cif->rtype->type; +@@ -126,6 +128,17 @@ ffi_status ffi_prep_cif_machdep(ffi_cif + cif->flags = (unsigned) FFI_TYPE_SINT64; + break; + ++ case FFI_TYPE_STRUCT: ++ if (cif->rtype->size <= 4) ++ /* A Composite Type not larger than 4 bytes is returned in r0. */ ++ cif->flags = (unsigned)FFI_TYPE_INT; ++ else ++ /* A Composite Type larger than 4 bytes, or whose size cannot ++ be determined statically ... is stored in memory at an ++ address passed [in r0]. 
*/ ++ cif->flags = (unsigned)FFI_TYPE_STRUCT; ++ break; ++ + default: + cif->flags = FFI_TYPE_INT; + break; +@@ -141,21 +154,27 @@ void ffi_call(ffi_cif *cif, void (*fn)() + { + extended_cif ecif; + ++ int small_struct = (cif->flags == FFI_TYPE_INT ++ && cif->rtype->type == FFI_TYPE_STRUCT); ++ + ecif.cif = cif; + ecif.avalue = avalue; ++ ++ unsigned int temp; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + + if ((rvalue == NULL) && +- (cif->rtype->type == FFI_TYPE_STRUCT)) ++ (cif->flags == FFI_TYPE_STRUCT)) + { + ecif.rvalue = alloca(cif->rtype->size); + } ++ else if (small_struct) ++ ecif.rvalue = &temp; + else + ecif.rvalue = rvalue; +- +- ++ + switch (cif->abi) + { + case FFI_SYSV: +@@ -167,6 +186,8 @@ void ffi_call(ffi_cif *cif, void (*fn)() + FFI_ASSERT(0); + break; + } ++ if (small_struct) ++ memcpy (rvalue, &temp, cif->rtype->size); + } + + /** private members **/ +@@ -228,9 +249,12 @@ ffi_prep_incoming_args_SYSV(char *stack, + { + size_t z; + ++ size_t alignment = (*p_arg)->alignment; ++ if (alignment < 4) ++ alignment = 4; + /* Align if necessary */ +- if ((sizeof(int) - 1) & (unsigned) argp) { +- argp = (char *) ALIGN(argp, sizeof(int)); ++ if ((alignment - 1) & (unsigned) argp) { ++ argp = (char *) ALIGN(argp, alignment); + } + + z = (*p_arg)->size; +@@ -248,21 +272,16 @@ ffi_prep_incoming_args_SYSV(char *stack, + + /* How to make a trampoline. 
*/ + +-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ +-({ unsigned char *__tramp = (unsigned char*)(TRAMP); \ +- unsigned int __fun = (unsigned int)(FUN); \ +- unsigned int __ctx = (unsigned int)(CTX); \ ++#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \ ++({ unsigned char *__tramp = (unsigned char*)(TRAMP); \ ++ unsigned int __fun = (unsigned int)(FUN); \ ++ unsigned int __ctx = (unsigned int)(CTX); \ + *(unsigned int*) &__tramp[0] = 0xe92d000f; /* stmfd sp!, {r0-r3} */ \ +- *(unsigned int*) &__tramp[4] = 0xe59f0000; /* ldr r0, [pc] */ \ +- *(unsigned int*) &__tramp[8] = 0xe59ff000; /* ldr pc, [pc] */ \ +- *(unsigned int*) &__tramp[12] = __ctx; \ +- *(unsigned int*) &__tramp[16] = __fun; \ +- register unsigned long _beg __asm ("a1") = (unsigned long) (&__tramp[0]); \ +- register unsigned long _end __asm ("a2") = (unsigned long) (&__tramp[19]); \ +- register unsigned long _flg __asm ("a3") = 0; \ +- __asm __volatile ("swi 0x9f0002 @ sys_cacheflush" \ +- : "=r" (_beg) \ +- : "0" (_beg), "r" (_end), "r" (_flg)); \ ++ *(unsigned int*) &__tramp[4] = 0xe59f0000; /* ldr r0, [pc] */ \ ++ *(unsigned int*) &__tramp[8] = 0xe59ff000; /* ldr pc, [pc] */ \ ++ *(unsigned int*) &__tramp[12] = __ctx; \ ++ *(unsigned int*) &__tramp[16] = __fun; \ ++ __clear_cache((&__tramp[0]), (&__tramp[19])); \ + }) + + +--- libffi/src/arm/sysv.S (revision 128092) ++++ libffi/src/arm/sysv.S (revision 128093) +@@ -82,6 +82,14 @@ + # define call_reg(x) mov lr, pc ; mov pc, x + #endif + ++/* Conditionally compile unwinder directives. */ ++#ifdef __ARM_EABI__ ++#define UNWIND ++#else ++#define UNWIND @ ++#endif ++ ++ + #if defined(__thumb__) && !defined(__THUMB_INTERWORK__) + .macro ARM_FUNC_START name + .text +@@ -92,6 +100,7 @@ + bx pc + nop + .arm ++ UNWIND .fnstart + /* A hook to tell gdb that we've switched to ARM mode. Also used to call + directly from other local arm routines. 
*/ + _L__\name: +@@ -102,6 +111,7 @@ _L__\name: + .align 0 + .arm + ENTRY(\name) ++ UNWIND .fnstart + .endm + #endif + +@@ -134,8 +144,11 @@ _L__\name: + ARM_FUNC_START ffi_call_SYSV + @ Save registers + stmfd sp!, {r0-r3, fp, lr} ++ UNWIND .save {r0-r3, fp, lr} + mov fp, sp + ++ UNWIND .setfp fp, sp ++ + @ Make room for all of the new args. + sub sp, fp, r2 + +@@ -205,6 +218,7 @@ LSYM(Lepilogue): + RETLDM "r0-r3,fp" + + .ffi_call_SYSV_end: ++ UNWIND .fnend + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + + /* +@@ -216,21 +230,40 @@ LSYM(Lepilogue): + */ + + ARM_FUNC_START ffi_closure_SYSV ++ UNWIND .pad #16 + add ip, sp, #16 + stmfd sp!, {ip, lr} ++ UNWIND .save {r0, lr} + add r2, sp, #8 ++ .pad #16 + sub sp, sp, #16 + str sp, [sp, #8] + add r1, sp, #8 + bl ffi_closure_SYSV_inner + cmp r0, #FFI_TYPE_INT + beq .Lretint ++ + cmp r0, #FFI_TYPE_FLOAT ++#ifdef __SOFTFP__ ++ beq .Lretint ++#else + beq .Lretfloat ++#endif ++ + cmp r0, #FFI_TYPE_DOUBLE ++#ifdef __SOFTFP__ ++ beq .Lretlonglong ++#else + beq .Lretdouble ++#endif ++ + cmp r0, #FFI_TYPE_LONGDOUBLE ++#ifdef __SOFTFP__ ++ beq .Lretlonglong ++#else + beq .Lretlongdouble ++#endif ++ + cmp r0, #FFI_TYPE_SINT64 + beq .Lretlonglong + .Lclosure_epilogue: +@@ -243,6 +276,8 @@ ARM_FUNC_START ffi_closure_SYSV + ldr r0, [sp] + ldr r1, [sp, #4] + b .Lclosure_epilogue ++ ++#ifndef __SOFTFP__ + .Lretfloat: + ldfs f0, [sp] + b .Lclosure_epilogue +@@ -252,6 +287,9 @@ ARM_FUNC_START ffi_closure_SYSV + .Lretlongdouble: + ldfd f0, [sp] + b .Lclosure_epilogue ++#endif ++ + .ffi_closure_SYSV_end: ++ UNWIND .fnend + .size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) + diff --git a/gcc41-java-arm6.patch b/gcc41-java-arm6.patch new file mode 100644 index 0000000..221b1ab --- /dev/null +++ b/gcc41-java-arm6.patch @@ -0,0 +1,932 @@ +2007-09-04 Andrew Haley + + PR java/27908 + * testsuite/libjava.lang/PR27908.java + ({run1,run2,run3}.isRunning): New Method. + (main): Fix race condition. 
+ +2007-08-29 Andrew Haley + + * gnu/classpath/natVMStackWalker.cc (VMStackWalker::getCallingClass): + Make sure we're not sibcalled. + (GET_CALLING_CLASS): Define for ARM EABI. + +2007-08-22 Andrew Haley + + * configure.host (BACKTRACESPEC): Add arm*-linux*. + +2007-08-22 Andrew Haley + + * configure.ac (LIBSTDCXXSPEC): New. + * configure.host: Add arm*-linux* to pthread test. + * configure.ac (LIBGCJTESTSPEC): Add path to libstdc++ for ARM + EABI. + * testsuite/libjava.jni/jni.exp (gcj_jni_compile_c_to_so): Use + -fexceptions for ARM EABI. + * testsuite/lib/libjava.exp (libjava_arguments): Add libgcj-test.spec. + (libjava_invoke): Log the invocation. + +2007-08-15 Andrew Haley + + * configure.ac (extra_ldflags): Define. + * Makefile.am: Use extra_ldflags for all executables. + +2007-08-14 Andrew Haley + + * sysdep/arm/backtrace.h: Remove stubs for _Unwind_GetIPInfo, + _Unwind_GetRegionStart, and _Unwind_Backtrace. + +2007-07-27 Andrew Haley + + * gnu/classpath/natVMStackWalker.cc (GET_CALLING_CLASS): Stub for + ARM EABI. + * exception.cc (get_exception_header_from_ue): New. + (get_ttype_entry): ARM EABI version. + (PERSONALITY_FUNCTION): Add ARM EABI code. + * sysdep/arm/backtrace.h: New file. + * stacktrace.cc (_URC_NORMAL_STOP): New. + * configure.ac (extra_ldflags_libjava): Add libsupc++.la for ARM + EABI. + * configure.host (BACKTRACESPEC): Add arm/backtrace.h. 
+ +--- libjava/testsuite/Makefile.in.jj 2007-09-06 14:06:13.000000000 +0200 ++++ libjava/testsuite/Makefile.in 2007-09-06 17:20:42.000000000 +0200 +@@ -160,6 +160,7 @@ LIBLTDL = @LIBLTDL@ + LIBMAGIC = @LIBMAGIC@ + LIBOBJS = @LIBOBJS@ + LIBS = @LIBS@ ++LIBSTDCXXSPEC = @LIBSTDCXXSPEC@ + LIBTOOL = @LIBTOOL@ + LN_S = @LN_S@ + LTLIBICONV = @LTLIBICONV@ +@@ -264,6 +265,7 @@ build_vendor = @build_vendor@ + datadir = @datadir@ + dbexecdir = @dbexecdir@ + exec_prefix = @exec_prefix@ ++extra_ldflags = @extra_ldflags@ + extra_ldflags_libjava = @extra_ldflags_libjava@ + gxx_include_dir = @gxx_include_dir@ + here = @here@ +--- libjava/testsuite/libjava.jni/jni.exp.jj 2007-09-06 14:06:13.000000000 +0200 ++++ libjava/testsuite/libjava.jni/jni.exp 2007-09-06 17:20:42.000000000 +0200 +@@ -29,6 +29,10 @@ proc gcj_jni_compile_c_to_so {file {opti + set name [file rootname $filename] + set soname lib${name}.${so_extension} + ++ if { [istarget "*arm-*eabi*"] } { ++ lappend options "additional_flags=-fexceptions" ++ } ++ + lappend options "additional_flags=${so_flag} -fPIC" + # Find the generated header. + lappend options "additional_flags=-I. -I.. 
-I$srcdir/$subdir" +--- libjava/testsuite/lib/libjava.exp.jj 2007-09-06 14:06:13.000000000 +0200 ++++ libjava/testsuite/lib/libjava.exp 2007-09-06 17:20:42.000000000 +0200 +@@ -379,6 +379,7 @@ proc libjava_arguments {{mode compile}} + if {$mode == "link"} { + global wrapper_file wrap_compile_flags + lappend args "additional_flags=$wrap_compile_flags" ++ lappend args "additional_flags=-specs=libgcj-test.spec" + lappend args "libs=$wrapper_file" + lappend args "libs=$libjava" + lappend args debug +--- libjava/configure.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/configure 2007-09-06 17:27:04.000000000 +0200 +@@ -310,7 +310,7 @@ ac_includes_default="\ + # include + #endif" + +-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os build_subdir host_subdir target_subdir libgcj_basedir host host_cpu host_vendor host_os target target_cpu target_vendor target_os target_noncanonical LN_S mkinstalldirs JAVA_MAINTAINER_MODE_TRUE JAVA_MAINTAINER_MODE_FALSE CC ac_ct_CC EXEEXT OBJEXT CXX ac_ct_CXX CFLAGS CXXFLAGS LDFLAGS INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE AS ac_ct_AS LD ac_ct_LD AR ac_ct_AR RANLIB ac_ct_RANLIB JAR ZIP UNZIP MAINTAINER_MODE_TRUE MAINTAINER_MODE_FALSE MAINT LIBGCJ_CFLAGS LIBGCJ_CXXFLAGS LIBGCJ_JAVAFLAGS LIBGCJ_LD_SYMBOLIC LIBGCJDEBUG TOOLKIT XLIB_AWT_TRUE XLIB_AWT_FALSE X_AWT_TRUE 
X_AWT_FALSE GCJ_FOR_ECJX GCJH host_exeext INCLTDL LIBLTDL DIRLTDL LIBTOOL CXXCPP CPPFLAGS GCJ GCJFLAGS GCJDEPMODE am__fastdepGCJ_TRUE am__fastdepGCJ_FALSE subdirs TESTSUBDIR_TRUE TESTSUBDIR_FALSE ECJ_BUILD_JAR ECJ_JAR BUILD_ECJ1_TRUE BUILD_ECJ1_FALSE INSTALL_ECJ_JAR_TRUE INSTALL_ECJ_JAR_FALSE JAVA_HOME_SET_TRUE JAVA_HOME_SET_FALSE JAVA_HOME INTERPRETER LIBFFI LIBFFIINCS PLATFORM CPP EGREP USING_WIN32_PLATFORM_TRUE USING_WIN32_PLATFORM_FALSE USING_POSIX_PLATFORM_TRUE USING_POSIX_PLATFORM_FALSE USING_DARWIN_CRT_TRUE USING_DARWIN_CRT_FALSE SYSTEMSPEC LIBGCJTESTSPEC ZLIBSPEC ZLIBTESTSPEC X_CFLAGS X_PRE_LIBS X_LIBS X_EXTRA_LIBS extra_ldflags_libjava GCLIBS GCINCS GCDEPS GCSPEC JC1GCSPEC GCTESTSPEC USING_BOEHMGC_TRUE USING_BOEHMGC_FALSE USING_NOGC_TRUE USING_NOGC_FALSE THREADLIBS THREADINCS THREADDEPS THREADSPEC THREADSTARTFILESPEC THREADLDFLAGS THREADCXXFLAGS USING_POSIX_THREADS_TRUE USING_POSIX_THREADS_FALSE USING_WIN32_THREADS_TRUE USING_WIN32_THREADS_FALSE USING_NO_THREADS_TRUE USING_NO_THREADS_FALSE USE_LIBGCJ_BC_TRUE USE_LIBGCJ_BC_FALSE LIBGCJ_SPEC HASH_SYNC_SPEC USING_GCC_TRUE USING_GCC_FALSE LIBICONV LTLIBICONV LIBMAGIC PKG_CONFIG GTK_CFLAGS GTK_LIBS GLIB_CFLAGS GLIB_LIBS LIBART_CFLAGS LIBART_LIBS CLASSPATH_SEPARATOR ac_ct_GCJ ZLIBS SYS_ZLIBS ZINCS DIVIDESPEC CHECKREFSPEC EXCEPTIONSPEC BACKTRACESPEC IEEESPEC NATIVE_TRUE NATIVE_FALSE ENABLE_SHARED_TRUE ENABLE_SHARED_FALSE NEEDS_DATA_START_TRUE NEEDS_DATA_START_FALSE GCC_UNWIND_INCLUDE toolexecdir toolexecmainlibdir toolexeclibdir dbexecdir GCJVERSION gxx_include_dir libstdcxx_incdir ALLOCA PERL SYSDEP_SOURCES ANONVERSCRIPT_TRUE ANONVERSCRIPT_FALSE LD_START_STATIC_SPEC LD_FINISH_STATIC_SPEC here LIBOBJS LTLIBOBJS' ++ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias 
target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os build_subdir host_subdir target_subdir libgcj_basedir host host_cpu host_vendor host_os target target_cpu target_vendor target_os target_noncanonical LN_S mkinstalldirs JAVA_MAINTAINER_MODE_TRUE JAVA_MAINTAINER_MODE_FALSE CC ac_ct_CC EXEEXT OBJEXT CXX ac_ct_CXX CFLAGS CXXFLAGS LDFLAGS INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP ac_ct_STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar DEPDIR am__include am__quote AMDEP_TRUE AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE AS ac_ct_AS LD ac_ct_LD AR ac_ct_AR RANLIB ac_ct_RANLIB JAR ZIP UNZIP MAINTAINER_MODE_TRUE MAINTAINER_MODE_FALSE MAINT LIBGCJ_CFLAGS LIBGCJ_CXXFLAGS LIBGCJ_JAVAFLAGS LIBGCJ_LD_SYMBOLIC LIBGCJDEBUG TOOLKIT XLIB_AWT_TRUE XLIB_AWT_FALSE X_AWT_TRUE X_AWT_FALSE GCJ_FOR_ECJX GCJH host_exeext INCLTDL LIBLTDL DIRLTDL LIBTOOL CXXCPP CPPFLAGS GCJ GCJFLAGS GCJDEPMODE am__fastdepGCJ_TRUE am__fastdepGCJ_FALSE subdirs TESTSUBDIR_TRUE TESTSUBDIR_FALSE ECJ_BUILD_JAR ECJ_JAR BUILD_ECJ1_TRUE BUILD_ECJ1_FALSE INSTALL_ECJ_JAR_TRUE INSTALL_ECJ_JAR_FALSE JAVA_HOME_SET_TRUE JAVA_HOME_SET_FALSE JAVA_HOME INTERPRETER LIBFFI LIBFFIINCS PLATFORM CPP EGREP USING_WIN32_PLATFORM_TRUE USING_WIN32_PLATFORM_FALSE USING_POSIX_PLATFORM_TRUE USING_POSIX_PLATFORM_FALSE USING_DARWIN_CRT_TRUE USING_DARWIN_CRT_FALSE SYSTEMSPEC ZLIBSPEC ZLIBTESTSPEC X_CFLAGS X_PRE_LIBS X_LIBS X_EXTRA_LIBS extra_ldflags_libjava extra_ldflags LIBSTDCXXSPEC LIBGCJTESTSPEC GCLIBS GCINCS GCDEPS GCSPEC JC1GCSPEC GCTESTSPEC USING_BOEHMGC_TRUE USING_BOEHMGC_FALSE USING_NOGC_TRUE USING_NOGC_FALSE THREADLIBS THREADINCS THREADDEPS THREADSPEC THREADSTARTFILESPEC THREADLDFLAGS THREADCXXFLAGS USING_POSIX_THREADS_TRUE USING_POSIX_THREADS_FALSE USING_WIN32_THREADS_TRUE 
USING_WIN32_THREADS_FALSE USING_NO_THREADS_TRUE USING_NO_THREADS_FALSE USE_LIBGCJ_BC_TRUE USE_LIBGCJ_BC_FALSE LIBGCJ_SPEC HASH_SYNC_SPEC USING_GCC_TRUE USING_GCC_FALSE LIBICONV LTLIBICONV LIBMAGIC PKG_CONFIG GTK_CFLAGS GTK_LIBS GLIB_CFLAGS GLIB_LIBS LIBART_CFLAGS LIBART_LIBS CLASSPATH_SEPARATOR ac_ct_GCJ ZLIBS SYS_ZLIBS ZINCS DIVIDESPEC CHECKREFSPEC EXCEPTIONSPEC BACKTRACESPEC IEEESPEC NATIVE_TRUE NATIVE_FALSE ENABLE_SHARED_TRUE ENABLE_SHARED_FALSE NEEDS_DATA_START_TRUE NEEDS_DATA_START_FALSE GCC_UNWIND_INCLUDE toolexecdir toolexecmainlibdir toolexeclibdir dbexecdir GCJVERSION gxx_include_dir libstdcxx_incdir ALLOCA PERL SYSDEP_SOURCES ANONVERSCRIPT_TRUE ANONVERSCRIPT_FALSE LD_START_STATIC_SPEC LD_FINISH_STATIC_SPEC here LIBOBJS LTLIBOBJS' + ac_subst_files='' + + # Initialize some variables set by options. +@@ -7640,9 +7640,6 @@ case "${host}" in + esac + + +-LIBGCJTESTSPEC="-L`${PWDCMD-pwd}`/.libs -rpath `${PWDCMD-pwd}`/.libs" +- +- + + # Check whether --with-system-zlib or --without-system-zlib was given. + if test "${with_system_zlib+set}" = set; then +@@ -9126,6 +9123,9 @@ fi + # FIXME: this should be _libs on some hosts. + libsubdir=.libs + ++LIBGCJTESTSPEC="-L`${PWDCMD-pwd}`/.libs -rpath `${PWDCMD-pwd}`/.libs" ++ ++LIBSTDCXXSPEC= + # extra LD Flags which are required for targets + case "${host}" in + *-*-darwin[0-7].*) +@@ -9134,9 +9134,21 @@ case "${host}" in + # on Darwin -single_module speeds up loading of the dynamic libraries. + extra_ldflags_libjava=-Wl,-single_module + ;; ++arm*linux*eabi) ++ # Some of the ARM unwinder code is actually in libstdc++. We ++ # could in principle replicate it in libgcj, but it's better to ++ # have a dependency on libstdc++. 
++ extra_ldflags='-L$(here)/../libstdc++-v3/src -lstdc++' ++ LIBSTDCXXSPEC=-lstdc++ ++ LIBGCJTESTSPEC="-L`${PWDCMD-pwd}`/.libs -L`${PWDCMD-pwd}`/../libstdc++-v3/src/.libs -rpath `${PWDCMD-pwd}`/.libs:`${PWDCMD-pwd}`/../libstdc++-v3/src/.libs -lstdc++" ++ ;; + esac + + ++ ++ ++ ++ + # Allow the GC to be disabled. Can be useful when debugging. + echo "$as_me:$LINENO: checking for garbage collector to use" >&5 + echo $ECHO_N "checking for garbage collector to use... $ECHO_C" >&6 +@@ -18024,7 +18036,6 @@ s,@USING_POSIX_PLATFORM_FALSE@,$USING_PO + s,@USING_DARWIN_CRT_TRUE@,$USING_DARWIN_CRT_TRUE,;t t + s,@USING_DARWIN_CRT_FALSE@,$USING_DARWIN_CRT_FALSE,;t t + s,@SYSTEMSPEC@,$SYSTEMSPEC,;t t +-s,@LIBGCJTESTSPEC@,$LIBGCJTESTSPEC,;t t + s,@ZLIBSPEC@,$ZLIBSPEC,;t t + s,@ZLIBTESTSPEC@,$ZLIBTESTSPEC,;t t + s,@X_CFLAGS@,$X_CFLAGS,;t t +@@ -18032,6 +18043,9 @@ s,@X_PRE_LIBS@,$X_PRE_LIBS,;t t + s,@X_LIBS@,$X_LIBS,;t t + s,@X_EXTRA_LIBS@,$X_EXTRA_LIBS,;t t + s,@extra_ldflags_libjava@,$extra_ldflags_libjava,;t t ++s,@extra_ldflags@,$extra_ldflags,;t t ++s,@LIBSTDCXXSPEC@,$LIBSTDCXXSPEC,;t t ++s,@LIBGCJTESTSPEC@,$LIBGCJTESTSPEC,;t t + s,@GCLIBS@,$GCLIBS,;t t + s,@GCINCS@,$GCINCS,;t t + s,@GCDEPS@,$GCDEPS,;t t +--- libjava/Makefile.in.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/Makefile.in 2007-09-06 17:28:51.000000000 +0200 +@@ -627,6 +627,7 @@ LIBLTDL = @LIBLTDL@ + LIBMAGIC = @LIBMAGIC@ + LIBOBJS = @LIBOBJS@ + LIBS = @LIBS@ ++LIBSTDCXXSPEC = @LIBSTDCXXSPEC@ + LIBTOOL = @LIBTOOL@ + LN_S = @LN_S@ + LTLIBICONV = @LTLIBICONV@ +@@ -731,6 +732,7 @@ build_vendor = @build_vendor@ + datadir = @datadir@ + dbexecdir = @dbexecdir@ + exec_prefix = @exec_prefix@ ++extra_ldflags = @extra_ldflags@ + extra_ldflags_libjava = @extra_ldflags_libjava@ $(am__append_5) + gxx_include_dir = @gxx_include_dir@ + here = @here@ +@@ -812,7 +814,7 @@ GCJLINK = $(LIBTOOL) --tag=GCJ --mode=li + + GCJ_FOR_ECJX_LINK = $(GCJ_FOR_ECJX) -o $@ + LIBLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXX) -L$(here) 
$(JC1FLAGS) \ +- $(LDFLAGS) $(extra_ldflags_libjava) -o $@ ++ $(LDFLAGS) $(extra_ldflags_libjava) $(extra_ldflags) -o $@ + + WARNINGS = -Wextra -Wall + AM_CXXFLAGS = \ +@@ -872,7 +874,7 @@ xlib_nat_files = $(xlib_nat_source_files + + # Include THREADLIBS here to ensure that the correct version of + # certain linuxthread functions get linked: +-libgcj_la_LDFLAGS = -rpath $(toolexeclibdir) $(THREADLDFLAGS) $(THREADLIBS) \ ++libgcj_la_LDFLAGS = -rpath $(toolexeclibdir) $(THREADLDFLAGS) $(extra_ldflags) $(THREADLIBS) \ + $(LIBLTDL) $(SYS_ZLIBS) \ + -version-info `grep -v '^\#' $(srcdir)/libtool-version` + +@@ -7784,7 +7786,7 @@ NM = nm + + jv_convert_SOURCES = + jv_convert_LDFLAGS = --main=gnu.gcj.convert.Convert \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + jv_convert_LINK = $(GCJLINK) + jv_convert_LDADD = -L$(here)/.libs libgcj.la +@@ -7800,7 +7802,7 @@ gcj_dbtool_LDADD = gnu/gcj/tools/gcj_dbt + gcj_dbtool_DEPENDENCIES = gnu/gcj/tools/gcj_dbtool.lo libgcj.la libgcj.spec + gij_SOURCES = + gij_LDFLAGS = -rpath $(libdir)/gcj-$(gcc_version) -rpath $(toolexeclibdir) \ +- -shared-libgcc $(THREADLDFLAGS) ++ -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gij_LINK = $(GCJLINK) + gij_LDADD = -L$(here)/.libs libgij.la +@@ -7820,91 +7822,91 @@ ECJX_BASE_FLAGS = -findirect-dispatch \ + @NATIVE_TRUE@ecjx_DEPENDENCIES = libgcj.la libgcj.spec + gappletviewer_SOURCES = + gappletviewer_LDFLAGS = --main=gnu.classpath.tools.appletviewer.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gappletviewer_LINK = $(GCJLINK) + gappletviewer_LDADD = -L$(here)/.libs libgcj-tools.la + gappletviewer_DEPENDENCIES = libgcj-tools.la + gjarsigner_SOURCES = + gjarsigner_LDFLAGS = --main=gnu.classpath.tools.jarsigner.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath 
$(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gjarsigner_LINK = $(GCJLINK) + gjarsigner_LDADD = -L$(here)/.libs libgcj-tools.la + gjarsigner_DEPENDENCIES = libgcj-tools.la + gkeytool_SOURCES = + gkeytool_LDFLAGS = --main=gnu.classpath.tools.keytool.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gkeytool_LINK = $(GCJLINK) + gkeytool_LDADD = -L$(here)/.libs libgcj-tools.la + gkeytool_DEPENDENCIES = libgcj-tools.la + gjar_SOURCES = + gjar_LDFLAGS = --main=gnu.classpath.tools.jar.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gjar_LINK = $(GCJLINK) + gjar_LDADD = -L$(here)/.libs libgcj-tools.la + gjar_DEPENDENCIES = libgcj-tools.la + gjavah_SOURCES = + gjavah_LDFLAGS = --main=gnu.classpath.tools.javah.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gjavah_LINK = $(GCJLINK) + gjavah_LDADD = -L$(here)/.libs libgcj-tools.la + gjavah_DEPENDENCIES = libgcj-tools.la + gcjh_SOURCES = + gcjh_LDFLAGS = --main=gnu.classpath.tools.javah.GcjhMain \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gcjh_LINK = $(GCJLINK) + gcjh_LDADD = -L$(here)/.libs libgcj-tools.la + gcjh_DEPENDENCIES = libgcj-tools.la + gnative2ascii_SOURCES = + gnative2ascii_LDFLAGS = --main=gnu.classpath.tools.native2ascii.Native2ASCII \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gnative2ascii_LINK = $(GCJLINK) + gnative2ascii_LDADD = -L$(here)/.libs libgcj-tools.la + gnative2ascii_DEPENDENCIES = libgcj-tools.la + gorbd_SOURCES = + gorbd_LDFLAGS = --main=gnu.classpath.tools.orbd.Main \ +- -rpath 
$(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gorbd_LINK = $(GCJLINK) + gorbd_LDADD = -L$(here)/.libs libgcj-tools.la + gorbd_DEPENDENCIES = libgcj-tools.la + grmid_SOURCES = + grmid_LDFLAGS = --main=gnu.classpath.tools.rmid.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + grmid_LINK = $(GCJLINK) + grmid_LDADD = -L$(here)/.libs libgcj-tools.la + grmid_DEPENDENCIES = libgcj-tools.la + gserialver_SOURCES = + gserialver_LDFLAGS = --main=gnu.classpath.tools.serialver.SerialVer \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gserialver_LINK = $(GCJLINK) + gserialver_LDADD = -L$(here)/.libs libgcj-tools.la + gserialver_DEPENDENCIES = libgcj-tools.la + gtnameserv_SOURCES = + gtnameserv_LDFLAGS = --main=gnu.classpath.tools.tnameserv.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + gtnameserv_LINK = $(GCJLINK) + gtnameserv_LDADD = -L$(here)/.libs libgcj-tools.la + gtnameserv_DEPENDENCIES = libgcj-tools.la + grmic_SOURCES = + grmic_LDFLAGS = --main=gnu.classpath.tools.rmic.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + grmic_LINK = $(GCJLINK) + grmic_LDADD = -L$(here)/.libs libgcj-tools.la + grmic_DEPENDENCIES = libgcj-tools.la + grmiregistry_SOURCES = + grmiregistry_LDFLAGS = --main=gnu.classpath.tools.rmiregistry.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + + grmiregistry_LINK = $(GCJLINK) + grmiregistry_LDADD = -L$(here)/.libs libgcj-tools.la +--- libjava/gcj/Makefile.in.jj 2007-09-06 14:06:15.000000000 
+0200 ++++ libjava/gcj/Makefile.in 2007-09-06 17:20:42.000000000 +0200 +@@ -172,6 +172,7 @@ LIBLTDL = @LIBLTDL@ + LIBMAGIC = @LIBMAGIC@ + LIBOBJS = @LIBOBJS@ + LIBS = @LIBS@ ++LIBSTDCXXSPEC = @LIBSTDCXXSPEC@ + LIBTOOL = @LIBTOOL@ + LN_S = @LN_S@ + LTLIBICONV = @LTLIBICONV@ +@@ -276,6 +277,7 @@ build_vendor = @build_vendor@ + datadir = @datadir@ + dbexecdir = @dbexecdir@ + exec_prefix = @exec_prefix@ ++extra_ldflags = @extra_ldflags@ + extra_ldflags_libjava = @extra_ldflags_libjava@ + gxx_include_dir = @gxx_include_dir@ + here = @here@ +--- libjava/include/Makefile.in.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/include/Makefile.in 2007-09-06 17:20:42.000000000 +0200 +@@ -171,6 +171,7 @@ LIBLTDL = @LIBLTDL@ + LIBMAGIC = @LIBMAGIC@ + LIBOBJS = @LIBOBJS@ + LIBS = @LIBS@ ++LIBSTDCXXSPEC = @LIBSTDCXXSPEC@ + LIBTOOL = @LIBTOOL@ + LN_S = @LN_S@ + LTLIBICONV = @LTLIBICONV@ +@@ -275,6 +276,7 @@ build_vendor = @build_vendor@ + datadir = @datadir@ + dbexecdir = @dbexecdir@ + exec_prefix = @exec_prefix@ ++extra_ldflags = @extra_ldflags@ + extra_ldflags_libjava = @extra_ldflags_libjava@ + gxx_include_dir = @gxx_include_dir@ + here = @here@ +--- libjava/configure.ac.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/configure.ac 2007-09-06 17:20:42.000000000 +0200 +@@ -777,9 +777,6 @@ case "${host}" in + esac + AC_SUBST(SYSTEMSPEC) + +-LIBGCJTESTSPEC="-L`${PWDCMD-pwd}`/.libs -rpath `${PWDCMD-pwd}`/.libs" +-AC_SUBST(LIBGCJTESTSPEC) +- + AC_ARG_WITH(system-zlib, + AS_HELP_STRING([--with-system-zlib], + [use installed libz])) +@@ -793,6 +790,9 @@ AC_PATH_XTRA + # FIXME: this should be _libs on some hosts. + libsubdir=.libs + ++LIBGCJTESTSPEC="-L`${PWDCMD-pwd}`/.libs -rpath `${PWDCMD-pwd}`/.libs" ++ ++LIBSTDCXXSPEC= + # extra LD Flags which are required for targets + case "${host}" in + *-*-darwin[[0-7]].*) +@@ -801,8 +801,20 @@ case "${host}" in + # on Darwin -single_module speeds up loading of the dynamic libraries. 
+ extra_ldflags_libjava=-Wl,-single_module + ;; ++arm*linux*eabi) ++ # Some of the ARM unwinder code is actually in libstdc++. We ++ # could in principle replicate it in libgcj, but it's better to ++ # have a dependency on libstdc++. ++ extra_ldflags='-L$(here)/../libstdc++-v3/src -lstdc++' ++ LIBSTDCXXSPEC=-lstdc++ ++ LIBGCJTESTSPEC="-L`${PWDCMD-pwd}`/.libs -L`${PWDCMD-pwd}`/../libstdc++-v3/src/.libs -rpath `${PWDCMD-pwd}`/.libs:`${PWDCMD-pwd}`/../libstdc++-v3/src/.libs -lstdc++" ++ ;; + esac + AC_SUBST(extra_ldflags_libjava) ++AC_SUBST(extra_ldflags) ++AC_SUBST(LIBSTDCXXSPEC) ++ ++AC_SUBST(LIBGCJTESTSPEC) + + # Allow the GC to be disabled. Can be useful when debugging. + AC_MSG_CHECKING([for garbage collector to use]) +--- libjava/libgcj.spec.in.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/libgcj.spec.in 2007-09-06 17:20:42.000000000 +0200 +@@ -7,6 +7,6 @@ + *startfile: @THREADSTARTFILESPEC@ %(startfileorig) + + %rename lib liborig +-*lib: @LD_START_STATIC_SPEC@ @LIBGCJ_SPEC@ @LD_FINISH_STATIC_SPEC@ -lm @LIBICONV@ @GCSPEC@ @THREADSPEC@ @ZLIBSPEC@ @SYSTEMSPEC@ %(libgcc) %(liborig) ++*lib: @LD_START_STATIC_SPEC@ @LIBGCJ_SPEC@ @LD_FINISH_STATIC_SPEC@ -lm @LIBICONV@ @GCSPEC@ @THREADSPEC@ @ZLIBSPEC@ @SYSTEMSPEC@ %(libgcc) @LIBSTDCXXSPEC@ %(liborig) + + *jc1: @HASH_SYNC_SPEC@ @DIVIDESPEC@ @CHECKREFSPEC@ @JC1GCSPEC@ @EXCEPTIONSPEC@ @BACKTRACESPEC@ @IEEESPEC@ -fkeep-inline-functions +--- libjava/stacktrace.cc.jj 2007-09-06 14:06:16.000000000 +0200 ++++ libjava/stacktrace.cc 2007-09-06 17:20:42.000000000 +0200 +@@ -39,6 +39,10 @@ using namespace java::lang::reflect; + using namespace java::util; + using namespace gnu::gcj::runtime; + ++#ifdef __ARM_EABI_UNWINDER__ ++#define _URC_NORMAL_STOP _URC_FAILURE ++#endif ++ + // Maps ncode values to their containing native class. + // NOTE: Currently this Map contradicts class GC for native classes. 
This map + // (and the "new class stack") will need to use WeakReferences in order to +--- libjava/configure.host.jj 2007-09-06 17:11:05.000000000 +0200 ++++ libjava/configure.host 2007-09-06 17:20:42.000000000 +0200 +@@ -85,6 +85,7 @@ case "${host}" in + arm*-linux*) + libgcj_interpreter=yes + sysdeps_dir=arm ++ fallback_backtrace_h=sysdep/arm/backtrace.h + ;; + mips-tx39-*|mipstx39-unknown-*) + libgcj_flags="${libgcj_flags} -G 0" +@@ -258,6 +259,12 @@ EOF + sysdeps_dir=x86-64 + DIVIDESPEC=-f%{m32:no-}%{!m32:%{!m64:no-}}%{m64:}use-divide-subroutine + ;; ++ arm*-linux* ) ++ slow_pthread_self=no ++ can_unwind_signal=no ++ CHECKREFSPEC=-fcheck-references ++ DIVIDESPEC=-fuse-divide-subroutine ++ ;; + mips*-*-linux* ) + sysdeps_dir=mips + can_unwind_signal=yes +--- libjava/Makefile.am.jj 2007-09-06 14:06:16.000000000 +0200 ++++ libjava/Makefile.am 2007-09-06 17:28:27.000000000 +0200 +@@ -100,6 +100,7 @@ endif + GCJ_WITH_FLAGS = $(GCJ) --encoding=UTF-8 -Wno-deprecated + + extra_ldflags_libjava = @extra_ldflags_libjava@ ++extra_ldflags = @extra_ldflags@ + + if ANONVERSCRIPT + extra_ldflags_libjava += -Wl,--version-script=$(srcdir)/libgcj.ver +@@ -110,7 +111,7 @@ GCJLINK = $(LIBTOOL) --tag=GCJ --mode=li + GCJ_FOR_ECJX = @GCJ_FOR_ECJX@ + GCJ_FOR_ECJX_LINK = $(GCJ_FOR_ECJX) -o $@ + LIBLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXX) -L$(here) $(JC1FLAGS) \ +- $(LDFLAGS) $(extra_ldflags_libjava) -o $@ ++ $(LDFLAGS) $(extra_ldflags_libjava) $(extra_ldflags) -o $@ + + GCC_UNWIND_INCLUDE = @GCC_UNWIND_INCLUDE@ + +@@ -232,7 +233,7 @@ xlib_nat_files = $(xlib_nat_source_files + # Include THREADLIBS here to ensure that the correct version of + # certain linuxthread functions get linked: + ## The mysterious backslash in the grep pattern is consumed by make. 
+-libgcj_la_LDFLAGS = -rpath $(toolexeclibdir) $(THREADLDFLAGS) $(THREADLIBS) \ ++libgcj_la_LDFLAGS = -rpath $(toolexeclibdir) $(THREADLDFLAGS) $(extra_ldflags) $(THREADLIBS) \ + $(LIBLTDL) $(SYS_ZLIBS) \ + -version-info `grep -v '^\#' $(srcdir)/libtool-version` + libgcj_la_LIBADD = \ +@@ -614,7 +615,7 @@ jv_convert_SOURCES = + ## need this because we are explicitly using libtool to link using the + ## `.la' file. + jv_convert_LDFLAGS = --main=gnu.gcj.convert.Convert \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + jv_convert_LINK = $(GCJLINK) + ## We don't explicitly link in the libraries we need; libgcj.la brings + ## in all dependencies. We need the -L so that gcj can find libgcj +@@ -635,7 +636,7 @@ gnu/gcj/tools/gcj_dbtool/natMain.cc + ## need this because we are explicitly using libtool to link using the + ## `.la' file. + gcj_dbtool_LDFLAGS = --main=gnu.gcj.tools.gcj_dbtool.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gcj_dbtool_LINK = $(GCJLINK) + ## We don't explicitly link in the libraries we need; libgcj.la brings + ## in all dependencies. We need the -L so that gcj can find libgcj +@@ -654,7 +655,7 @@ gij_SOURCES = + ## need this because we are explicitly using libtool to link using the + ## `.la' file. + gij_LDFLAGS = -rpath $(libdir)/gcj-$(gcc_version) -rpath $(toolexeclibdir) \ +- -shared-libgcc $(THREADLDFLAGS) ++ -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gij_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gij_LDADD = -L$(here)/.libs libgij.la +@@ -698,7 +699,7 @@ endif !NATIVE + ## This is a dummy definition. 
+ gappletviewer_SOURCES = + gappletviewer_LDFLAGS = --main=gnu.classpath.tools.appletviewer.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gappletviewer_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gappletviewer_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -707,7 +708,7 @@ gappletviewer_DEPENDENCIES = libgcj-tool + ## This is a dummy definition. + gjarsigner_SOURCES = + gjarsigner_LDFLAGS = --main=gnu.classpath.tools.jarsigner.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gjarsigner_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gjarsigner_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -716,7 +717,7 @@ gjarsigner_DEPENDENCIES = libgcj-tools.l + ## This is a dummy definition. + gkeytool_SOURCES = + gkeytool_LDFLAGS = --main=gnu.classpath.tools.keytool.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gkeytool_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gkeytool_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -725,7 +726,7 @@ gkeytool_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. + gjar_SOURCES = + gjar_LDFLAGS = --main=gnu.classpath.tools.jar.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gjar_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gjar_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -734,7 +735,7 @@ gjar_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. + gjavah_SOURCES = + gjavah_LDFLAGS = --main=gnu.classpath.tools.javah.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gjavah_LINK = $(GCJLINK) + ## See jv_convert_LDADD. 
+ gjavah_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -743,7 +744,7 @@ gjavah_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. + gcjh_SOURCES = + gcjh_LDFLAGS = --main=gnu.classpath.tools.javah.GcjhMain \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gcjh_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gcjh_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -752,7 +753,7 @@ gcjh_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. + gnative2ascii_SOURCES = + gnative2ascii_LDFLAGS = --main=gnu.classpath.tools.native2ascii.Native2ASCII \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gnative2ascii_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gnative2ascii_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -761,7 +762,7 @@ gnative2ascii_DEPENDENCIES = libgcj-tool + ## This is a dummy definition. + gorbd_SOURCES = + gorbd_LDFLAGS = --main=gnu.classpath.tools.orbd.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gorbd_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gorbd_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -770,7 +771,7 @@ gorbd_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. + grmid_SOURCES = + grmid_LDFLAGS = --main=gnu.classpath.tools.rmid.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + grmid_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + grmid_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -779,7 +780,7 @@ grmid_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. 
+ gserialver_SOURCES = + gserialver_LDFLAGS = --main=gnu.classpath.tools.serialver.SerialVer \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gserialver_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gserialver_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -788,7 +789,7 @@ gserialver_DEPENDENCIES = libgcj-tools.l + ## This is a dummy definition. + gtnameserv_SOURCES = + gtnameserv_LDFLAGS = --main=gnu.classpath.tools.tnameserv.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + gtnameserv_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + gtnameserv_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -797,7 +798,7 @@ gtnameserv_DEPENDENCIES = libgcj-tools.l + ## This is a dummy definition. + grmic_SOURCES = + grmic_LDFLAGS = --main=gnu.classpath.tools.rmic.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + grmic_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + grmic_LDADD = -L$(here)/.libs libgcj-tools.la +@@ -806,7 +807,7 @@ grmic_DEPENDENCIES = libgcj-tools.la + ## This is a dummy definition. + grmiregistry_SOURCES = + grmiregistry_LDFLAGS = --main=gnu.classpath.tools.rmiregistry.Main \ +- -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) ++ -rpath $(toolexeclibdir) -shared-libgcc $(THREADLDFLAGS) $(extra_ldflags) + grmiregistry_LINK = $(GCJLINK) + ## See jv_convert_LDADD. + grmiregistry_LDADD = -L$(here)/.libs libgcj-tools.la +--- libjava/exception.cc.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/exception.cc 2007-09-06 17:20:42.000000000 +0200 +@@ -58,6 +58,21 @@ struct java_exception_header + _Unwind_Exception unwindHeader; + }; + ++#ifdef __ARM_EABI_UNWINDER__ ++// This is the exception class we report -- "GNUCJAVA". 
++ ++const _Unwind_Exception_Class __gcj_exception_class ++ = {'G', 'N', 'U', 'C', 'J', 'A', 'V', 'A'}; ++ ++static inline java_exception_header * ++get_exception_header_from_ue (_Unwind_Exception *exc) ++{ ++ return reinterpret_cast(exc + 1) - 1; ++} ++ ++extern "C" void __cxa_begin_cleanup (_Unwind_Exception*); ++ ++#else // !__ARM_EABI_UNWINDER__ + // This is the exception class we report -- "GNUCJAVA". + const _Unwind_Exception_Class __gcj_exception_class + = ((((((((_Unwind_Exception_Class) 'G' +@@ -75,6 +90,7 @@ get_exception_header_from_ue (_Unwind_Ex + { + return reinterpret_cast(exc + 1) - 1; + } ++#endif // !__ARM_EABI_UNWINDER__ + + /* Perform a throw, Java style. Throw will unwind through this call, + so there better not be any handlers or exception thrown here. */ +@@ -89,7 +105,8 @@ _Jv_Throw (jthrowable value) + value = new java::lang::NullPointerException (); + xh->value = value; + +- xh->unwindHeader.exception_class = __gcj_exception_class; ++ memcpy (&xh->unwindHeader.exception_class, &__gcj_exception_class, ++ sizeof xh->unwindHeader.exception_class); + xh->unwindHeader.exception_cleanup = NULL; + + /* We're happy with setjmp/longjmp exceptions or region-based +@@ -159,6 +176,21 @@ parse_lsda_header (_Unwind_Context *cont + return p; + } + ++#ifdef __ARM_EABI_UNWINDER__ ++ ++static void ** ++get_ttype_entry(_Unwind_Context *, lsda_header_info* info, _Unwind_Word i) ++{ ++ _Unwind_Ptr ptr; ++ ++ ptr = (_Unwind_Ptr) (info->TType - (i * 4)); ++ ptr = _Unwind_decode_target2(ptr); ++ ++ return reinterpret_cast(ptr); ++} ++ ++#else ++ + static void ** + get_ttype_entry (_Unwind_Context *context, lsda_header_info *info, long i) + { +@@ -170,6 +202,7 @@ get_ttype_entry (_Unwind_Context *contex + return reinterpret_cast(ptr); + } + ++#endif + + // Using a different personality function name causes link failures + // when trying to mix code using different exception handling models. 
+@@ -180,12 +213,33 @@ get_ttype_entry (_Unwind_Context *contex + #define PERSONALITY_FUNCTION __gcj_personality_v0 + #endif + ++#ifdef __ARM_EABI_UNWINDER__ ++ ++#define CONTINUE_UNWINDING \ ++ do \ ++ { \ ++ if (__gnu_unwind_frame(ue_header, context) != _URC_OK) \ ++ return _URC_FAILURE; \ ++ return _URC_CONTINUE_UNWIND; \ ++ } \ ++ while (0) ++ ++extern "C" _Unwind_Reason_Code ++PERSONALITY_FUNCTION (_Unwind_State state, ++ struct _Unwind_Exception* ue_header, ++ struct _Unwind_Context* context) ++#else ++ ++#define CONTINUE_UNWINDING return _URC_CONTINUE_UNWIND ++ + extern "C" _Unwind_Reason_Code + PERSONALITY_FUNCTION (int version, + _Unwind_Action actions, + _Unwind_Exception_Class exception_class, + struct _Unwind_Exception *ue_header, + struct _Unwind_Context *context) ++ ++#endif + { + java_exception_header *xh = get_exception_header_from_ue (ue_header); + +@@ -197,16 +251,56 @@ PERSONALITY_FUNCTION (int version, + int handler_switch_value; + bool saw_cleanup; + bool saw_handler; ++ bool foreign_exception; + int ip_before_insn = 0; + ++#ifdef __ARM_EABI_UNWINDER__ ++ _Unwind_Action actions; ++ ++ switch (state & _US_ACTION_MASK) ++ { ++ case _US_VIRTUAL_UNWIND_FRAME: ++ actions = _UA_SEARCH_PHASE; ++ break; ++ ++ case _US_UNWIND_FRAME_STARTING: ++ actions = _UA_CLEANUP_PHASE; ++ if (!(state & _US_FORCE_UNWIND) ++ && ue_header->barrier_cache.sp == _Unwind_GetGR(context, 13)) ++ actions |= _UA_HANDLER_FRAME; ++ break; ++ ++ case _US_UNWIND_FRAME_RESUME: ++ CONTINUE_UNWINDING; ++ break; ++ ++ default: ++ std::abort(); ++ } ++ actions |= state & _US_FORCE_UNWIND; ++ ++ // We don't know which runtime we're working with, so can't check this. ++ // However the ABI routines hide this from us, and we don't actually need ++ // to know. ++ foreign_exception = false; ++ ++ // The dwarf unwinder assumes the context structure holds things like the ++ // function and LSDA pointers. The ARM implementation caches these in ++ // the exception header (UCB). 
To avoid rewriting everything we make the ++ // virtual IP register point at the UCB. ++ ip = (_Unwind_Ptr) ue_header; ++ _Unwind_SetGR(context, 12, ip); + ++#else + // Interface version check. + if (version != 1) + return _URC_FATAL_PHASE1_ERROR; ++ foreign_exception = exception_class != __gcj_exception_class; ++#endif + + // Shortcut for phase 2 found handler for domestic exception. + if (actions == (_UA_CLEANUP_PHASE | _UA_HANDLER_FRAME) +- && exception_class == __gcj_exception_class) ++ && !foreign_exception) + { + handler_switch_value = xh->handlerSwitchValue; + landing_pad = xh->landingPad; +@@ -227,17 +321,17 @@ PERSONALITY_FUNCTION (int version, + + // If no LSDA, then there are no handlers or cleanups. + if (! language_specific_data) +- return _URC_CONTINUE_UNWIND; ++ CONTINUE_UNWINDING; + + // Parse the LSDA header. + p = parse_lsda_header (context, language_specific_data, &info); + #ifdef HAVE_GETIPINFO + ip = _Unwind_GetIPInfo (context, &ip_before_insn); +- if (! ip_before_insn) +- --ip; + #else + ip = _Unwind_GetIP (context) - 1; + #endif ++ if (! ip_before_insn) ++ --ip; + landing_pad = 0; + action_record = 0; + handler_switch_value = 0; +@@ -296,7 +390,7 @@ PERSONALITY_FUNCTION (int version, + // If ip is not present in the table, C++ would call terminate. + // ??? It is perhaps better to tweek the LSDA so that no-action + // is mapped to no-entry for Java. +- return _URC_CONTINUE_UNWIND; ++ CONTINUE_UNWINDING; + + found_something: + saw_cleanup = false; +@@ -334,7 +428,7 @@ PERSONALITY_FUNCTION (int version, + // During forced unwinding, we only run cleanups. With a + // foreign exception class, we have no class info to match. + else if ((actions & _UA_FORCE_UNWIND) +- || exception_class != __gcj_exception_class) ++ || foreign_exception) + ; + + else if (ar_filter > 0) +@@ -374,15 +468,15 @@ PERSONALITY_FUNCTION (int version, + } + + if (! saw_handler && ! 
saw_cleanup) +- return _URC_CONTINUE_UNWIND; ++ CONTINUE_UNWINDING; + + if (actions & _UA_SEARCH_PHASE) + { + if (! saw_handler) +- return _URC_CONTINUE_UNWIND; ++ CONTINUE_UNWINDING; + + // For domestic exceptions, we cache data from phase 1 for phase 2. +- if (exception_class == __gcj_exception_class) ++ if (! foreign_exception) + { + xh->handlerSwitchValue = handler_switch_value; + xh->landingPad = landing_pad; +@@ -396,5 +490,9 @@ PERSONALITY_FUNCTION (int version, + _Unwind_SetGR (context, __builtin_eh_return_data_regno (1), + handler_switch_value); + _Unwind_SetIP (context, landing_pad); ++#ifdef __ARM_EABI_UNWINDER__ ++ if (saw_cleanup) ++ __cxa_begin_cleanup(ue_header); ++#endif + return _URC_INSTALL_CONTEXT; + } +--- libjava/gnu/classpath/natVMStackWalker.cc.jj 2007-09-06 14:06:15.000000000 +0200 ++++ libjava/gnu/classpath/natVMStackWalker.cc 2007-09-06 17:20:42.000000000 +0200 +@@ -19,6 +19,7 @@ details. */ + #include + #include + ++#ifndef __ARM_EABI_UNWINDER__ + // Return the class of the method that contains PC. + // This is a macro not a function, since defining it as one would + // introduce an extra frame on the stack. */ +@@ -44,6 +45,11 @@ details. */ + \ + klass; \ + }) ++#else // __ARM_EABI_UNWINDER__ ++// ARM EABI doesn't support _Unwind_FindEnclosingFunction. 
++#define GET_CALLING_CLASS(PC) \ ++ (_Jv_StackTrace::GetStackWalkerCallingClass ()) ++#endif + + JArray * + gnu::classpath::VMStackWalker::getClassContext(void) +@@ -59,14 +65,18 @@ jclass + gnu::classpath::VMStackWalker::getCallingClass(void) + { + _Jv_InitClass (&::gnu::classpath::VMStackWalker::class$); +- return _Jv_StackTrace::GetStackWalkerCallingClass (); ++ jclass result = _Jv_StackTrace::GetStackWalkerCallingClass (); ++ __asm__ __volatile__ ("" : : "g" (result)); ++ return result; + } + + jclass + gnu::classpath::VMStackWalker::getCallingClass(::gnu::gcj::RawData *pc) + { + _Jv_InitClass (&::gnu::classpath::VMStackWalker::class$); +- return GET_CALLING_CLASS(pc); ++ jclass result = GET_CALLING_CLASS(pc); ++ __asm__ __volatile__ ("" : : "g" (result)); ++ return result; + } + + ::java::lang::ClassLoader * +--- libjava/sysdep/arm/backtrace.h.jj 2007-09-06 17:20:42.000000000 +0200 ++++ libjava/sysdep/arm/backtrace.h 2007-09-06 17:20:42.000000000 +0200 +@@ -0,0 +1,35 @@ ++// backtrace.h - Fallback backtrace implementation. ARM implementation. ++ ++/* Copyright (C) 2005, 2006 Free Software Foundation ++ ++ This file is part of libgcj. ++ ++This software is copyrighted work licensed under the terms of the ++Libgcj License. Please consult the file "LIBGCJ_LICENSE" for ++details. */ ++ ++#ifndef __SYSDEP_BACKTRACE_H__ ++#define __SYSDEP_BACKTRACE_H__ ++ ++#include ++ ++extern "C" ++{ ++/* Unwind through the call stack calling TRACE_FN with STATE for every stack ++ frame. Returns the reason why the unwinding was stopped. 
*/ ++#ifdef __ARM_EABI_UNWINDER__ ++ ++#define _Unwind_FindEnclosingFunction(PC) \ ++ (PC) ++ ++_Unwind_Reason_Code ++fallback_backtrace (_Unwind_Reason_Code (*)(struct _Unwind_Context*, void*), _Jv_UnwindState *) ++#else ++_Unwind_Reason_Code ++fallback_backtrace (_Unwind_Trace_Fn, _Jv_UnwindState *) ++#endif ++{ ++ return _URC_NO_REASON; ++} ++} ++#endif diff --git a/gcc41-java-arm7.patch b/gcc41-java-arm7.patch new file mode 100644 index 0000000..4cce530 --- /dev/null +++ b/gcc41-java-arm7.patch @@ -0,0 +1,32 @@ +2007-09-07 Andrew Haley + + * configure.in (noconfigdirs): Remove target-libffi and + target-libjava. + * configure: Regenerate. + +--- configure.in (revision 128249) ++++ configure.in (revision 128250) +@@ -501,8 +501,8 @@ case "${target}" in + noconfigdirs="$noconfigdirs target-libffi target-qthreads" + ;; + arm*-*-linux-gnueabi) +- noconfigdirs="$noconfigdirs target-libffi target-qthreads" +- noconfigdirs="$noconfigdirs target-libjava target-libobjc" ++ noconfigdirs="$noconfigdirs target-qthreads" ++ noconfigdirs="$noconfigdirs target-libobjc" + ;; + arm*-*-symbianelf*) + noconfigdirs="$noconfigdirs ${libgcj} target-libiberty" +--- configure (revision 128249) ++++ configure (revision 128250) +@@ -1293,8 +1293,8 @@ case "${target}" in + noconfigdirs="$noconfigdirs target-libffi target-qthreads" + ;; + arm*-*-linux-gnueabi) +- noconfigdirs="$noconfigdirs target-libffi target-qthreads" +- noconfigdirs="$noconfigdirs target-libjava target-libobjc" ++ noconfigdirs="$noconfigdirs target-qthreads" ++ noconfigdirs="$noconfigdirs target-libobjc" + ;; + arm*-*-symbianelf*) + noconfigdirs="$noconfigdirs ${libgcj} target-libiberty" diff --git a/gcc41-java-arm8.patch b/gcc41-java-arm8.patch new file mode 100644 index 0000000..4820731 --- /dev/null +++ b/gcc41-java-arm8.patch @@ -0,0 +1,65 @@ +2007-07-13 Andrew Haley + + * testsuite/libjava.jvmti/jvmti-interp.exp: Likewise. + * testsuite/libjava.jni/jni.exp: Use -fdollars-in-identifiers. 
+ * testsuite/libjava.jni/cni.exp: Use -fdollars-in-identifiers. + * testsuite/libjava.jvmti/jvmti.exp (gcj_jvmti_compile_cxx_to_o): Likewise. + +--- libjava/testsuite/libjava.cni/cni.exp (revision 126621) ++++ libjava/testsuite/libjava.cni/cni.exp (revision 126622) +@@ -10,7 +10,7 @@ + set oname ${name}.o + + # Find the generated header. +- lappend options "additional_flags=-I. -I.. -I$srcdir/$subdir" ++ lappend options "additional_flags=-I. -I.. -I$srcdir/$subdir -fdollars-in-identifiers" + # Find libgcj headers. + lappend options "additional_flags=-I$srcdir/.." + +--- libjava/testsuite/libjava.jvmti/jvmti-interp.exp (revision 126621) ++++ libjava/testsuite/libjava.jvmti/jvmti-interp.exp (revision 126622) +@@ -47,7 +47,7 @@ + + # Find jni.h and jni_md.h. + lappend options "additional_flags=-I$srcdir/../include \ +- -I$srcdir/../classpath/include" ++ -I$srcdir/../classpath/include -fdollars-in-identifiers" + + # Append C++ options + lappend options "additional_flags=$options_cxx" +--- libjava/testsuite/libjava.jvmti/jvmti.exp (revision 126621) ++++ libjava/testsuite/libjava.jvmti/jvmti.exp (revision 126622) +@@ -11,11 +11,11 @@ + set oname ${name}.o + + # Find the generated header. +- lappend options "additional_flags=-g -I. -I.." ++ lappend options "additional_flags=-g -I. -I.. -fdollars-in-identifiers" + # Find libgcj headers. + lappend options "additional_flags=-I$srcdir/.." 
+ # Find jvmti.h, jvmti_md.h, jvmti-int.h, jvm.h requirements +- lappend options "additional_flags=-I$srcdir/../include -I$srcdir/../classpath/include -I$objdir/../include -I$objdir/../../boehm-gc/include" ++ lappend options "additional_flags=-I$srcdir/../include -I$srcdir/../classpath/include -I$objdir/../include -I$objdir/../../boehm-gc/include " + + set x [libjava_prune_warnings \ + [target_compile $file $oname object $options]] +--- libjava/testsuite/libjava.jni/jni.exp (revision 126621) ++++ libjava/testsuite/libjava.jni/jni.exp (revision 126622) +@@ -31,7 +31,7 @@ + + lappend options "additional_flags=${so_flag} -fPIC" + # Find the generated header. +- lappend options "additional_flags=-I. -I.. -I$srcdir/$subdir" ++ lappend options "additional_flags=-I. -I.. -I$srcdir/$subdir -fdollars-in-identifiers" + + # Ensure that the generated header has correct prototypes. + set cfile [file rootname $file].c +@@ -219,7 +219,7 @@ + lappend options "additional_flags=-I. -I.. -I$srcdir/$subdir" + + # Find jni.h and jni_md.h. +- lappend options "additional_flags=-I$srcdir/../include -I$srcdir/../classpath/include" ++ lappend options "additional_flags=-I$srcdir/../include -I$srcdir/../classpath/include -fdollars-in-identifiers" + + # Append C++ options + lappend options "additional_flags=$options_cxx" diff --git a/gcc41-omp-outer-ctx.patch b/gcc41-omp-outer-ctx.patch new file mode 100644 index 0000000..7f0b1f6 --- /dev/null +++ b/gcc41-omp-outer-ctx.patch @@ -0,0 +1,133 @@ +2007-12-03 Jakub Jelinek + + * omp-low.c (lookup_decl_in_outer_ctx): Allow calling this + with !ctx->is_nested. + (maybe_lookup_decl_in_outer_ctx): Look up in outer contexts + even if !ctx->is_nested. + (lower_copyprivate_clauses, lower_send_clauses, + lower_send_shared_vars): Call lookup_decl_in_outer_ctx + unconditionally. + + * testsuite/libgomp.c/private-1.c: New test. 
+ +--- gcc/omp-low.c (revision 130589) ++++ gcc/omp-low.c (revision 130590) +@@ -1518,12 +1518,10 @@ lookup_decl_in_outer_ctx (tree decl, omp + tree t; + omp_context *up; + +- gcc_assert (ctx->is_nested); +- + for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) + t = maybe_lookup_decl (decl, up); + +- gcc_assert (t || is_global_var (decl)); ++ gcc_assert (!ctx->is_nested || t || is_global_var (decl)); + + return t ? t : decl; + } +@@ -1538,9 +1536,8 @@ maybe_lookup_decl_in_outer_ctx (tree dec + tree t = NULL; + omp_context *up; + +- if (ctx->is_nested) +- for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) +- t = maybe_lookup_decl (decl, up); ++ for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) ++ t = maybe_lookup_decl (decl, up); + + return t ? t : decl; + } +@@ -2012,7 +2009,7 @@ lower_copyprivate_clauses (tree clauses, + by_ref = use_pointer_for_field (var, false); + + ref = build_sender_ref (var, ctx); +- x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var; ++ x = lookup_decl_in_outer_ctx (var, ctx); + x = by_ref ? build_fold_addr_expr (x) : x; + x = build_gimple_modify_stmt (ref, x); + gimplify_and_add (x, slist); +@@ -2053,9 +2050,8 @@ lower_send_clauses (tree clauses, tree * + continue; + } + +- var = val = OMP_CLAUSE_DECL (c); +- if (ctx->is_nested) +- var = lookup_decl_in_outer_ctx (val, ctx); ++ val = OMP_CLAUSE_DECL (c); ++ var = lookup_decl_in_outer_ctx (val, ctx); + + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN + && is_global_var (var)) +@@ -2127,13 +2123,10 @@ lower_send_shared_vars (tree *ilist, tre + if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar)) + continue; + +- var = ovar; +- + /* If CTX is a nested parallel directive. Find the immediately + enclosing parallel or workshare construct that contains a + mapping for OVAR. 
*/ +- if (ctx->is_nested) +- var = lookup_decl_in_outer_ctx (ovar, ctx); ++ var = lookup_decl_in_outer_ctx (ovar, ctx); + + if (use_pointer_for_field (ovar, true)) + { +--- libgomp/testsuite/libgomp.c/private-1.c (revision 0) ++++ libgomp/testsuite/libgomp.c/private-1.c (revision 130590) +@@ -0,0 +1,54 @@ ++extern void abort (void); ++ ++int a = 18; ++ ++void ++f1 (int i, int j, int k) ++{ ++ int l = 6, m = 7, n = 8; ++#pragma omp parallel private(j, m) shared(k, n) firstprivate(i, l) \ ++ num_threads(1) ++ { ++ j = 6; ++ m = 5; ++ if (++a != 19 || ++i != 9 || j != 6 || ++l != 7 || m != 5 || ++n != 9) ++ #pragma omp atomic ++ k++; ++ } ++ if (a != 19 || i != 8 || j != 26 || k != 0 || l != 6 || m != 7 || n != 9) ++ abort (); ++} ++ ++int v1 = 1, v2 = 2, v5 = 5; ++int err; ++ ++void ++f2 (void) ++{ ++ int v3 = 3; ++#pragma omp sections private (v1) firstprivate (v2) ++ { ++ #pragma omp section ++ { ++ int v4 = 4; ++ v1 = 7; ++ #pragma omp parallel num_threads(1) firstprivate(v1, v2, v3, v4) ++ { ++ if (++v1 != 8 || ++v2 != 3 || ++v3 != 4 || ++v4 != 5 || ++v5 != 6) ++ err = 1; ++ } ++ if (v1 != 7 || v2 != 2 || v3 != 3 || v4 != 4 || v5 != 6) ++ abort (); ++ if (err) ++ abort (); ++ } ++ } ++} ++ ++int ++main (void) ++{ ++ f1 (8, 26, 0); ++ f2 (); ++ return 0; ++} diff --git a/gcc41-ppc64-cr2-unwind.patch b/gcc41-ppc64-cr2-unwind.patch deleted file mode 100644 index f63bdb4..0000000 --- a/gcc41-ppc64-cr2-unwind.patch +++ /dev/null @@ -1,20 +0,0 @@ -2007-10-20 Jakub Jelinek - - * config/rs6000/linux-unwind.h (ppc_fallback_frame_state): Point - saved CR2 offset to low 32 bits of regs->ccr rather than the whole - 64-bit register in 64-bit libgcc. 
- ---- gcc/config/rs6000/linux-unwind.h.jj 2007-09-04 22:24:32.000000000 +0200 -+++ gcc/config/rs6000/linux-unwind.h 2007-10-18 14:42:25.000000000 +0200 -@@ -244,7 +244,10 @@ ppc_fallback_frame_state (struct _Unwind - } - - fs->regs.reg[CR2_REGNO].how = REG_SAVED_OFFSET; -- fs->regs.reg[CR2_REGNO].loc.offset = (long) &regs->ccr - new_cfa; -+ /* CR? regs are always 32-bit and PPC is big-endian, so in 64-bit -+ libgcc loc.offset needs to point to the low 32 bits of regs->ccr. */ -+ fs->regs.reg[CR2_REGNO].loc.offset = (long) &regs->ccr - new_cfa -+ + sizeof (long) - 4; - - fs->regs.reg[LINK_REGISTER_REGNUM].how = REG_SAVED_OFFSET; - fs->regs.reg[LINK_REGISTER_REGNUM].loc.offset = (long) &regs->link - new_cfa; diff --git a/gcc41-pr23848.patch b/gcc41-pr23848.patch new file mode 100644 index 0000000..1fef579 --- /dev/null +++ b/gcc41-pr23848.patch @@ -0,0 +1,225 @@ +2007-11-15 Jakub Jelinek + + PR middle-end/23848 + * tree-ssa-ccp.c (optimize_stack_restore): New function. + (execute_fold_all_builtins): Call optimize_stack_restore for + BUILT_IN_STACK_RESTORE. + + * gcc.dg/tree-ssa/pr23848-1.c: New test. + * gcc.dg/tree-ssa/pr23848-2.c: New test. + * gcc.dg/tree-ssa/pr23848-3.c: New test. + * gcc.dg/tree-ssa/pr23848-4.c: New test. + +--- gcc/tree-ssa-ccp.c (revision 130205) ++++ gcc/tree-ssa-ccp.c (revision 130206) +@@ -2394,6 +2394,75 @@ fold_stmt_inplace (tree stmt) + return changed; + } + ++/* Try to optimize out __builtin_stack_restore. Optimize it out ++ if there is another __builtin_stack_restore in the same basic ++ block and no calls or ASM_EXPRs are in between, or if this block's ++ only outgoing edge is to EXIT_BLOCK and there are no calls or ++ ASM_EXPRs after this __builtin_stack_restore. 
*/ ++ ++static tree ++optimize_stack_restore (basic_block bb, tree call, block_stmt_iterator i) ++{ ++ tree stack_save, stmt, callee; ++ ++ if (TREE_CODE (call) != CALL_EXPR ++ || TREE_OPERAND (call, 1) == NULL_TREE ++ || TREE_CHAIN (TREE_OPERAND (call, 1)) != NULL_TREE ++ || TREE_CODE (TREE_VALUE (TREE_OPERAND (call, 1))) != SSA_NAME ++ || !POINTER_TYPE_P (TREE_TYPE (TREE_VALUE (TREE_OPERAND (call, 1))))) ++ return NULL_TREE; ++ ++ for (bsi_next (&i); !bsi_end_p (i); bsi_next (&i)) ++ { ++ tree call; ++ ++ stmt = bsi_stmt (i); ++ if (TREE_CODE (stmt) == ASM_EXPR) ++ return NULL_TREE; ++ call = get_call_expr_in (stmt); ++ if (call == NULL) ++ continue; ++ ++ callee = get_callee_fndecl (call); ++ if (!callee || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL) ++ return NULL_TREE; ++ ++ if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE) ++ break; ++ } ++ ++ if (bsi_end_p (i) ++ && (! single_succ_p (bb) ++ || single_succ_edge (bb)->dest != EXIT_BLOCK_PTR)) ++ return NULL_TREE; ++ ++ stack_save = SSA_NAME_DEF_STMT (TREE_VALUE (TREE_OPERAND (call, 1))); ++ if (TREE_CODE (stack_save) != MODIFY_EXPR ++ || TREE_OPERAND (stack_save, 0) ++ != TREE_VALUE (TREE_OPERAND (call, 1)) ++ || TREE_CODE (TREE_OPERAND (stack_save, 1)) != CALL_EXPR ++ || tree_could_throw_p (stack_save) ++ || !has_single_use (TREE_VALUE (TREE_OPERAND (call, 1)))) ++ return NULL_TREE; ++ ++ callee = get_callee_fndecl (TREE_OPERAND (stack_save, 1)); ++ if (!callee ++ || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL ++ || DECL_FUNCTION_CODE (callee) != BUILT_IN_STACK_SAVE ++ || TREE_OPERAND (TREE_OPERAND (stack_save, 1), 1) != NULL_TREE) ++ return NULL_TREE; ++ ++ stmt = stack_save; ++ if (!set_rhs (&stmt, ++ build_int_cst (TREE_TYPE (TREE_VALUE (TREE_OPERAND (call, ++ 1))), 0))) ++ return NULL_TREE; ++ gcc_assert (stmt == stack_save); ++ mark_new_vars_to_rename (stmt); ++ ++ return integer_zero_node; ++} ++ + /* Convert EXPR into a GIMPLE value suitable for substitution on the + RHS of an 
assignment. Insert the necessary statements before + iterator *SI_P. */ +@@ -2469,6 +2538,12 @@ execute_fold_all_builtins (void) + result = integer_zero_node; + break; + ++ case BUILT_IN_STACK_RESTORE: ++ result = optimize_stack_restore (bb, *stmtp, i); ++ if (result) ++ break; ++ /* FALLTHRU */ ++ + default: + bsi_next (&i); + continue; +--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c (revision 0) ++++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-1.c (revision 130206) +@@ -0,0 +1,32 @@ ++/* PR middle-end/23848 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++void bar1 (char *, int); ++void foo1 (int size) ++{ ++ char temp[size]; ++ temp[size-1] = '\0'; ++ bar1 (temp, size); ++} ++ ++void bar2 (char *, char *, char *, char *, int); ++void foo2 (int size) ++{ ++ char temp[size]; ++ temp[size-1] = '\0'; ++ { ++ char temp2[size]; ++ { ++ char temp3[size]; ++ { ++ char temp4[size]; ++ bar2 (temp, temp2, temp3, temp4, size); ++ } ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-not "__builtin_stack_save" "optimized"} } */ ++/* { dg-final { scan-tree-dump-not "__builtin_stack_restore" "optimized"} } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c (revision 0) ++++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-2.c (revision 130206) +@@ -0,0 +1,25 @@ ++/* PR middle-end/23848 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++void bar (char *, char *, char *, char *, int); ++void foo (int size) ++{ ++ char temp[size]; ++ temp[size-1] = '\0'; ++ { ++ char temp2[size]; ++ { ++ char temp3[size]; ++ { ++ char temp4[size]; ++ bar (temp, temp2, temp3, temp4, size); ++ } ++ } ++ bar (temp, temp2, (char *) 0, (char *) 0, size); ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */ ++/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- 
gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c (revision 0) ++++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-3.c (revision 130206) +@@ -0,0 +1,28 @@ ++/* PR middle-end/23848 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++void bar (int, char *, char *, char *, char *, int); ++void foo (int size) ++{ ++ int i; ++ for (i = 0; i < size; i++) ++ { ++ char temp[size]; ++ temp[size-1] = '\0'; ++ { ++ char temp2[size]; ++ { ++ char temp3[size]; ++ { ++ char temp4[size]; ++ bar (i, temp, temp2, temp3, temp4, size); ++ } ++ } ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */ ++/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +--- gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c (revision 0) ++++ gcc/testsuite/gcc.dg/tree-ssa/pr23848-4.c (revision 130206) +@@ -0,0 +1,25 @@ ++/* PR middle-end/23848 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++void bar (char *, char *, char *, char *, int); ++void foo (int size) ++{ ++ char temp[size]; ++ temp[size-1] = '\0'; ++ { ++ char temp2[size]; ++ { ++ char temp3[size]; ++ { ++ char temp4[size]; ++ bar (temp, temp2, temp3, temp4, size); ++ } ++ } ++ __asm __volatile ("" : : "r" (&temp[0]), "r" (&temp2[0]) : "memory"); ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "__builtin_stack_save" 1 "optimized"} } */ ++/* { dg-final { scan-tree-dump-times "__builtin_stack_restore" 1 "optimized"} } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc41-pr27643.patch b/gcc41-pr27643.patch new file mode 100644 index 0000000..f2547dd --- /dev/null +++ b/gcc41-pr27643.patch @@ -0,0 +1,71 @@ +2007-12-18 Andrew Haley + + PR java/27643 + * jcf-parse.c (java_parse_file): Remove call to + java_mark_class_local. + (parse_class_file): Reinstate call to java_mark_class_local here. 
+ * decl.c (java_mark_cni_decl_local): If the ASSEMBLER_NAME is + already set, call java_mangle_decl() and make_decl_rtl() to + rewrite its name as a hidden alias. + +--- gcc/java/decl.c (revision 131035) ++++ gcc/java/decl.c (revision 131036) +@@ -1890,18 +1890,27 @@ java_mark_decl_local (tree decl) + static void + java_mark_cni_decl_local (tree decl) + { +- /* Setting DECL_LOCAL_CNI_METHOD_P changes the behavior of the mangler. +- We expect that we should not yet have referenced this decl in a +- context that requires it. Check this invariant even if we don't have +- support for hidden aliases. */ +- gcc_assert (!DECL_ASSEMBLER_NAME_SET_P (decl)); +- + #if !defined(HAVE_GAS_HIDDEN) || !defined(ASM_OUTPUT_DEF) + return; + #endif + + DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; + DECL_LOCAL_CNI_METHOD_P (decl) = 1; ++ ++ /* Setting DECL_LOCAL_CNI_METHOD_P changes the behavior of the ++ mangler. We might have already referenced this native method and ++ therefore created its name, but even if we have it won't hurt. ++ We'll just go via its externally visible name, rather than its ++ hidden alias. However, we must force things so that the correct ++ mangling is done. */ ++ ++ if (DECL_ASSEMBLER_NAME_SET_P (decl)) ++ java_mangle_decl (decl); ++ if (DECL_RTL_SET_P (decl)) ++ { ++ SET_DECL_RTL (decl, 0); ++ make_decl_rtl (decl); ++ } + } + + /* Use the preceding two functions and mark all members of the class. */ +--- gcc/java/jcf-parse.c (revision 131035) ++++ gcc/java/jcf-parse.c (revision 131036) +@@ -1596,6 +1596,8 @@ parse_class_file (void) + file_start_location = input_location; + (*debug_hooks->start_source_file) (input_line, input_filename); + ++ java_mark_class_local (current_class); ++ + gen_indirect_dispatch_tables (current_class); + + for (method = TYPE_METHODS (current_class); +@@ -1967,13 +1969,6 @@ java_parse_file (int set_yydebug ATTRIBU + } + } + +- /* Do this before lowering any code. 
*/ +- for (node = current_file_list; node; node = TREE_CHAIN (node)) +- { +- if (CLASS_FILE_P (node)) +- java_mark_class_local (TREE_TYPE (node)); +- } +- + for (node = current_file_list; node; node = TREE_CHAIN (node)) + { + input_location = DECL_SOURCE_LOCATION (node); diff --git a/gcc41-pr29225.patch b/gcc41-pr29225.patch new file mode 100644 index 0000000..995cd8c --- /dev/null +++ b/gcc41-pr29225.patch @@ -0,0 +1,50 @@ +2007-11-13 Jakub Jelinek + + PR c++/29225 + * call.c (build_new_op): Call resolve_args before calling + build_over_call. + + * g++.dg/template/crash72.C: New test. + +--- gcc/cp/call.c (revision 130125) ++++ gcc/cp/call.c (revision 130126) +@@ -3918,7 +3918,10 @@ build_new_op (enum tree_code code, int f + if (overloaded_p) + *overloaded_p = true; + +- result = build_over_call (cand, LOOKUP_NORMAL); ++ if (resolve_args (arglist) == error_mark_node) ++ result = error_mark_node; ++ else ++ result = build_over_call (cand, LOOKUP_NORMAL); + } + else + { +--- gcc/testsuite/g++.dg/template/crash72.C (revision 0) ++++ gcc/testsuite/g++.dg/template/crash72.C (revision 130126) +@@ -0,0 +1,25 @@ ++// PR c++/29225 ++// { dg-do compile } ++ ++template bool operator< (L x, R y); ++struct T { int t (); }; ++class S {}; ++ ++struct U ++{ ++ typedef int (T::* M) (); ++ M m; ++ ++ bool operator() (S &x) ++ { ++ T a; ++ return (a.*m) < x; // { dg-error "invalid use of non-static member" } ++ } ++}; ++ ++void foo (S &x) ++{ ++ U m; ++ m.m = &T::t; ++ m (x); ++} diff --git a/gcc41-pr29712.patch b/gcc41-pr29712.patch new file mode 100644 index 0000000..ea130c8 --- /dev/null +++ b/gcc41-pr29712.patch @@ -0,0 +1,55 @@ +2007-01-15 Paul Thomas + + PR fortran/29712 + * resolve.c (resolve_function): Only a reference to the final + dimension of an assumed size array is an error in an inquiry + function. + + * gfortran.dg/bound_2.f90: Reinstate commented out line. + * gfortran.dg/initialization_1.f90: Change warning. 
+ +--- gcc/fortran/resolve.c (revision 120789) ++++ gcc/fortran/resolve.c (revision 120790) +@@ -1498,10 +1498,16 @@ resolve_function (gfc_expr * expr) + + for (arg = expr->value.function.actual; arg; arg = arg->next) + { +- if (inquiry && arg->next != NULL && arg->next->expr +- && arg->next->expr->expr_type != EXPR_CONSTANT) +- break; +- ++ if (inquiry && arg->next != NULL && arg->next->expr) ++ { ++ if (arg->next->expr->expr_type != EXPR_CONSTANT) ++ break; ++ ++ if ((int)mpz_get_si (arg->next->expr->value.integer) ++ < arg->expr->rank) ++ break; ++ } ++ + if (arg->expr != NULL + && arg->expr->rank > 0 + && resolve_assumed_size_actual (arg->expr)) +--- gcc/testsuite/gfortran.dg/initialization_1.f90 (revision 120789) ++++ gcc/testsuite/gfortran.dg/initialization_1.f90 (revision 120790) +@@ -27,7 +27,7 @@ contains + integer :: l1 = len (ch1) ! { dg-warning "assumed character length variable" } + + ! These are warnings because they are gfortran extensions. +- integer :: m3 = size (x, 1) ! { dg-warning "upper bound in the last dimension" } ++ integer :: m3 = size (x, 1) ! { dg-warning "Evaluation of nonstandard initialization" } + integer :: m4(2) = shape (z) ! { dg-warning "Evaluation of nonstandard initialization" } + + ! This does not depend on non-constant properties. +--- gcc/testsuite/gfortran.dg/bound_2.f90 (revision 120789) ++++ gcc/testsuite/gfortran.dg/bound_2.f90 (revision 120790) +@@ -194,7 +194,7 @@ contains + subroutine foo (x,n) + integer :: x(7,n,2,*), n + +- !if (ubound(x,1) /= 7 .or. ubound(x,2) /= 4 .or. ubound(x,3) /= 2) call abort ++ if (ubound(x,1) /= 7 .or. ubound(x,2) /= 4 .or. 
ubound(x,3) /= 2) call abort + end subroutine foo + + subroutine jackal (b, c) diff --git a/gcc41-pr29978.patch b/gcc41-pr29978.patch new file mode 100644 index 0000000..12916d4 --- /dev/null +++ b/gcc41-pr29978.patch @@ -0,0 +1,64 @@ +2007-12-14 Jakub Jelinek + + PR target/29978 + * config/i386/i386.c (ix86_expand_branch): Optimize LE/LEU/GT/GTU + DImode comparisons against constant with all 1's in the lower word. + + * gcc.target/i386/pr29978.c: New test. + +--- gcc/config/i386/i386.c (revision 130937) ++++ gcc/config/i386/i386.c (revision 130938) +@@ -10496,16 +10496,28 @@ ix86_expand_branch (enum rtx_code code, + + /* Otherwise, if we are doing less-than or greater-or-equal-than, + op1 is a constant and the low word is zero, then we can just +- examine the high word. */ ++ examine the high word. Similarly for low word -1 and ++ less-or-equal-than or greater-than. */ + +- if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx) ++ if (GET_CODE (hi[1]) == CONST_INT) + switch (code) + { + case LT: case LTU: case GE: case GEU: +- ix86_compare_op0 = hi[0]; +- ix86_compare_op1 = hi[1]; +- ix86_expand_branch (code, label); +- return; ++ if (lo[1] == const0_rtx) ++ { ++ ix86_compare_op0 = hi[0]; ++ ix86_compare_op1 = hi[1]; ++ ix86_expand_branch (code, label); ++ return; ++ } ++ case LE: case LEU: case GT: case GTU: ++ if (lo[1] == constm1_rtx) ++ { ++ ix86_compare_op0 = hi[0]; ++ ix86_compare_op1 = hi[1]; ++ ix86_expand_branch (code, label); ++ return; ++ } + default: + break; + } +--- gcc/testsuite/gcc.target/i386/pr29978.c (revision 0) ++++ gcc/testsuite/gcc.target/i386/pr29978.c (revision 130938) +@@ -0,0 +1,16 @@ ++/* PR target/29978 */ ++/* { dg-do compile } */ ++/* { dg-options "-Os" } */ ++ ++void g (); ++ ++void ++f (long long v) ++{ ++ if (v > 0xfffffffffLL) ++ g (); ++ g (); ++} ++ ++/* Verify there are no redundant jumps jl .L2; jle .L2 */ ++/* { dg-final { scan-assembler-not "jl\[^e\]*\\.L" { target ilp32 } } } */ diff --git a/gcc41-pr30293.patch 
b/gcc41-pr30293.patch new file mode 100644 index 0000000..eb8549a --- /dev/null +++ b/gcc41-pr30293.patch @@ -0,0 +1,180 @@ +2007-11-23 Jakub Jelinek + + PR c++/30293 + PR c++/30294 + * decl.c (cp_finish_decl): Disallow variable or field + definitions if extern "Java" aggregates. + (grokparms): Disallow parameters with extern "Java" + aggregates. + (check_function_type): Disallow function return values + with extern "Java" aggregates. + * init.c (build_new_1): Disallow placement new with + extern "Java" aggregates. + + * g++.dg/ext/java-2.C: New test. + +--- gcc/cp/decl.c.jj 2007-11-19 17:46:41.000000000 +0100 ++++ gcc/cp/decl.c 2007-11-22 13:34:52.000000000 +0100 +@@ -5349,6 +5349,20 @@ cp_finish_decl (tree decl, tree init, bo + is *not* defined. */ + && (!DECL_EXTERNAL (decl) || init)) + { ++ if (TYPE_FOR_JAVA (type) && IS_AGGR_TYPE (type)) ++ { ++ tree jclass ++ = IDENTIFIER_GLOBAL_VALUE (get_identifier ("jclass")); ++ /* Allow libjava/prims.cc define primitive classes. */ ++ if (init != NULL_TREE ++ || jclass == NULL_TREE ++ || TREE_CODE (jclass) != TYPE_DECL ++ || !POINTER_TYPE_P (TREE_TYPE (jclass)) ++ || !same_type_ignoring_top_level_qualifiers_p ++ (type, TREE_TYPE (TREE_TYPE (jclass)))) ++ error ("Java object %qD not allocated with %<new%>", decl); ++ init = NULL_TREE; ++ } + if (init) + { + DECL_NONTRIVIALLY_INITIALIZED_P (decl) = 1; +@@ -5419,6 +5433,9 @@ cp_finish_decl (tree decl, tree init, bo + else if (TREE_CODE (type) == ARRAY_TYPE) + layout_type (type); + } ++ else if (TREE_CODE (decl) == FIELD_DECL ++ && TYPE_FOR_JAVA (type) && IS_AGGR_TYPE (type)) ++ error ("non-static data member %qD has Java class type", decl); + + /* Add this declaration to the statement-tree. 
This needs to happen + after the call to check_initializer so that the DECL_EXPR for a +@@ -8993,6 +9010,16 @@ grokparms (cp_parameter_declarator *firs + TREE_TYPE (decl) = error_mark_node; + } + ++ if (type != error_mark_node ++ && TYPE_FOR_JAVA (type) ++ && IS_AGGR_TYPE (type)) ++ { ++ error ("parameter %qD has Java class type", decl); ++ type = error_mark_node; ++ TREE_TYPE (decl) = error_mark_node; ++ init = NULL_TREE; ++ } ++ + if (type != error_mark_node) + { + /* Top-level qualifiers on the parameters are +@@ -10465,11 +10492,15 @@ check_function_type (tree decl, tree cur + + if (dependent_type_p (return_type)) + return; +- if (!COMPLETE_OR_VOID_TYPE_P (return_type)) ++ if (!COMPLETE_OR_VOID_TYPE_P (return_type) ++ || (TYPE_FOR_JAVA (return_type) && IS_AGGR_TYPE (return_type))) + { + tree args = TYPE_ARG_TYPES (fntype); +- +- error ("return type %q#T is incomplete", return_type); ++ ++ if (!COMPLETE_OR_VOID_TYPE_P (return_type)) ++ error ("return type %q#T is incomplete", return_type); ++ else ++ error ("return type has Java class type %q#T", return_type); + + /* Make it return void instead. 
*/ + if (TREE_CODE (fntype) == METHOD_TYPE) +--- gcc/cp/init.c.jj 2007-09-20 21:26:48.000000000 +0200 ++++ gcc/cp/init.c 2007-11-22 10:49:47.000000000 +0100 +@@ -1786,6 +1786,11 @@ build_new_1 (tree placement, tree type, + (alloc_fn, + build_tree_list (NULL_TREE, class_addr))); + } ++ else if (TYPE_FOR_JAVA (elt_type)) ++ { ++ error ("Java class %q#T object allocated using placement new", elt_type); ++ return error_mark_node; ++ } + else + { + tree fnname; +--- gcc/testsuite/g++.dg/ext/java-2.C.jj 2007-11-22 10:55:10.000000000 +0100 ++++ gcc/testsuite/g++.dg/ext/java-2.C 2007-11-22 10:54:59.000000000 +0100 +@@ -0,0 +1,79 @@ ++// PR c++/30293 ++// PR c++/30294 ++// { dg-do compile } ++// { dg-options "" } ++ ++extern "Java" { ++typedef __java_byte jbyte; ++namespace java { ++namespace lang { ++ class Object {}; ++ class Class {}; ++} ++} ++typedef struct java::lang::Object* jobject; ++typedef java::lang::Class *jclass; ++} ++extern "C" jobject _Jv_AllocObject (jclass); ++ ++extern "Java" { ++ struct A { static java::lang::Class class$; }; ++} ++ ++struct B { ++ A a; // { dg-error "has Java class type" } ++}; ++ ++void* operator new (__SIZE_TYPE__, void*) throw(); ++char buf[1024]; ++ ++A a; // { dg-error "not allocated with" } ++A b = A (); // { dg-error "not allocated with" } ++A *c = new ((void *) buf) A (); // { dg-error "using placement new" } ++A *d = new A (); ++jbyte e = 6; ++ ++const A fn1 () // { dg-error "return type has Java class type" } ++{ ++ A a; // { dg-error "not allocated with" } ++ return a; ++} ++ ++A fn2 () // { dg-error "return type has Java class type" } ++{ ++ A a; // { dg-error "not allocated with" } ++ return a; ++} ++ ++A *fn3 () ++{ ++ return new A (); ++} ++ ++A &fn4 () ++{ ++ return *c; ++} ++ ++jbyte fn5 () ++{ ++ return 7; ++} ++ ++void fn6 (A x) // { dg-error "has Java class type" } ++{ ++} ++ ++void fn7 (const A x) // { dg-error "has Java class type" } ++{ ++} ++ ++void fn8 (A *x) ++{ ++ (void) x; ++} ++ ++void fn9 (jbyte x) ++{ ++ 
(void) x; ++} diff --git a/gcc41-pr30988.patch b/gcc41-pr30988.patch new file mode 100644 index 0000000..fc26668 --- /dev/null +++ b/gcc41-pr30988.patch @@ -0,0 +1,103 @@ +2007-11-18 Jakub Jelinek + + PR c++/30988 + * semantics.c (finish_call_expr): Set + current_function_returns_abnormally if fn is noreturn FUNCTION_DECL + or OVERLOAD with all noreturn functions. + + * g++.dg/warn/noreturn-4.C: New test. + * g++.dg/warn/noreturn-5.C: New test. + * g++.dg/warn/noreturn-6.C: New test. + * g++.dg/warn/noreturn-7.C: New test. + +--- gcc/cp/semantics.c (revision 130279) ++++ gcc/cp/semantics.c (revision 130280) +@@ -1846,6 +1846,20 @@ finish_call_expr (tree fn, tree args, bo + { + result = build_nt_call_list (fn, args); + KOENIG_LOOKUP_P (result) = koenig_p; ++ if (cfun) ++ { ++ do ++ { ++ tree fndecl = OVL_CURRENT (fn); ++ if (TREE_CODE (fndecl) != FUNCTION_DECL ++ || !TREE_THIS_VOLATILE (fndecl)) ++ break; ++ fn = OVL_NEXT (fn); ++ } ++ while (fn); ++ if (!fn) ++ current_function_returns_abnormally = 1; ++ } + return result; + } + if (!BASELINK_P (fn) +--- gcc/testsuite/g++.dg/warn/noreturn-4.C (revision 0) ++++ gcc/testsuite/g++.dg/warn/noreturn-4.C (revision 130280) +@@ -0,0 +1,13 @@ ++// PR c++/30988 ++// { dg-do compile } ++// { dg-options "-O2 -Wall" } ++ ++void f (const char *) __attribute__ ((noreturn)); ++ ++template struct A ++{ ++ int g () ++ { ++ f (__FUNCTION__); ++ } ++}; +--- gcc/testsuite/g++.dg/warn/noreturn-5.C (revision 0) ++++ gcc/testsuite/g++.dg/warn/noreturn-5.C (revision 130280) +@@ -0,0 +1,15 @@ ++// PR c++/30988 ++// { dg-do compile } ++// { dg-options "-O2 -Wall" } ++ ++void f (const char *) __attribute__ ((noreturn)); ++void f (int) __attribute__ ((noreturn)); ++void f (double) __attribute__ ((noreturn)); ++ ++template struct A ++{ ++ int g () ++ { ++ f ((T) 0); ++ } ++}; +--- gcc/testsuite/g++.dg/warn/noreturn-6.C (revision 0) ++++ gcc/testsuite/g++.dg/warn/noreturn-6.C (revision 130280) +@@ -0,0 +1,13 @@ ++// PR c++/30988 ++// { dg-do 
compile } ++// { dg-options "-O2 -Wall" } ++ ++void f (const char *); ++ ++template struct A ++{ ++ int g () ++ { ++ f (__FUNCTION__); ++ } // { dg-warning "no return statement in function returning non-void" } ++}; +--- gcc/testsuite/g++.dg/warn/noreturn-7.C (revision 0) ++++ gcc/testsuite/g++.dg/warn/noreturn-7.C (revision 130280) +@@ -0,0 +1,15 @@ ++// PR c++/30988 ++// { dg-do compile } ++// { dg-options "-O2 -Wall" } ++ ++void f (const char *) __attribute__ ((noreturn)); ++void f (int); ++void f (double) __attribute__ ((noreturn)); ++ ++template struct A ++{ ++ int g () ++ { ++ f ((T) 0); ++ } // { dg-warning "no return statement in function returning non-void" } ++}; diff --git a/gcc41-pr31483.patch b/gcc41-pr31483.patch new file mode 100644 index 0000000..9f91f10 --- /dev/null +++ b/gcc41-pr31483.patch @@ -0,0 +1,74 @@ +2007-04-05 Paul Thomas + + PR fortran/31483 + * trans-expr.c (gfc_conv_function_call): Give a dummy + procedure the correct type if it has alternate returns. + + * gfortran.dg/altreturn_5.f90: New test. + +--- gcc/fortran/trans-expr.c (revision 123517) ++++ gcc/fortran/trans-expr.c (revision 123518) +@@ -2154,17 +2154,23 @@ gfc_conv_function_call (gfc_se * se, gfc + + /* Generate the actual call. */ + gfc_conv_function_val (se, sym); ++ + /* If there are alternate return labels, function type should be + integer. Can't modify the type in place though, since it can be shared +- with other functions. */ ++ with other functions. For dummy arguments, the typing is done to ++ this result, even if it has to be repeated for each call. */ + if (has_alternate_specifier + && TREE_TYPE (TREE_TYPE (TREE_TYPE (se->expr))) != integer_type_node) + { +- gcc_assert (! 
sym->attr.dummy); +- TREE_TYPE (sym->backend_decl) +- = build_function_type (integer_type_node, +- TYPE_ARG_TYPES (TREE_TYPE (sym->backend_decl))); +- se->expr = gfc_build_addr_expr (NULL, sym->backend_decl); ++ if (!sym->attr.dummy) ++ { ++ TREE_TYPE (sym->backend_decl) ++ = build_function_type (integer_type_node, ++ TYPE_ARG_TYPES (TREE_TYPE (sym->backend_decl))); ++ se->expr = gfc_build_addr_expr (NULL, sym->backend_decl); ++ } ++ else ++ TREE_TYPE (TREE_TYPE (TREE_TYPE (se->expr))) = integer_type_node; + } + + fntype = TREE_TYPE (TREE_TYPE (se->expr)); +--- gcc/testsuite/gfortran.dg/altreturn_5.f90 (revision 0) ++++ gcc/testsuite/gfortran.dg/altreturn_5.f90 (revision 123518) +@@ -0,0 +1,31 @@ ++! { dg-do run } ++! Tests the fix for PR31483, in which dummy argument procedures ++! produced an ICE if they had an alternate return. ++! ++! Contributed by Mathias Fröhlich ++ ++ SUBROUTINE R (i, *, *) ++ INTEGER i ++ RETURN i ++ END ++ ++ SUBROUTINE PHLOAD (READER, i, res) ++ IMPLICIT NONE ++ EXTERNAL READER ++ integer i ++ character(3) res ++ CALL READER (i, *1, *2) ++ 1 res = "one" ++ return ++ 2 res = "two" ++ return ++ END ++ ++ EXTERNAL R ++ character(3) res ++ call PHLOAD (R, 1, res) ++ if (res .ne. "one") call abort () ++ CALL PHLOAD (R, 2, res) ++ if (res .ne. "two") call abort () ++ END ++ diff --git a/gcc41-pr32241.patch b/gcc41-pr32241.patch new file mode 100644 index 0000000..a3dfe3c --- /dev/null +++ b/gcc41-pr32241.patch @@ -0,0 +1,91 @@ +2007-11-10 Jakub Jelinek + + PR c++/32241 + * pt.c (tsubst_copy_and_build) <case COMPONENT_REF>: If object_type + is not scalar type, let finish_class_member_access_expr handle + diagnostics. Pass BIT_NOT_EXPR argument to + finish_pseudo_destructor_expr. Handle SCOPE_REF properly. + + * g++.dg/template/pseudodtor3.C: New test. 
+ +--- gcc/cp/pt.c (revision 130065) ++++ gcc/cp/pt.c (revision 130066) +@@ -11004,15 +11004,23 @@ tsubst_copy_and_build (tree t, + + if (object_type && !CLASS_TYPE_P (object_type)) + { +- if (TREE_CODE (member) == BIT_NOT_EXPR) +- return finish_pseudo_destructor_expr (object, +- NULL_TREE, +- object_type); +- else if (TREE_CODE (member) == SCOPE_REF +- && (TREE_CODE (TREE_OPERAND (member, 1)) == BIT_NOT_EXPR)) +- return finish_pseudo_destructor_expr (object, +- object, +- object_type); ++ if (SCALAR_TYPE_P (object_type)) ++ { ++ tree s = NULL_TREE; ++ tree dtor = member; ++ ++ if (TREE_CODE (dtor) == SCOPE_REF) ++ { ++ s = TREE_OPERAND (dtor, 0); ++ dtor = TREE_OPERAND (dtor, 1); ++ } ++ if (TREE_CODE (dtor) == BIT_NOT_EXPR) ++ { ++ dtor = TREE_OPERAND (dtor, 0); ++ if (TYPE_P (dtor)) ++ return finish_pseudo_destructor_expr (object, s, dtor); ++ } ++ } + } + else if (TREE_CODE (member) == SCOPE_REF + && TREE_CODE (TREE_OPERAND (member, 1)) == TEMPLATE_ID_EXPR) +--- gcc/testsuite/g++.dg/template/pseudodtor3.C (revision 0) ++++ gcc/testsuite/g++.dg/template/pseudodtor3.C (revision 130066) +@@ -0,0 +1,43 @@ ++// PR c++/32241 ++// { dg-do compile } ++ ++struct A ++{ ++ typedef int T; ++ T &foo (); ++ A () { foo.~T (); } // { dg-error "does not have class type|expected" } ++}; ++ ++template struct B ++{ ++ T &foo (); ++ B () { foo.~T (); } // { dg-error "invalid use of member" } ++}; ++ ++B b; ++ ++template struct C ++{ ++ T t; ++ C () { t.~S (); } // { dg-error "is not of type" } ++}; ++ ++C c; ++ ++template struct D ++{ ++ T t; ++ typedef long int U; ++ D () { t.~U (); } // { dg-error "is not of type" } ++}; ++ ++D d; ++ ++template struct E ++{ ++ T &foo (); ++ typedef long int U; ++ E () { foo.~U (); } // { dg-error "is not of type" } ++}; ++ ++E e; diff --git a/gcc41-pr32384.patch b/gcc41-pr32384.patch new file mode 100644 index 0000000..983068f --- /dev/null +++ b/gcc41-pr32384.patch @@ -0,0 +1,108 @@ +2007-11-01 Jakub Jelinek + + PR c++/32384 + * parser.c 
(cp_parser_postfix_dot_deref_expression): If + POSTFIX_EXPRESSION is type dependent, try to parse it as pseudo dtor + first and if that succeeds and type is SCALAR_TYPE_P, create + PSEUDO_DTOR_EXPR. + + * g++.dg/template/pseudodtor1.C: New test. + * g++.dg/template/pseudodtor2.C: New test. + +--- gcc/cp/parser.c (revision 129835) ++++ gcc/cp/parser.c (revision 129836) +@@ -4850,8 +4850,10 @@ cp_parser_postfix_dot_deref_expression ( + pseudo_destructor_p = false; + + /* If the SCOPE is a scalar type, then, if this is a valid program, +- we must be looking at a pseudo-destructor-name. */ +- if (scope && SCALAR_TYPE_P (scope)) ++ we must be looking at a pseudo-destructor-name. If POSTFIX_EXPRESSION ++ is type dependent, it can be pseudo-destructor-name or something else. ++ Try to parse it as pseudo-destructor-name first. */ ++ if ((scope && SCALAR_TYPE_P (scope)) || dependent_p) + { + tree s; + tree type; +@@ -4860,7 +4862,12 @@ cp_parser_postfix_dot_deref_expression ( + /* Parse the pseudo-destructor-name. 
*/ + s = NULL_TREE; + cp_parser_pseudo_destructor_name (parser, &s, &type); +- if (cp_parser_parse_definitely (parser)) ++ if (dependent_p ++ && (cp_parser_error_occurred (parser) ++ || TREE_CODE (type) != TYPE_DECL ++ || !SCALAR_TYPE_P (TREE_TYPE (type)))) ++ cp_parser_abort_tentative_parse (parser); ++ else if (cp_parser_parse_definitely (parser)) + { + pseudo_destructor_p = true; + postfix_expression +--- gcc/testsuite/g++.dg/template/pseudodtor1.C (revision 0) ++++ gcc/testsuite/g++.dg/template/pseudodtor1.C (revision 129836) +@@ -0,0 +1,44 @@ ++// PR c++/32384 ++// { dg-do compile } ++ ++struct A ++{ ++ typedef int T; ++ T foo (); ++ ++ A () { foo ().~T (); } ++}; ++ ++template struct B ++{ ++ typedef int T; ++ T foo (); ++ ++ B () { foo ().~T (); } ++}; ++ ++template struct C ++{ ++ T t; ++ C () { t.~T (); } ++}; ++ ++template struct D ++{ ++ typedef int T; ++ S foo (); ++ ++ D () { foo ().~T(); } ++}; ++ ++struct Z ++{ ++ Z () {} ++ ~Z () {} ++}; ++ ++A a; ++B b; ++C c1; ++C c2; ++D d; +--- gcc/testsuite/g++.dg/template/pseudodtor2.C (revision 0) ++++ gcc/testsuite/g++.dg/template/pseudodtor2.C (revision 129836) +@@ -0,0 +1,18 @@ ++// PR c++/32384 ++// { dg-do compile } ++ ++template struct D ++{ ++ typedef int T; ++ S foo (); ++ ++ D () { foo ().~T(); } // { dg-error "is not of type" } ++}; ++ ++struct Z ++{ ++ Z () {} ++ ~Z () {} ++}; ++ ++D d; diff --git a/gcc41-pr33501.patch b/gcc41-pr33501.patch new file mode 100644 index 0000000..0dab701 --- /dev/null +++ b/gcc41-pr33501.patch @@ -0,0 +1,77 @@ +2007-11-07 Jakub Jelinek + + PR c++/33501 + * call.c (build_over_call): Don't check TREE_ADDRESSABLE + on incomplete type. + + * g++.dg/warn/incomplete2.C: New test. + * g++.dg/template/incomplete4.C: New test. + * g++.dg/template/incomplete5.C: New test. + +--- gcc/cp/call.c (revision 129967) ++++ gcc/cp/call.c (revision 129968) +@@ -4993,7 +4993,8 @@ build_over_call (struct z_candidate *can + + /* Don't make a copy here if build_call is going to. 
*/ + if (conv->kind == ck_rvalue +- && !TREE_ADDRESSABLE (complete_type (type))) ++ && COMPLETE_TYPE_P (complete_type (type)) ++ && !TREE_ADDRESSABLE (type)) + conv = conv->u.next; + + val = convert_like_with_context +--- gcc/testsuite/g++.dg/warn/incomplete2.C (revision 0) ++++ gcc/testsuite/g++.dg/warn/incomplete2.C (revision 129968) +@@ -0,0 +1,13 @@ ++// PR c++/33501 ++// { dg-do compile } ++ ++class A; // { dg-error "forward declaration" } ++ ++int f (A); ++const A &make (); ++ ++int ++main () ++{ ++ return f (make ()); // { dg-error "invalid use of undefined type|initializing argument" } ++} +--- gcc/testsuite/g++.dg/template/incomplete5.C (revision 0) ++++ gcc/testsuite/g++.dg/template/incomplete5.C (revision 129968) +@@ -0,0 +1,17 @@ ++// PR c++/33501 ++// { dg-do compile } ++ ++class A; // { dg-error "forward declaration" } ++ ++template struct X ++{ ++ static int f (T); ++ static const T &make (); ++ static const bool value = sizeof (f (make ())) == sizeof (int); // { dg-error "invalid use of undefined type|initializing argument" } ++}; ++ ++int ++main () ++{ ++ return X ::value; ++} +--- gcc/testsuite/g++.dg/template/incomplete4.C (revision 0) ++++ gcc/testsuite/g++.dg/template/incomplete4.C (revision 129968) +@@ -0,0 +1,16 @@ ++// PR c++/33501 ++// { dg-do compile } ++ ++class A; // { dg-error "forward declaration" } ++ ++template struct X ++{ ++ static int f (T); ++ static const T &make (); ++}; ++ ++int ++main () ++{ ++ return X::f (X::make ()); // { dg-error "invalid use of undefined type|initializing argument" } ++} diff --git a/gcc41-pr33516.patch b/gcc41-pr33516.patch new file mode 100644 index 0000000..2491e11 --- /dev/null +++ b/gcc41-pr33516.patch @@ -0,0 +1,63 @@ +2007-11-02 Jakub Jelinek + + PR c++/33516 + * parser.c (cp_parser_nested_name_specifier_opt): Use + TYPE_MAIN_VARIANT (new_scope) as scope if new_scope is an incomplete + typedef of currently open class. + + * g++.dg/lookup/typedef1.C: New test. 
+ +--- gcc/cp/parser.c (revision 129861) ++++ gcc/cp/parser.c (revision 129862) +@@ -4085,7 +4085,15 @@ cp_parser_nested_name_specifier_opt (cp_ + && !COMPLETE_TYPE_P (new_scope) + /* Do not try to complete dependent types. */ + && !dependent_type_p (new_scope)) +- new_scope = complete_type (new_scope); ++ { ++ new_scope = complete_type (new_scope); ++ /* If it is a typedef to current class, use the current ++ class instead, as the typedef won't have any names inside ++ it yet. */ ++ if (!COMPLETE_TYPE_P (new_scope) ++ && currently_open_class (new_scope)) ++ new_scope = TYPE_MAIN_VARIANT (new_scope); ++ } + /* Make sure we look in the right scope the next time through + the loop. */ + parser->scope = new_scope; +--- gcc/testsuite/g++.dg/lookup/typedef1.C (revision 0) ++++ gcc/testsuite/g++.dg/lookup/typedef1.C (revision 129862) +@@ -0,0 +1,32 @@ ++// PR c++/33516 ++// { dg-do compile } ++ ++struct S1; ++typedef S1 T1; ++struct S1 { ++ typedef int U; ++ T1::U i; ++}; ++struct S2; ++typedef S2 T2; ++struct S2 { ++ typedef int U; ++}; ++T2::U j; ++struct S3; ++typedef S3 T3; ++struct S3 { ++ typedef int U; ++ S3::U i; ++}; ++ ++void ++foo () ++{ ++ S1 s1; ++ S2 s2; ++ S3 s3; ++ s1.i = 6; ++ j = 7; ++ s3.i = 8; ++} diff --git a/gcc41-pr33537.patch b/gcc41-pr33537.patch new file mode 100644 index 0000000..03fd500 --- /dev/null +++ b/gcc41-pr33537.patch @@ -0,0 +1,62 @@ +2007-11-01 Jakub Jelinek + + PR debug/33537 + * dwarf2out.c (gen_formal_parameter_die, gen_variable_die, + gen_decl_die): Use TREE_TYPE (TREE_TYPE (decl)) as type + rather than TREE_TYPE (decl) if DECL_BY_REFERENCE (decl). 
+ +--- gcc/dwarf2out.c (revision 129819) ++++ gcc/dwarf2out.c (revision 129820) +@@ -11832,8 +11832,11 @@ gen_formal_parameter_die (tree node, dw_ + add_abstract_origin_attribute (parm_die, origin); + else + { ++ tree type = TREE_TYPE (node); + add_name_and_src_coords_attributes (parm_die, node); +- add_type_attribute (parm_die, TREE_TYPE (node), ++ if (DECL_BY_REFERENCE (node)) ++ type = TREE_TYPE (type); ++ add_type_attribute (parm_die, type, + TREE_READONLY (node), + TREE_THIS_VOLATILE (node), + context_die); +@@ -12437,8 +12440,14 @@ gen_variable_die (tree decl, dw_die_ref + } + else + { ++ tree type = TREE_TYPE (decl); ++ if ((TREE_CODE (decl) == PARM_DECL ++ || TREE_CODE (decl) == RESULT_DECL) ++ && DECL_BY_REFERENCE (decl)) ++ type = TREE_TYPE (type); ++ + add_name_and_src_coords_attributes (var_die, decl); +- add_type_attribute (var_die, TREE_TYPE (decl), TREE_READONLY (decl), ++ add_type_attribute (var_die, type, TREE_READONLY (decl), + TREE_THIS_VOLATILE (decl), context_die); + + if (TREE_PUBLIC (decl)) +@@ -13694,7 +13703,10 @@ gen_decl_die (tree decl, dw_die_ref cont + + /* Output any DIEs that are needed to specify the type of this data + object. */ +- gen_type_die (TREE_TYPE (decl), context_die); ++ if (TREE_CODE (decl) == RESULT_DECL && DECL_BY_REFERENCE (decl)) ++ gen_type_die (TREE_TYPE (TREE_TYPE (decl)), context_die); ++ else ++ gen_type_die (TREE_TYPE (decl), context_die); + + /* And its containing type. 
*/ + origin = decl_class_context (decl); +@@ -13728,7 +13740,10 @@ gen_decl_die (tree decl, dw_die_ref cont + break; + + case PARM_DECL: +- gen_type_die (TREE_TYPE (decl), context_die); ++ if (DECL_BY_REFERENCE (decl)) ++ gen_type_die (TREE_TYPE (TREE_TYPE (decl)), context_die); ++ else ++ gen_type_die (TREE_TYPE (decl), context_die); + gen_formal_parameter_die (decl, context_die); + break; + diff --git a/gcc41-pr33616.patch b/gcc41-pr33616.patch new file mode 100644 index 0000000..7817fe1 --- /dev/null +++ b/gcc41-pr33616.patch @@ -0,0 +1,74 @@ +2007-10-30 Jakub Jelinek + + PR c++/33616 + * decl2.c (build_offset_ref_call_from_tree): Call + build_non_dependent_expr on object prior to building ADDR_EXPR from it + if FN is DOTSTAR_EXPR. + + * g++.dg/template/ptrmem18.C: New test. + +--- gcc/cp/decl2.c (revision 129783) ++++ gcc/cp/decl2.c (revision 129784) +@@ -3499,9 +3499,9 @@ build_offset_ref_call_from_tree (tree fn + parameter. That must be done before the FN is transformed + because we depend on the form of FN. */ + args = build_non_dependent_args (args); ++ object = build_non_dependent_expr (object); + if (TREE_CODE (fn) == DOTSTAR_EXPR) + object = build_unary_op (ADDR_EXPR, object, 0); +- object = build_non_dependent_expr (object); + args = tree_cons (NULL_TREE, object, args); + /* Now that the arguments are done, transform FN. 
*/ + fn = build_non_dependent_expr (fn); +--- gcc/testsuite/g++.dg/template/ptrmem18.C (revision 0) ++++ gcc/testsuite/g++.dg/template/ptrmem18.C (revision 129784) +@@ -0,0 +1,49 @@ ++// PR c++/33616 ++// { dg-do run } ++// { dg-options "-O2" } ++ ++extern "C" void abort (); ++ ++struct S { ++ int c; ++ S () : c (0) {} ++ virtual void f1 () { c += 1; } ++ virtual void f2 () { c += 16; } ++}; ++ ++struct T { ++ S s; ++}; ++ ++typedef void (S::*Q) (); ++ ++template ++void test1 (T *t) ++{ ++ (t->s.*P)(); ++} ++ ++template ++void test2 (T *t) ++{ ++ S &s = t->s; ++ (s.*P)(); ++} ++ ++int ++main () ++{ ++ T t; ++ test1 <&S::f1> (&t); ++ if (t.s.c != 1) ++ abort (); ++ test1 <&S::f2> (&t); ++ if (t.s.c != 17) ++ abort (); ++ test2 <&S::f1> (&t); ++ if (t.s.c != 18) ++ abort (); ++ test2 <&S::f2> (&t); ++ if (t.s.c != 34) ++ abort (); ++} diff --git a/gcc41-pr33723.patch b/gcc41-pr33723.patch new file mode 100644 index 0000000..2d3e746 --- /dev/null +++ b/gcc41-pr33723.patch @@ -0,0 +1,189 @@ +2007-10-29 Jakub Jelinek + + PR tree-optimization/33723 + * c-gimplify.c (c_gimplify_expr): Optimize INIT_EXPR or + MODIFY_EXPR with non-addressable COMPOUND_LITERAL_EXPR as source. + +2007-11-20 Jakub Jelinek + + PR testsuite/33978 + * gcc.dg/tree-ssa/pr33723.c: Adjust scan pattern to make it less + dependent on target settings like move_by_pieces etc. + +2007-10-30 Jakub Jelinek + + PR tree-optimization/33723 + * gcc.dg/tree-ssa/pr33723.c (T): Decrease size of field s. + +2007-10-29 Jakub Jelinek + + PR tree-optimization/33723 + * gcc.c-torture/execute/20071029-1.c: New test. + * gcc.dg/tree-ssa/pr33723.c: New test. 
+ +--- gcc/c-gimplify.c (revision 129742) ++++ gcc/c-gimplify.c (revision 129743) +@@ -233,6 +233,29 @@ c_gimplify_expr (tree *expr_p, tree *pre + case COMPOUND_LITERAL_EXPR: + return gimplify_compound_literal_expr (expr_p, pre_p); + ++ case INIT_EXPR: ++ case MODIFY_EXPR: ++ if (TREE_CODE (TREE_OPERAND (*expr_p, 1)) == COMPOUND_LITERAL_EXPR) ++ { ++ tree complit = TREE_OPERAND (*expr_p, 1); ++ tree decl_s = COMPOUND_LITERAL_EXPR_DECL_STMT (complit); ++ tree decl = DECL_EXPR_DECL (decl_s); ++ tree init = DECL_INITIAL (decl); ++ ++ /* struct T x = (struct T) { 0, 1, 2 } can be optimized ++ into struct T x = { 0, 1, 2 } if the address of the ++ compound literal has never been taken. */ ++ if (!TREE_ADDRESSABLE (complit) ++ && !TREE_ADDRESSABLE (decl) ++ && init) ++ { ++ *expr_p = copy_node (*expr_p); ++ TREE_OPERAND (*expr_p, 1) = init; ++ return GS_OK; ++ } ++ } ++ return GS_UNHANDLED; ++ + default: + return GS_UNHANDLED; + } +--- gcc/testsuite/gcc.c-torture/execute/20071029-1.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/execute/20071029-1.c (revision 129743) +@@ -0,0 +1,56 @@ ++extern void exit (int); ++extern void abort (void); ++ ++typedef union ++{ ++ struct ++ { ++ int f1, f2, f3, f4, f5, f6, f7, f8; ++ long int f9, f10; ++ int f11; ++ } f; ++ char s[56]; ++ long int a; ++} T; ++ ++__attribute__((noinline)) ++void ++test (T *t) ++{ ++ static int i = 11; ++ if (t->f.f1 != i++) ++ abort (); ++ if (t->f.f2 || t->f.f3 || t->f.f4 || t->f.f5 || t->f.f6 ++ || t->f.f7 || t->f.f8 || t->f.f9 || t->f.f10 || t->f.f11) ++ abort (); ++ if (i == 20) ++ exit (0); ++} ++ ++__attribute__((noinline)) ++void ++foo (int i) ++{ ++ T t; ++again: ++ t = (T) { { ++i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; ++ test (&t); ++ goto again; ++} ++ ++int ++main (void) ++{ ++ T *t1, *t2; ++ int cnt = 0; ++ t1 = (T *) 0; ++loop: ++ t2 = t1; ++ t1 = & (T) { .f.f9 = cnt++ }; ++ if (cnt < 3) ++ goto loop; ++ if (t1 != t2 || t1->f.f9 != 2) ++ abort (); ++ foo (10); ++ return 0; ++} +--- 
gcc/testsuite/gcc.dg/tree-ssa/pr33723.c (revision 0) ++++ gcc/testsuite/gcc.dg/tree-ssa/pr33723.c (revision 129743) +@@ -0,0 +1,72 @@ ++/* PR tree-optimization/33723 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-gimple" } */ ++ ++typedef union ++{ ++ struct ++ { ++ int f1, f2, f3, f4, f5, f6, f7, f8; ++ long int f9, f10; ++ int f11; ++ } f; ++ char s[4]; ++ long int a; ++} T; ++ ++void ++foo1 (void) ++{ ++ T t; ++ t = (T) { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; ++ test (&t); ++} ++ ++void ++bar1 (void) ++{ ++ T t = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; ++ test (&t); ++} ++ ++void ++baz1 (void) ++{ ++ T t; ++ t = (const T) { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; ++ test (&t); ++} ++ ++void ++foo2 (void) ++{ ++ T t; ++ t = (T) { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 } }; ++ test (&t); ++} ++ ++void ++bar2 (void) ++{ ++ T t = { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 } }; ++ test (&t); ++} ++ ++void ++baz2 (void) ++{ ++ T t; ++ t = (const T) { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 } }; ++ test (&t); ++} ++ ++void ++baz3 (void) ++{ ++ T t; ++ t = (const T) (T) { { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 } }; ++ test (&t); ++} ++ ++/* { dg-final { scan-tree-dump-not "t = D" "gimple"} } */ ++/* { dg-final { cleanup-tree-dump "gimple" } } */ diff --git a/gcc41-pr33744.patch b/gcc41-pr33744.patch deleted file mode 100644 index 594fd4d..0000000 --- a/gcc41-pr33744.patch +++ /dev/null @@ -1,67 +0,0 @@ -2007-10-15 Jakub Jelinek - - PR c++/33744 - * parser.c (cp_parser_parenthesized_expression_list): Set - greater_than_is_operator_p to true in between the parens. - - * g++.dg/template/arg6.C: New test. 
- ---- gcc/cp/parser.c.jj 2007-10-12 00:28:24.000000000 +0200 -+++ gcc/cp/parser.c 2007-10-15 14:52:02.000000000 +0200 -@@ -4976,6 +4976,7 @@ cp_parser_parenthesized_expression_list - tree expression_list = NULL_TREE; - bool fold_expr_p = is_attribute_list; - tree identifier = NULL_TREE; -+ bool saved_greater_than_is_operator_p; - - /* Assume all the expressions will be constant. */ - if (non_constant_p) -@@ -4984,6 +4985,12 @@ cp_parser_parenthesized_expression_list - if (!cp_parser_require (parser, CPP_OPEN_PAREN, "`('")) - return error_mark_node; - -+ /* Within a parenthesized expression, a `>' token is always -+ the greater-than operator. */ -+ saved_greater_than_is_operator_p -+ = parser->greater_than_is_operator_p; -+ parser->greater_than_is_operator_p = true; -+ - /* Consume expressions until there are no more. */ - if (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_PAREN)) - while (true) -@@ -5069,9 +5076,16 @@ cp_parser_parenthesized_expression_list - if (ending < 0) - goto get_comma; - if (!ending) -- return error_mark_node; -+ { -+ parser->greater_than_is_operator_p -+ = saved_greater_than_is_operator_p; -+ return error_mark_node; -+ } - } - -+ parser->greater_than_is_operator_p -+ = saved_greater_than_is_operator_p; -+ - /* We built up the list in reverse order so we must reverse it now. */ - expression_list = nreverse (expression_list); - if (identifier) ---- gcc/testsuite/g++.dg/template/arg6.C.jj 2007-10-15 14:59:13.000000000 +0200 -+++ gcc/testsuite/g++.dg/template/arg6.C 2007-10-15 14:57:31.000000000 +0200 -@@ -0,0 +1,15 @@ -+// PR c++/33744 -+// { dg-do run } -+ -+template struct A { bool b; A() : b(B) {}; }; -+A a; -+A b; -+A<(bool)(2>1)> c; -+A1))> d; -+A1)> e; -+ -+int -+main () -+{ -+ return (a.b && b.b && c.b && d.b && e.b) ? 
0 : 1; -+} diff --git a/gcc41-pr33836.patch b/gcc41-pr33836.patch new file mode 100644 index 0000000..811e128 --- /dev/null +++ b/gcc41-pr33836.patch @@ -0,0 +1,51 @@ +2007-11-05 Jakub Jelinek + + PR c++/33836 + * parser.c (cp_parser_unary_expression): For &&label call + cp_parser_non_integral_constant_expression and return error_mark_node + if it returned true. + + * g++.dg/ext/label10.C: New test. + +--- gcc/cp/parser.c (revision 129895) ++++ gcc/cp/parser.c (revision 129896) +@@ -5329,13 +5329,18 @@ cp_parser_unary_expression (cp_parser *p + && token->type == CPP_AND_AND) + { + tree identifier; ++ tree expression; + + /* Consume the '&&' token. */ + cp_lexer_consume_token (parser->lexer); + /* Look for the identifier. */ + identifier = cp_parser_identifier (parser); + /* Create an expression representing the address. */ +- return finish_label_address_expr (identifier); ++ expression = finish_label_address_expr (identifier); ++ if (cp_parser_non_integral_constant_expression (parser, ++ "the address of a label")) ++ expression = error_mark_node; ++ return expression; + } + } + if (unary_operator != ERROR_MARK) +--- gcc/testsuite/g++.dg/ext/label10.C (revision 0) ++++ gcc/testsuite/g++.dg/ext/label10.C (revision 129896) +@@ -0,0 +1,17 @@ ++// PR c++/33836 ++// { dg-do compile } ++// { dg-options "-std=gnu++98" } ++ ++template struct A ++{ ++ enum { M = && N }; // { dg-error "referenced outside|cannot appear in" } ++}; ++ ++A<0> a; ++ ++void foo () ++{ ++ __label__ P; ++ enum { O = && P }; // { dg-error "cannot appear in" } ++ P:; ++} diff --git a/gcc41-pr33842.patch b/gcc41-pr33842.patch new file mode 100644 index 0000000..19803f1 --- /dev/null +++ b/gcc41-pr33842.patch @@ -0,0 +1,130 @@ +2007-10-27 Jakub Jelinek + + PR c++/33842 + * cxx-pretty-print.h (pp_cxx_offsetof_expression): New prototype. + * cxx-pretty-print.c (pp_cxx_primary_expression): Handle + OFFSETOF_EXPR. + (pp_cxx_offsetof_expression_1, pp_cxx_offsetof_expression): New + functions. 
+ * error.c (dump_expr): Handle OFFSETOF_EXPR. + + * g++.dg/template/error34.C: New test. + +--- gcc/cp/error.c (revision 129676) ++++ gcc/cp/error.c (revision 129677) +@@ -1887,6 +1887,10 @@ dump_expr (tree t, int flags) + dump_expr (TREE_OPERAND (t, 0), flags); + break; + ++ case OFFSETOF_EXPR: ++ pp_cxx_offsetof_expression (cxx_pp, t); ++ break; ++ + /* This list is incomplete, but should suffice for now. + It is very important that `sorry' does not call + `report_error_function'. That could cause an infinite loop. */ +--- gcc/cp/cxx-pretty-print.c (revision 129676) ++++ gcc/cp/cxx-pretty-print.c (revision 129677) +@@ -356,6 +356,10 @@ pp_cxx_primary_expression (cxx_pretty_pr + pp_cxx_right_paren (pp); + break; + ++ case OFFSETOF_EXPR: ++ pp_cxx_offsetof_expression (pp, t); ++ break; ++ + default: + pp_c_primary_expression (pp_c_base (pp), t); + break; +@@ -1944,6 +1948,49 @@ typedef c_pretty_print_fn pp_fun; + + /* Initialization of a C++ pretty-printer object. */ + ++static bool ++pp_cxx_offsetof_expression_1 (cxx_pretty_printer *pp, tree t) ++{ ++ switch (TREE_CODE (t)) ++ { ++ case ARROW_EXPR: ++ if (TREE_CODE (TREE_OPERAND (t, 0)) == STATIC_CAST_EXPR ++ && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (t, 0)))) ++ { ++ pp_cxx_type_id (pp, TREE_TYPE (TREE_TYPE (TREE_OPERAND (t, 0)))); ++ pp_cxx_separate_with (pp, ','); ++ return true; ++ } ++ return false; ++ case COMPONENT_REF: ++ if (!pp_cxx_offsetof_expression_1 (pp, TREE_OPERAND (t, 0))) ++ return false; ++ if (TREE_CODE (TREE_OPERAND (t, 0)) != ARROW_EXPR) ++ pp_cxx_dot (pp); ++ pp_cxx_expression (pp, TREE_OPERAND (t, 1)); ++ return true; ++ case ARRAY_REF: ++ if (!pp_cxx_offsetof_expression_1 (pp, TREE_OPERAND (t, 0))) ++ return false; ++ pp_left_bracket (pp); ++ pp_cxx_expression (pp, TREE_OPERAND (t, 1)); ++ pp_right_bracket (pp); ++ return true; ++ default: ++ return false; ++ } ++} ++ ++void ++pp_cxx_offsetof_expression (cxx_pretty_printer *pp, tree t) ++{ ++ pp_cxx_identifier (pp, "offsetof"); ++ 
pp_cxx_left_paren (pp); ++ if (!pp_cxx_offsetof_expression_1 (pp, TREE_OPERAND (t, 0))) ++ pp_cxx_expression (pp, TREE_OPERAND (t, 0)); ++ pp_cxx_right_paren (pp); ++} ++ + void + pp_cxx_pretty_printer_init (cxx_pretty_printer *pp) + { +--- gcc/cp/cxx-pretty-print.h (revision 129676) ++++ gcc/cp/cxx-pretty-print.h (revision 129677) +@@ -70,6 +70,6 @@ void pp_cxx_separate_with (cxx_pretty_pr + + void pp_cxx_declaration (cxx_pretty_printer *, tree); + void pp_cxx_canonical_template_parameter (cxx_pretty_printer *, tree); +- ++void pp_cxx_offsetof_expression (cxx_pretty_printer *, tree); + + #endif /* GCC_CXX_PRETTY_PRINT_H */ +--- gcc/testsuite/g++.dg/template/error34.C (revision 0) ++++ gcc/testsuite/g++.dg/template/error34.C (revision 129677) +@@ -0,0 +1,29 @@ ++// PR c++/33842 ++// { dg-do compile } ++ ++template struct A ++{ ++ A<__builtin_offsetof(T, x)>(); // { dg-error "type/value mismatch|offsetof\\(T, x\\)" } ++}; ++ ++template struct B ++{ ++ B<__builtin_offsetof(T, x.y)>(); // { dg-error "type/value mismatch|offsetof\\(T, x.y\\)" } ++}; ++ ++template struct C ++{ ++ C<__builtin_offsetof(T, x[6])>(); // { dg-error "type/value mismatch|offsetof\\(T, x\\\[6\\\]\\)" } ++}; ++ ++template struct D ++{ ++ D<__builtin_offsetof(T, x.y[6].z)>(); // { dg-error "type/value mismatch|offsetof\\(T, x.y\\\[6\\\].z\\)" } ++}; ++ ++struct E { int x; }; ++ ++template struct F ++{ ++ F<__builtin_offsetof(E, x)>(); // { dg-error "type/value mismatch|offsetof\\(E, x\\)" } ++}; diff --git a/gcc41-pr33844.patch b/gcc41-pr33844.patch new file mode 100644 index 0000000..4d5d883 --- /dev/null +++ b/gcc41-pr33844.patch @@ -0,0 +1,56 @@ +2007-10-27 Jakub Jelinek + + PR c++/33844 + * cxx-pretty-print.c (pp_cxx_pm_expression) : Print + ->* rather than .*. + * error.c (dump_expr): Handle MEMBER_REF and DOTSTAR_EXPR. + + * g++.dg/other/ptrmem8.C: New test. 
+ +--- gcc/cp/error.c (revision 129681) ++++ gcc/cp/error.c (revision 129682) +@@ -1891,6 +1891,11 @@ dump_expr (tree t, int flags) + pp_cxx_offsetof_expression (cxx_pp, t); + break; + ++ case MEMBER_REF: ++ case DOTSTAR_EXPR: ++ pp_multiplicative_expression (cxx_pp, t); ++ break; ++ + /* This list is incomplete, but should suffice for now. + It is very important that `sorry' does not call + `report_error_function'. That could cause an infinite loop. */ +--- gcc/cp/cxx-pretty-print.c (revision 129681) ++++ gcc/cp/cxx-pretty-print.c (revision 129682) +@@ -814,7 +814,10 @@ pp_cxx_pm_expression (cxx_pretty_printer + case MEMBER_REF: + case DOTSTAR_EXPR: + pp_cxx_pm_expression (pp, TREE_OPERAND (t, 0)); +- pp_cxx_dot (pp); ++ if (TREE_CODE (t) == MEMBER_REF) ++ pp_cxx_arrow (pp); ++ else ++ pp_cxx_dot (pp); + pp_star(pp); + pp_cxx_cast_expression (pp, TREE_OPERAND (t, 1)); + break; +--- gcc/testsuite/g++.dg/other/ptrmem8.C (revision 0) ++++ gcc/testsuite/g++.dg/other/ptrmem8.C (revision 129682) +@@ -0,0 +1,16 @@ ++// PR c++/33844 ++// { dg-do compile } ++ ++struct A {}; ++ ++template void foo(void (A::* f)()) ++{ ++ A a; ++ &(a.*f); // { dg-error "invalid use of\[^\n\]*\\.\\*\[^\n\]*to form|qualified-id is required" } ++} ++ ++template void bar(void (A::* f)()) ++{ ++ A *p; ++ &(p->*f); // { dg-error "invalid use of\[^\n\]*->\\*\[^\n\]*to form|qualified-id is required" } ++} diff --git a/gcc41-pr33890.patch b/gcc41-pr33890.patch new file mode 100644 index 0000000..1bf0a92 --- /dev/null +++ b/gcc41-pr33890.patch @@ -0,0 +1,82 @@ +2007-12-27 Jakub Jelinek + + PR c++/33890 + * semantics.c (finish_omp_for): Don't call + fold_build_cleanup_point_expr if processing_template_decl. + + * g++.dg/gomp/pr33890.C: New test. 
+ +--- gcc/cp/semantics.c.jj 2007-12-05 21:42:07.000000000 +0100 ++++ gcc/cp/semantics.c 2007-12-27 23:45:39.000000000 +0100 +@@ -3893,15 +3893,17 @@ finish_omp_for (location_t locus, tree d + pre_body = NULL; + } + +- init = fold_build_cleanup_point_expr (TREE_TYPE (init), init); ++ if (!processing_template_decl) ++ init = fold_build_cleanup_point_expr (TREE_TYPE (init), init); + init = build_modify_expr (decl, NOP_EXPR, init); + if (cond && TREE_SIDE_EFFECTS (cond) && COMPARISON_CLASS_P (cond)) + { + int n = TREE_SIDE_EFFECTS (TREE_OPERAND (cond, 1)) != 0; + tree t = TREE_OPERAND (cond, n); + +- TREE_OPERAND (cond, n) +- = fold_build_cleanup_point_expr (TREE_TYPE (t), t); ++ if (!processing_template_decl) ++ TREE_OPERAND (cond, n) ++ = fold_build_cleanup_point_expr (TREE_TYPE (t), t); + } + omp_for = c_finish_omp_for (locus, decl, init, cond, incr, body, pre_body); + if (omp_for != NULL +@@ -3912,9 +3914,10 @@ finish_omp_for (location_t locus, tree d + tree t = TREE_OPERAND (OMP_FOR_INCR (omp_for), 1); + int n = TREE_SIDE_EFFECTS (TREE_OPERAND (t, 1)) != 0; + +- TREE_OPERAND (t, n) +- = fold_build_cleanup_point_expr (TREE_TYPE (TREE_OPERAND (t, n)), +- TREE_OPERAND (t, n)); ++ if (!processing_template_decl) ++ TREE_OPERAND (t, n) ++ = fold_build_cleanup_point_expr (TREE_TYPE (TREE_OPERAND (t, n)), ++ TREE_OPERAND (t, n)); + } + return omp_for; + } +--- gcc/testsuite/g++.dg/gomp/pr33890.C.jj 2007-12-27 23:45:39.000000000 +0100 ++++ gcc/testsuite/g++.dg/gomp/pr33890.C 2007-12-27 23:45:39.000000000 +0100 +@@ -0,0 +1,34 @@ ++// PR c++/33890 ++// { dg-do compile } ++// { dg-options "-fopenmp" } ++ ++struct A ++{ ++ int x; ++ A () : x (0) {} ++ int & getX (); ++}; ++ ++template void ++foo () ++{ ++ A a; ++ ++#pragma omp for ++ for (int i = a.getX (); i < 10; ++i) ++ ; ++#pragma omp for ++ for (int i = 0; i < a.getX (); ++i) ++ ; ++ a.x = 1; ++#pragma omp for ++ for (int i = 0; i < 10; i += a.getX ()) ++ ; ++} ++ ++void ++bar () ++{ ++ foo <0> (); ++ foo <1> (); ++} 
diff --git a/gcc41-pr33962.patch b/gcc41-pr33962.patch new file mode 100644 index 0000000..ecd83dc --- /dev/null +++ b/gcc41-pr33962.patch @@ -0,0 +1,39 @@ +2007-11-20 Jakub Jelinek + + PR c++/33962 + * pt.c (more_specialized_fn): Don't segfault if one or + both argument list end with ellipsis. + + * g++.dg/overload/template3.C: New test. + +--- gcc/cp/pt.c (revision 130307) ++++ gcc/cp/pt.c (revision 130308) +@@ -13523,6 +13523,10 @@ more_specialized_fn (tree pat1, tree pat + + args1 = TREE_CHAIN (args1); + args2 = TREE_CHAIN (args2); ++ ++ /* Stop when an ellipsis is seen. */ ++ if (args1 == NULL_TREE || args2 == NULL_TREE) ++ break; + } + + processing_template_decl--; +--- gcc/testsuite/g++.dg/overload/template3.C (revision 0) ++++ gcc/testsuite/g++.dg/overload/template3.C (revision 130308) +@@ -0,0 +1,15 @@ ++// PR c++/33962 ++// { dg-do compile } ++ ++template struct A; ++ ++template void foo (const U &x, ...); ++template void foo (const A &x, ...); ++ ++void bar (const A &x, const char *y) ++{ ++ foo (x, y); ++} ++ ++/* { dg-final { scan-assembler "_Z3fooIiEvRK1AIT_Ez" } } */ ++/* { dg-final { scan-assembler-not "_Z3fooI1AIiEEvRKT_z" } } */ diff --git a/gcc41-pr34070.patch b/gcc41-pr34070.patch new file mode 100644 index 0000000..b8b1f6a --- /dev/null +++ b/gcc41-pr34070.patch @@ -0,0 +1,62 @@ +2007-11-12 Richard Guenther + + PR middle-end/34070 + * fold-const.c (fold_binary): If testing for non-negative + operands with tree_expr_nonnegative_warnv_p make sure to + use op0 which has all (sign) conversions retained. + + * gcc.c-torture/execute/pr34070-1.c: New testcase. + * gcc.c-torture/execute/pr34070-2.c: Likewise. + +--- gcc/fold-const.c (revision 130097) ++++ gcc/fold-const.c (revision 130098) +@@ -8509,7 +8509,7 @@ fold_binary (enum tree_code code, tree t + /* Simplify A / (B << N) where A and B are positive and B is + a power of 2, to A >> (N + log2(B)). 
*/ + if (TREE_CODE (arg1) == LSHIFT_EXPR +- && (TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (arg0))) ++ && (TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (op0))) + { + tree sval = TREE_OPERAND (arg1, 0); + if (integer_pow2p (sval) && tree_int_cst_sgn (sval) > 0) +@@ -8584,7 +8584,7 @@ fold_binary (enum tree_code code, tree t + /* Optimize TRUNC_MOD_EXPR by a power of two into a BIT_AND_EXPR, + i.e. "X % C" into "X & (C - 1)", if X and C are positive. */ + if ((code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR) +- && (TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (arg0))) ++ && (TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (op0))) + { + tree c = arg1; + /* Also optimize A % (C << N) where C is a power of 2, +--- gcc/testsuite/gcc.c-torture/execute/pr34070-1.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/execute/pr34070-1.c (revision 130098) +@@ -0,0 +1,13 @@ ++extern void abort (void); ++ ++int f(unsigned int x) ++{ ++ return ((int)x) % 4; ++} ++ ++int main() ++{ ++ if (f(-1) != -1) ++ abort (); ++ return 0; ++} +--- gcc/testsuite/gcc.c-torture/execute/pr34070-2.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/execute/pr34070-2.c (revision 130098) +@@ -0,0 +1,13 @@ ++extern void abort (void); ++ ++int f(unsigned int x, int n) ++{ ++ return ((int)x) / (1 << n); ++} ++ ++int main() ++{ ++ if (f(-1, 1) != 0) ++ abort (); ++ return 0; ++} diff --git a/gcc41-pr34089.patch b/gcc41-pr34089.patch new file mode 100644 index 0000000..76ff9cf --- /dev/null +++ b/gcc41-pr34089.patch @@ -0,0 +1,39 @@ +2007-11-20 Jakub Jelinek + + PR c++/34089 + * parser.c (cp_parser_class_head): Reject function template ids. + + * g++.dg/template/crash74.C: New test. + +--- gcc/cp/parser.c (revision 130315) ++++ gcc/cp/parser.c (revision 130316) +@@ -14536,8 +14536,18 @@ cp_parser_class_head (cp_parser* parser, + /* Look up the type. 
*/ + if (template_id_p) + { +- type = TREE_TYPE (id); +- type = maybe_process_partial_specialization (type); ++ if (TREE_CODE (id) == TEMPLATE_ID_EXPR ++ && (DECL_FUNCTION_TEMPLATE_P (TREE_OPERAND (id, 0)) ++ || TREE_CODE (TREE_OPERAND (id, 0)) == OVERLOAD)) ++ { ++ error ("function template %qD redeclared as a class template", id); ++ type = error_mark_node; ++ } ++ else ++ { ++ type = TREE_TYPE (id); ++ type = maybe_process_partial_specialization (type); ++ } + if (nested_name_specifier) + pushed_scope = push_scope (nested_name_specifier); + } +--- gcc/testsuite/g++.dg/template/crash74.C (revision 0) ++++ gcc/testsuite/g++.dg/template/crash74.C (revision 130316) +@@ -0,0 +1,6 @@ ++// PR c++/34089 ++// { dg-do compile } ++// { dg-options "" } ++ ++template void foo () { } ++template struct foo { }; // { dg-error "redeclared as" } diff --git a/gcc41-pr34130.patch b/gcc41-pr34130.patch new file mode 100644 index 0000000..f67f8ae --- /dev/null +++ b/gcc41-pr34130.patch @@ -0,0 +1,35 @@ +2007-11-17 Richard Guenther + + PR middle-end/34130 + * fold-const.c (extract_muldiv_1): Do not move negative + constants inside ABS_EXPR. + + * gcc.c-torture/execute/pr34130.c: New testcase. + +--- gcc/fold-const.c (revision 130257) ++++ gcc/fold-const.c (revision 130258) +@@ -6095,6 +6095,9 @@ extract_muldiv_1 (tree t, tree c, enum t + } + break; + } ++ /* If the constant is negative, we cannot simplify this. 
*/ ++ if (tree_int_cst_sgn (c) == -1) ++ break; + /* FALLTHROUGH */ + case NEGATE_EXPR: + if ((t1 = extract_muldiv (op0, c, code, wide_type, strict_overflow_p)) +--- gcc/testsuite/gcc.c-torture/execute/pr34130.c (revision 0) ++++ gcc/testsuite/gcc.c-torture/execute/pr34130.c (revision 130258) +@@ -0,0 +1,12 @@ ++extern void abort (void); ++int foo (int i) ++{ ++ return -2 * __builtin_abs(i - 2); ++} ++int main() ++{ ++ if (foo(1) != -2 ++ || foo(3) != -2) ++ abort (); ++ return 0; ++} diff --git a/gcc41-pr34146.patch b/gcc41-pr34146.patch new file mode 100644 index 0000000..9d830d6 --- /dev/null +++ b/gcc41-pr34146.patch @@ -0,0 +1,133 @@ +2007-11-20 Jakub Jelinek + + PR c/34146 + * c-gimplify.c (optimize_compound_literals_in_ctor): New function. + (c_gimplify_expr): Use it. + + * gcc.dg/tree-ssa/pr34146.c: New test. + +--- gcc/c-gimplify.c (revision 130310) ++++ gcc/c-gimplify.c (revision 130311) +@@ -208,6 +208,47 @@ gimplify_compound_literal_expr (tree *ex + return GS_OK; + } + ++/* Optimize embedded COMPOUND_LITERAL_EXPRs within a CONSTRUCTOR, ++ return a new CONSTRUCTOR if something changed. 
*/ ++ ++static tree ++optimize_compound_literals_in_ctor (tree orig_ctor) ++{ ++ tree ctor = orig_ctor; ++ VEC(constructor_elt,gc) *elts = CONSTRUCTOR_ELTS (ctor); ++ unsigned int idx, num = VEC_length (constructor_elt, elts); ++ ++ for (idx = 0; idx < num; idx++) ++ { ++ tree value = VEC_index (constructor_elt, elts, idx)->value; ++ tree newval = value; ++ if (TREE_CODE (value) == CONSTRUCTOR) ++ newval = optimize_compound_literals_in_ctor (value); ++ else if (TREE_CODE (value) == COMPOUND_LITERAL_EXPR) ++ { ++ tree decl_s = COMPOUND_LITERAL_EXPR_DECL_STMT (value); ++ tree decl = DECL_EXPR_DECL (decl_s); ++ tree init = DECL_INITIAL (decl); ++ ++ if (!TREE_ADDRESSABLE (value) ++ && !TREE_ADDRESSABLE (decl) ++ && init) ++ newval = init; ++ } ++ if (newval == value) ++ continue; ++ ++ if (ctor == orig_ctor) ++ { ++ ctor = copy_node (orig_ctor); ++ CONSTRUCTOR_ELTS (ctor) = VEC_copy (constructor_elt, gc, elts); ++ elts = CONSTRUCTOR_ELTS (ctor); ++ } ++ VEC_index (constructor_elt, elts, idx)->value = newval; ++ } ++ return ctor; ++} ++ + /* Do C-specific gimplification. Args are as for gimplify_expr. 
*/ + + int +@@ -254,6 +295,18 @@ c_gimplify_expr (tree *expr_p, tree *pre + return GS_OK; + } + } ++ else if (TREE_CODE (TREE_OPERAND (*expr_p, 1)) == CONSTRUCTOR) ++ { ++ tree ctor ++ = optimize_compound_literals_in_ctor (TREE_OPERAND (*expr_p, 1)); ++ ++ if (ctor != TREE_OPERAND (*expr_p, 1)) ++ { ++ *expr_p = copy_node (*expr_p); ++ TREE_OPERAND (*expr_p, 1) = ctor; ++ return GS_OK; ++ } ++ } + return GS_UNHANDLED; + + default: +--- gcc/testsuite/gcc.dg/tree-ssa/pr34146.c (revision 0) ++++ gcc/testsuite/gcc.dg/tree-ssa/pr34146.c (revision 130311) +@@ -0,0 +1,53 @@ ++/* PR c/34146 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-gimple" } */ ++ ++struct A ++{ ++ int f1, f2, f3; ++}; ++ ++struct B ++{ ++ struct A g1, g2; ++}; ++ ++struct C ++{ ++ struct B h1, h2; ++}; ++ ++typedef union ++{ ++ struct C c; ++ char s[4]; ++ long int a; ++} T; ++ ++void ++foo (void) ++{ ++ T t = { { { { 0, 0, 0 }, { 0, 0, 0 } }, { { 0, 0, 0 }, { 0, 0, 0 } } } }; ++ test (&t); ++} ++ ++void ++bar (void) ++{ ++ T t = { { { { 0, 0, 0 }, (struct A) { 0, 0, 0 } }, ++ (struct B) { (struct A) { 0, 0, 0 }, { 0, 0, 0 } } } }; ++ test (&t); ++} ++ ++void ++baz (void) ++{ ++ T t = { { { { 0, 0, 0 }, (struct A) { 1, 1, 1 } }, ++ (struct B) { (struct A) { 0, 0, 0 }, { 1, 1, 1 } } } }; ++ test (&t); ++} ++ ++/* { dg-final { scan-tree-dump-not "t = D" "gimple"} } */ ++/* { dg-final { scan-tree-dump-not "t\.c\.h\[12\] = D" "gimple"} } */ ++/* { dg-final { scan-tree-dump-not "\.g\[12\] = D" "gimple"} } */ ++/* { dg-final { cleanup-tree-dump "gimple" } } */ diff --git a/gcc41-pr34178.patch b/gcc41-pr34178.patch new file mode 100644 index 0000000..7d9e655 --- /dev/null +++ b/gcc41-pr34178.patch @@ -0,0 +1,128 @@ +2007-12-09 Jakub Jelinek + + PR c++/34178 + PR c++/34340 + * repo.c (repo_emit_p): Return 2 for DECL_INTEGRAL_CONSTANT_VAR_P + in class scope rather than DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P. + Return 2 also if DECL_EXPLICIT_INSTANTIATION. 
+ * decl2.c (import_export_decl): Don't make VAR_DECLs import_p when + flag_use_repository and repo_emit_p returned 2. + + * g++.dg/template/repo6.C: New test. + * g++.dg/template/repo7.C: New test. + * g++.dg/template/repo8.C: New test. + +--- gcc/cp/decl2.c (revision 130726) ++++ gcc/cp/decl2.c (revision 130727) +@@ -2230,7 +2230,8 @@ import_export_decl (tree decl) + { + /* DECL is an implicit instantiation of a function or static + data member. */ +- if (flag_implicit_templates ++ if ((flag_implicit_templates ++ && !flag_use_repository) + || (flag_implicit_inline_templates + && TREE_CODE (decl) == FUNCTION_DECL + && DECL_DECLARED_INLINE_P (decl))) +--- gcc/cp/repo.c (revision 130726) ++++ gcc/cp/repo.c (revision 130727) +@@ -304,16 +304,19 @@ repo_emit_p (tree decl) + && (!TYPE_LANG_SPECIFIC (type) + || !CLASSTYPE_TEMPLATE_INSTANTIATION (type))) + return 2; +- /* Static data members initialized by constant expressions must ++ /* Const static data members initialized by constant expressions must + be processed where needed so that their definitions are + available. */ +- if (DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (decl) ++ if (DECL_INTEGRAL_CONSTANT_VAR_P (decl) + && DECL_CLASS_SCOPE_P (decl)) + return 2; + } + else if (!DECL_TEMPLATE_INSTANTIATION (decl)) + return 2; + ++ if (DECL_EXPLICIT_INSTANTIATION (decl)) ++ return 2; ++ + /* For constructors and destructors, the repository contains + information about the clones -- not the original function -- + because only the clones are emitted in the object file. 
*/ +--- gcc/testsuite/g++.dg/template/repo7.C (revision 0) ++++ gcc/testsuite/g++.dg/template/repo7.C (revision 130727) +@@ -0,0 +1,23 @@ ++// PR c++/34340 ++// { dg-options "-frepo" } ++// { dg-final { cleanup-repo-files } } ++ ++struct A ++{ ++ int a; ++}; ++ ++template struct D ++{ ++ static const A b; ++}; ++ ++template const A D::b = { 2 }; ++template class D; ++ ++const A *x = &D::b; ++ ++int ++main () ++{ ++} +--- gcc/testsuite/g++.dg/template/repo8.C (revision 0) ++++ gcc/testsuite/g++.dg/template/repo8.C (revision 130727) +@@ -0,0 +1,22 @@ ++// PR c++/34340 ++// { dg-options "-frepo" } ++// { dg-final { cleanup-repo-files } } ++ ++struct A ++{ ++ int a; ++}; ++ ++template struct D ++{ ++ static const A b; ++}; ++ ++template const A D::b = { 2 }; ++ ++const A *x = &D::b; ++ ++int ++main () ++{ ++} +--- gcc/testsuite/g++.dg/template/repo6.C (revision 0) ++++ gcc/testsuite/g++.dg/template/repo6.C (revision 130727) +@@ -0,0 +1,24 @@ ++// PR c++/34178 ++// { dg-options "-frepo" } ++// { dg-final { cleanup-repo-files } } ++ ++template ++class A ++{ ++private: ++ static const int x; ++ static int y; ++ ++public: ++ int getX () { return x + y; } ++}; ++ ++template const int A::x = 0; ++template int A::y = 0; ++ ++int ++main () ++{ ++ A a; ++ return a.getX(); ++} diff --git a/gcc41-pr34213.patch b/gcc41-pr34213.patch new file mode 100644 index 0000000..b102d39 --- /dev/null +++ b/gcc41-pr34213.patch @@ -0,0 +1,68 @@ +2007-11-27 Jakub Jelinek + + PR c++/34213 + * tree.c (decl_linkage): Static data members and static member + functions in anonymous ns classes are lk_external. + + * g++.dg/ext/visibility/anon8.C: New test. + +--- gcc/cp/tree.c (revision 130462) ++++ gcc/cp/tree.c (revision 130463) +@@ -2526,10 +2526,18 @@ decl_linkage (tree decl) + /* Members of the anonymous namespace also have TREE_PUBLIC unset, but + are considered to have external linkage for language purposes. DECLs + really meant to have internal linkage have DECL_THIS_STATIC set. 
*/ +- if (TREE_CODE (decl) == TYPE_DECL +- || ((TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == FUNCTION_DECL) +- && !DECL_THIS_STATIC (decl))) ++ if (TREE_CODE (decl) == TYPE_DECL) + return lk_external; ++ if (TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == FUNCTION_DECL) ++ { ++ if (!DECL_THIS_STATIC (decl)) ++ return lk_external; ++ ++ /* Static data members and static member functions from classes ++ in anonymous namespace also don't have TREE_PUBLIC set. */ ++ if (DECL_CLASS_CONTEXT (decl)) ++ return lk_external; ++ } + + /* Everything else has internal linkage. */ + return lk_internal; +--- gcc/testsuite/g++.dg/ext/visibility/anon8.C (revision 0) ++++ gcc/testsuite/g++.dg/ext/visibility/anon8.C (revision 130463) +@@ -0,0 +1,33 @@ ++// PR c++/34213 ++// { dg-do compile } ++ ++template ++void call () ++{ ++ fn (); ++} ++ ++namespace ++{ ++ struct B1 ++ { ++ static void fn1 () {} ++ static void fn4 (); ++ }; ++ void fn3 () {} ++ void B1::fn4 () {} ++ static void fn5 () {} ++} ++ ++int main () ++{ ++ struct B2 ++ { ++ static void fn2 () {} ++ }; ++ call<&B1::fn1> (); ++ call<&B2::fn2> (); // { dg-error "not external linkage|no matching" } ++ call<&fn3> (); ++ call<&B1::fn4> (); ++ call<&fn5> (); // { dg-error "not external linkage|no matching" } ++} diff --git a/gcc41-pr34275.patch b/gcc41-pr34275.patch new file mode 100644 index 0000000..3545fc5 --- /dev/null +++ b/gcc41-pr34275.patch @@ -0,0 +1,35 @@ +2007-11-30 Jakub Jelinek + + PR c++/34275 + * error.c (dump_expr): Handle OBJ_TYPE_REF. + + * g++.dg/other/error20.C: New test. + +--- gcc/cp/error.c (revision 130532) ++++ gcc/cp/error.c (revision 130533) +@@ -2056,6 +2056,10 @@ dump_expr (tree t, int flags) + pp_expression (cxx_pp, t); + break; + ++ case OBJ_TYPE_REF: ++ dump_expr (resolve_virtual_fun_from_obj_type_ref (t), flags); ++ break; ++ + /* This list is incomplete, but should suffice for now. + It is very important that `sorry' does not call + `report_error_function'. 
That could cause an infinite loop. */ +--- gcc/testsuite/g++.dg/other/error20.C (revision 0) ++++ gcc/testsuite/g++.dg/other/error20.C (revision 130533) +@@ -0,0 +1,12 @@ ++// PR c++/34275 ++// { dg-do compile } ++ ++struct A ++{ // { dg-error "candidates" } ++ virtual A foo (); ++}; ++ ++void bar (A& a) ++{ ++ a.foo () = 0; // { dg-error "A::foo\\(\\) = 0" } ++} diff --git a/gcc41-pr34364.patch b/gcc41-pr34364.patch new file mode 100644 index 0000000..e21a607 --- /dev/null +++ b/gcc41-pr34364.patch @@ -0,0 +1,54 @@ +2007-12-11 Jakub Jelinek + + PR c++/34364 + * rtti.c (build_dynamic_cast): Call convert_from_reference even for + dynamic_cast in a template. + + * g++.dg/rtti/dyncast2.C: New test. + +--- gcc/cp/rtti.c (revision 130769) ++++ gcc/cp/rtti.c (revision 130770) +@@ -728,8 +728,7 @@ build_dynamic_cast (tree type, tree expr + { + expr = build_min (DYNAMIC_CAST_EXPR, type, expr); + TREE_SIDE_EFFECTS (expr) = 1; +- +- return expr; ++ return convert_from_reference (expr); + } + + return convert_from_reference (build_dynamic_cast_1 (type, expr)); +--- gcc/testsuite/g++.dg/rtti/dyncast2.C (revision 0) ++++ gcc/testsuite/g++.dg/rtti/dyncast2.C (revision 130770) +@@ -0,0 +1,31 @@ ++// PR c++/34364 ++// { dg-do run } ++ ++struct A ++{ ++ virtual ~A () {} ++}; ++ ++struct B : public A ++{ ++ template struct C ++ { ++ static void f (A &a) ++ { ++ dynamic_cast (a).g (); ++ } ++ }; ++ ++ B () : c (6) {} ++ void g () { c++; } ++ int c; ++}; ++ ++B b; ++ ++int ++main (void) ++{ ++ B::C::f (b); ++ return b.c != 7; ++} diff --git a/gcc41-pr34394.patch b/gcc41-pr34394.patch new file mode 100644 index 0000000..7449f43 --- /dev/null +++ b/gcc41-pr34394.patch @@ -0,0 +1,29 @@ +2007-12-10 Jakub Jelinek + + PR c++/34394 + * error.c (dump_expr): Handle ABS_EXPR. + + * g++.dg/other/error22.C: New test. 
+ +--- gcc/cp/error.c (revision 130743) ++++ gcc/cp/error.c (revision 130744) +@@ -1757,6 +1757,7 @@ dump_expr (tree t, int flags) + break; + + case SCOPE_REF: ++ case ABS_EXPR: + pp_expression (cxx_pp, t); + break; + +--- gcc/testsuite/g++.dg/other/error22.C (revision 0) ++++ gcc/testsuite/g++.dg/other/error22.C (revision 130744) +@@ -0,0 +1,9 @@ ++// PR c++/34394 ++// { dg-do compile } ++ ++extern double fabs (double); ++ ++void foo (double x) ++{ ++ fabs (x) (); // { dg-error "__builtin_abs" } ++} diff --git a/gcc41-pr34506.patch b/gcc41-pr34506.patch new file mode 100644 index 0000000..ffc2f8a --- /dev/null +++ b/gcc41-pr34506.patch @@ -0,0 +1,112 @@ +2007-12-17 Jakub Jelinek + + PR c/34506 + * c-parser.c (c_parser_omp_all_clauses): Accept optional comma + in between clauses. + + * parser.c (cp_parser_omp_all_clauses): Accept optional comma + in between clauses. + + * gcc.dg/gomp/clause-2.c: New test. + * g++.dg/gomp/clause-4.C: New test. + +--- gcc/c-parser.c (revision 131007) ++++ gcc/c-parser.c (revision 131008) +@@ -7065,13 +7065,20 @@ c_parser_omp_all_clauses (c_parser *pars + const char *where) + { + tree clauses = NULL; ++ bool first = true; + + while (c_parser_next_token_is_not (parser, CPP_PRAGMA_EOL)) + { +- const pragma_omp_clause c_kind = c_parser_omp_clause_name (parser); ++ pragma_omp_clause c_kind; + const char *c_name; + tree prev = clauses; + ++ if (!first && c_parser_next_token_is (parser, CPP_COMMA)) ++ c_parser_consume_token (parser); ++ ++ first = false; ++ c_kind = c_parser_omp_clause_name (parser); ++ + switch (c_kind) + { + case PRAGMA_OMP_CLAUSE_COPYIN: +--- gcc/cp/parser.c (revision 131007) ++++ gcc/cp/parser.c (revision 131008) +@@ -19671,13 +19671,20 @@ cp_parser_omp_all_clauses (cp_parser *pa + const char *where, cp_token *pragma_tok) + { + tree clauses = NULL; ++ bool first = true; + + while (cp_lexer_next_token_is_not (parser->lexer, CPP_PRAGMA_EOL)) + { +- pragma_omp_clause c_kind = cp_parser_omp_clause_name (parser); ++ 
pragma_omp_clause c_kind; + const char *c_name; + tree prev = clauses; + ++ if (!first && cp_lexer_next_token_is (parser->lexer, CPP_COMMA)) ++ cp_lexer_consume_token (parser->lexer); ++ ++ c_kind = cp_parser_omp_clause_name (parser); ++ first = false; ++ + switch (c_kind) + { + case PRAGMA_OMP_CLAUSE_COPYIN: +--- gcc/testsuite/gcc.dg/gomp/clause-2.c (revision 0) ++++ gcc/testsuite/gcc.dg/gomp/clause-2.c (revision 131008) +@@ -0,0 +1,23 @@ ++/* PR c/34506 */ ++/* { dg-do compile } */ ++ ++#define p parallel ++ ++void ++foo (int x) ++{ ++#pragma omp p num_threads (4) if (1) private (x) ++ ; ++#pragma omp p num_threads(4)if(1)private(x) ++ ; ++#pragma omp p num_threads (4), if (1) , private (x) ++ ; ++#pragma omp p num_threads(4),if(1),private(x) ++ ; ++#pragma omp p, num_threads (4), if (1), private (x) /* { dg-error "clause before" } */ ++ ; ++#pragma omp p num_threads (4), if (1), private (x), /* { dg-error "clause before" } */ ++ ; ++#pragma omp p num_threads (4), , if (1), private (x) /* { dg-error "clause before" } */ ++ ; ++} +--- gcc/testsuite/g++.dg/gomp/clause-4.C (revision 0) ++++ gcc/testsuite/g++.dg/gomp/clause-4.C (revision 131008) +@@ -0,0 +1,23 @@ ++// PR c/34506 ++// { dg-do compile } ++ ++#define p parallel ++ ++void ++foo (int x) ++{ ++#pragma omp p num_threads (4) if (1) private (x) ++ ; ++#pragma omp p num_threads(4)if(1)private(x) ++ ; ++#pragma omp p num_threads (4), if (1) , private (x) ++ ; ++#pragma omp p num_threads(4),if(1),private(x) ++ ; ++#pragma omp p, num_threads (4), if (1), private (x) // { dg-error "clause before" } ++ ; ++#pragma omp p num_threads (4), if (1), private (x), // { dg-error "clause before" } ++ ; ++#pragma omp p num_threads (4), , if (1), private (x) // { dg-error "clause before" } ++ ; ++} diff --git a/gcc41-pr34513.patch b/gcc41-pr34513.patch new file mode 100644 index 0000000..3375b16 --- /dev/null +++ b/gcc41-pr34513.patch @@ -0,0 +1,92 @@ +2007-12-19 Jakub Jelinek + + PR c++/34513 + * parser.c 
(cp_parser_omp_parallel): For non-combined parallel + call cp_parser_statement rather than + cp_parser_already_scoped_statement. + + * testsuite/libgomp.c/pr34513.c: New test. + * testsuite/libgomp.c++/pr34513.C: New test. + +--- gcc/cp/parser.c (revision 131058) ++++ gcc/cp/parser.c (revision 131059) +@@ -20271,7 +20271,7 @@ cp_parser_omp_parallel (cp_parser *parse + switch (p_kind) + { + case PRAGMA_OMP_PARALLEL: +- cp_parser_already_scoped_statement (parser); ++ cp_parser_statement (parser, NULL_TREE, false); + par_clause = clauses; + break; + +--- libgomp/testsuite/libgomp.c++/pr34513.C (revision 0) ++++ libgomp/testsuite/libgomp.c++/pr34513.C (revision 131059) +@@ -0,0 +1,32 @@ ++// PR c++/34513 ++// { dg-do run } ++ ++#include ++ ++extern "C" void abort (); ++ ++static int errors = 0; ++static int thrs = 4; ++ ++int ++main () ++{ ++ omp_set_dynamic (0); ++ ++ #pragma omp parallel num_threads (thrs) ++ { ++ static int shrd = 0; ++ ++ #pragma omp atomic ++ shrd += 1; ++ ++ #pragma omp barrier ++ ++ if (shrd != thrs) ++ #pragma omp atomic ++ errors += 1; ++ } ++ ++ if (errors) ++ abort (); ++} +--- libgomp/testsuite/libgomp.c/pr34513.c (revision 0) ++++ libgomp/testsuite/libgomp.c/pr34513.c (revision 131059) +@@ -0,0 +1,33 @@ ++/* PR c++/34513 */ ++/* { dg-do run } */ ++ ++#include ++ ++extern void abort (); ++ ++static int errors = 0; ++static int thrs = 4; ++ ++int ++main () ++{ ++ omp_set_dynamic (0); ++ ++ #pragma omp parallel num_threads (thrs) ++ { ++ static int shrd = 0; ++ ++ #pragma omp atomic ++ shrd += 1; ++ ++ #pragma omp barrier ++ ++ if (shrd != thrs) ++ #pragma omp atomic ++ errors += 1; ++ } ++ ++ if (errors) ++ abort (); ++ return 0; ++} diff --git a/gcc41-pr7081.patch b/gcc41-pr7081.patch new file mode 100644 index 0000000..d145988 --- /dev/null +++ b/gcc41-pr7081.patch @@ -0,0 +1,251 @@ +2007-12-20 Jakub Jelinek + + PR debug/34535 + * cp-lang.c (cp_classify_record): Check TYPE_LANG_SPECIFIC + is non-NULL before testing 
CLASSTYPE_DECLARED_CLASS. + +2007-12-15 Alexandre Oliva + + PR debug/7081 + * dwarf2out.c (dwarf_tag_name): Synchronize with dwarf2.h. + (is_type_die): Cover interface types. + (class_or_namespace_scope_p): Cover interface and class types. + (record_type_tag): New. + (gen_inlined_structure_type_die): Use it. + (gen_struct_or_union_type_die): Likewise. + (prune_unused_types_walk): Cover interface types. + * langhooks.h (classify_record): New enum. + (classify_record): New member in struct langhooks_for_types. + * langhooks-def.h (LANG_HOOKS_CLASSIFY_RECORD): New. + (LANGHOOKS_FOR_TYPES_INITIALIZER): Adjust. +cp/ + * cp-lang.c (cp_classify_record): New. + (LANG_HOOKS_CLASSIFY_RECORD): Override. +java/ + * lang.c (java_classify_record): New. + (LANG_HOOKS_CLASSIFY_RECORD): Override. + +--- gcc/java/lang.c (revision 130959) ++++ gcc/java/lang.c (revision 130960) +@@ -67,6 +67,8 @@ static bool java_decl_ok_for_sibcall (tr + static tree java_get_callee_fndecl (tree); + static void java_clear_binding_stack (void); + ++static enum classify_record java_classify_record (tree type); ++ + #ifndef TARGET_OBJECT_SUFFIX + # define TARGET_OBJECT_SUFFIX ".o" + #endif +@@ -183,6 +185,8 @@ struct language_function GTY(()) + #define LANG_HOOKS_TYPE_FOR_MODE java_type_for_mode + #undef LANG_HOOKS_TYPE_FOR_SIZE + #define LANG_HOOKS_TYPE_FOR_SIZE java_type_for_size ++#undef LANG_HOOKS_CLASSIFY_RECORD ++#define LANG_HOOKS_CLASSIFY_RECORD java_classify_record + #undef LANG_HOOKS_SIGNED_TYPE + #define LANG_HOOKS_SIGNED_TYPE java_signed_type + #undef LANG_HOOKS_UNSIGNED_TYPE +@@ -1027,4 +1031,16 @@ java_clear_binding_stack (void) + poplevel (0, 0, 0); + } + ++static enum classify_record ++java_classify_record (tree type) ++{ ++ if (! 
CLASS_P (type)) ++ return RECORD_IS_STRUCT; ++ ++ if (0 && CLASS_INTERFACE (TYPE_NAME (type))) ++ return RECORD_IS_INTERFACE; ++ ++ return RECORD_IS_CLASS; ++} ++ + #include "gt-java-lang.h" +--- gcc/cp/cp-lang.c (revision 130959) ++++ gcc/cp/cp-lang.c (revision 130960) +@@ -36,6 +36,7 @@ Boston, MA 02110-1301, USA. */ + + enum c_language_kind c_language = clk_cxx; + static void cp_init_ts (void); ++static enum classify_record cp_classify_record (tree type); + + /* Lang hooks common to C++ and ObjC++ are declared in cp/cp-objcp-common.h; + consequently, there should be very few hooks below. */ +@@ -44,6 +45,8 @@ static void cp_init_ts (void); + #define LANG_HOOKS_NAME "GNU C++" + #undef LANG_HOOKS_INIT + #define LANG_HOOKS_INIT cxx_init ++#undef LANG_HOOKS_CLASSIFY_RECORD ++#define LANG_HOOKS_CLASSIFY_RECORD cp_classify_record + #undef LANG_HOOKS_DECL_PRINTABLE_NAME + #define LANG_HOOKS_DECL_PRINTABLE_NAME cxx_printable_name + #undef LANG_HOOKS_FOLD_OBJ_TYPE_REF +@@ -138,6 +141,15 @@ cp_init_ts (void) + + } + ++static enum classify_record ++cp_classify_record (tree type) ++{ ++ if (TYPE_LANG_SPECIFIC (type) && CLASSTYPE_DECLARED_CLASS (type)) ++ return RECORD_IS_CLASS; ++ ++ return RECORD_IS_STRUCT; ++} ++ + void + finish_file (void) + { +--- gcc/dwarf2out.c (revision 130959) ++++ gcc/dwarf2out.c (revision 130960) +@@ -4445,8 +4445,6 @@ dwarf_tag_name (unsigned int tag) + return "DW_TAG_namelist"; + case DW_TAG_namelist_item: + return "DW_TAG_namelist_item"; +- case DW_TAG_namespace: +- return "DW_TAG_namespace"; + case DW_TAG_packed_type: + return "DW_TAG_packed_type"; + case DW_TAG_subprogram: +@@ -4465,8 +4463,26 @@ dwarf_tag_name (unsigned int tag) + return "DW_TAG_variable"; + case DW_TAG_volatile_type: + return "DW_TAG_volatile_type"; ++ case DW_TAG_dwarf_procedure: ++ return "DW_TAG_dwarf_procedure"; ++ case DW_TAG_restrict_type: ++ return "DW_TAG_restrict_type"; ++ case DW_TAG_interface_type: ++ return "DW_TAG_interface_type"; ++ case DW_TAG_namespace: ++ 
return "DW_TAG_namespace"; + case DW_TAG_imported_module: + return "DW_TAG_imported_module"; ++ case DW_TAG_unspecified_type: ++ return "DW_TAG_unspecified_type"; ++ case DW_TAG_partial_unit: ++ return "DW_TAG_partial_unit"; ++ case DW_TAG_imported_unit: ++ return "DW_TAG_imported_unit"; ++ case DW_TAG_condition: ++ return "DW_TAG_condition"; ++ case DW_TAG_shared_type: ++ return "DW_TAG_shared_type"; + case DW_TAG_MIPS_loop: + return "DW_TAG_MIPS_loop"; + case DW_TAG_format_label: +@@ -6181,6 +6197,7 @@ is_type_die (dw_die_ref die) + { + case DW_TAG_array_type: + case DW_TAG_class_type: ++ case DW_TAG_interface_type: + case DW_TAG_enumeration_type: + case DW_TAG_pointer_type: + case DW_TAG_reference_type: +@@ -10986,6 +11003,8 @@ class_or_namespace_scope_p (dw_die_ref c + { + return (context_die + && (context_die->die_tag == DW_TAG_structure_type ++ || context_die->die_tag == DW_TAG_class_type ++ || context_die->die_tag == DW_TAG_interface_type + || context_die->die_tag == DW_TAG_union_type + || context_die->die_tag == DW_TAG_namespace)); + } +@@ -11381,12 +11400,36 @@ gen_inlined_enumeration_type_die (tree t + add_abstract_origin_attribute (type_die, type); + } + ++/* Determine what tag to use for a record type. */ ++ ++static enum dwarf_tag ++record_type_tag (tree type) ++{ ++ if (! lang_hooks.types.classify_record) ++ return DW_TAG_structure_type; ++ ++ switch (lang_hooks.types.classify_record (type)) ++ { ++ case RECORD_IS_STRUCT: ++ return DW_TAG_structure_type; ++ ++ case RECORD_IS_CLASS: ++ return DW_TAG_class_type; ++ ++ case RECORD_IS_INTERFACE: ++ return DW_TAG_interface_type; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* Generate a DIE to represent an inlined instance of a structure type. 
*/ + + static void + gen_inlined_structure_type_die (tree type, dw_die_ref context_die) + { +- dw_die_ref type_die = new_die (DW_TAG_structure_type, context_die, type); ++ dw_die_ref type_die = new_die (record_type_tag (type), context_die, type); + + /* We do not check for TREE_ASM_WRITTEN (type) being set, as the type may + be incomplete and such types are not marked. */ +@@ -12530,7 +12573,7 @@ gen_struct_or_union_type_die (tree type, + dw_die_ref old_die = type_die; + + type_die = new_die (TREE_CODE (type) == RECORD_TYPE +- ? DW_TAG_structure_type : DW_TAG_union_type, ++ ? record_type_tag (type) : DW_TAG_union_type, + scope_die, type); + equate_type_number_to_die (type, type_die); + if (old_die) +@@ -14183,6 +14226,7 @@ prune_unused_types_walk (dw_die_ref die) + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_class_type: ++ case DW_TAG_interface_type: + case DW_TAG_friend: + case DW_TAG_variant_part: + case DW_TAG_enumeration_type: +--- gcc/langhooks.h (revision 130959) ++++ gcc/langhooks.h (revision 130960) +@@ -33,6 +33,9 @@ struct array_descr_info; + /* A print hook for print_tree (). */ + typedef void (*lang_print_tree_hook) (FILE *, tree, int indent); + ++enum classify_record ++ { RECORD_IS_STRUCT, RECORD_IS_CLASS, RECORD_IS_INTERFACE }; ++ + /* The following hooks are documented in langhooks.c. Must not be + NULL. */ + +@@ -91,6 +94,11 @@ struct lang_hooks_for_types + language-specific processing is required. */ + tree (*make_type) (enum tree_code); + ++ /* Return what kind of RECORD_TYPE this is, mainly for purposes of ++ debug information. If not defined, record types are assumed to ++ be structures. */ ++ enum classify_record (*classify_record) (tree); ++ + /* Given MODE and UNSIGNEDP, return a suitable type-tree with that + mode. 
*/ + tree (*type_for_mode) (enum machine_mode, int); +--- gcc/langhooks-def.h (revision 130959) ++++ gcc/langhooks-def.h (revision 130960) +@@ -172,6 +172,7 @@ extern tree lhd_make_node (enum tree_cod + /* Types hooks. There are no reasonable defaults for most of them, + so we create a compile-time error instead. */ + #define LANG_HOOKS_MAKE_TYPE lhd_make_node ++#define LANG_HOOKS_CLASSIFY_RECORD NULL + #define LANG_HOOKS_INCOMPLETE_TYPE_ERROR lhd_incomplete_type_error + #define LANG_HOOKS_GENERIC_TYPE_P hook_bool_const_tree_false + #define LANG_HOOKS_TYPE_PROMOTES_TO lhd_type_promotes_to +@@ -185,6 +186,7 @@ extern tree lhd_make_node (enum tree_cod + + #define LANG_HOOKS_FOR_TYPES_INITIALIZER { \ + LANG_HOOKS_MAKE_TYPE, \ ++ LANG_HOOKS_CLASSIFY_RECORD, \ + LANG_HOOKS_TYPE_FOR_MODE, \ + LANG_HOOKS_TYPE_FOR_SIZE, \ + LANG_HOOKS_GENERIC_TYPE_P, \ diff --git a/gcc41-rh364001.patch b/gcc41-rh364001.patch new file mode 100644 index 0000000..c58c958 --- /dev/null +++ b/gcc41-rh364001.patch @@ -0,0 +1,676 @@ +2007-11-16 Jakub Jelinek + + PR fortran/22244 + * langhooks-def.h (LANG_HOOKS_GET_ARRAY_DESCR_INFO): Define. + (LANG_HOOKS_FOR_TYPES_INITIALIZER): Add it. + * langhooks.h (struct array_descr_info): Forward declaration. + (struct lang_hooks_for_types): Add get_array_descr_info field. + * dwarf2.h (DW_AT_bit_stride, DW_AT_byte_stride): New. + (DW_AT_stride_size, DW_AT_stride): Keep around for Dwarf2 + compatibility. + * dwarf2out.h (struct array_descr_info): New type. + * dwarf2out.c (dwarf_attr_name): Rename DW_AT_stride to + DW_AT_byte_stride and DW_AT_stride_size to DW_AT_bit_stride. + (descr_info_loc, add_descr_info_field, gen_descr_array_type_die): + New functions. + (gen_type_die_with_usage): Call lang_hooks.types.get_array_descr_info + and gen_descr_array_type_die. + + * trans.h (struct array_descr_info): Forward declaration. + (gfc_get_array_descr_info): New prototype. + (enum gfc_array_kind): New type. + (struct lang_type): Add akind field. 
+ (GFC_TYPE_ARRAY_AKIND): Define. + * trans-types.c: Include dwarf2out.h. + (gfc_build_array_type): Add akind argument. Adjust + gfc_get_array_type_bounds call. + (gfc_get_nodesc_array_type): Include proper debug info even for + assumed-size arrays. + (gfc_get_array_type_bounds): Add akind argument, set + GFC_TYPE_ARRAY_AKIND to it. + (gfc_sym_type, gfc_get_derived_type): Adjust gfc_build_array_type + callers. + (gfc_get_array_descr_info): New function. + * trans-array.c (gfc_trans_create_temp_array, + gfc_conv_expr_descriptor): Adjust gfc_get_array_type_bounds + callers. + * trans-stmt.c (gfc_trans_pointer_assign_need_temp): Likewise. + * trans-types.h (gfc_get_array_type_bounds): Adjust prototype. + * Make-lang.in (fortran/trans-types.o): Depend on dwarf2out.h. + * f95-lang.c (LANG_HOOKS_GET_ARRAY_DESCR_INFO): Define. + +--- gcc/fortran/trans.h.jj 2007-07-23 12:24:16.000000000 +0200 ++++ gcc/fortran/trans.h 2007-11-24 15:04:51.000000000 +0100 +@@ -453,6 +453,8 @@ tree getdecls (void); + tree gfc_truthvalue_conversion (tree); + tree builtin_function (const char *, tree, int, enum built_in_class, + const char *, tree); ++struct array_descr_info; ++bool gfc_get_array_descr_info (tree, struct array_descr_info *); + + /* In trans-openmp.c */ + bool gfc_omp_privatize_by_reference (tree); +@@ -541,10 +543,19 @@ extern GTY(()) tree gfor_fndecl_sr_kind; + + /* G95-specific declaration information. */ + ++enum gfc_array_kind ++{ ++ GFC_ARRAY_UNKNOWN, ++ GFC_ARRAY_ASSUMED_SHAPE, ++ GFC_ARRAY_ALLOCATABLE, ++ GFC_ARRAY_POINTER ++}; ++ + /* Array types only. 
*/ + struct lang_type GTY(()) + { + int rank; ++ enum gfc_array_kind akind; + tree lbound[GFC_MAX_DIMENSIONS]; + tree ubound[GFC_MAX_DIMENSIONS]; + tree stride[GFC_MAX_DIMENSIONS]; +@@ -595,7 +606,8 @@ struct lang_decl GTY(()) + #define GFC_TYPE_ARRAY_RANK(node) (TYPE_LANG_SPECIFIC(node)->rank) + #define GFC_TYPE_ARRAY_SIZE(node) (TYPE_LANG_SPECIFIC(node)->size) + #define GFC_TYPE_ARRAY_OFFSET(node) (TYPE_LANG_SPECIFIC(node)->offset) +-/* Code should use gfc_get_dtype instead of accesig this directly. It may ++#define GFC_TYPE_ARRAY_AKIND(node) (TYPE_LANG_SPECIFIC(node)->akind) ++/* Code should use gfc_get_dtype instead of accesing this directly. It may + not be known when the type is created. */ + #define GFC_TYPE_ARRAY_DTYPE(node) (TYPE_LANG_SPECIFIC(node)->dtype) + #define GFC_TYPE_ARRAY_DATAPTR_TYPE(node) \ +--- gcc/fortran/trans-stmt.c.jj 2007-03-12 08:28:15.000000000 +0100 ++++ gcc/fortran/trans-stmt.c 2007-11-24 14:58:20.000000000 +0100 +@@ -2313,7 +2313,8 @@ gfc_trans_pointer_assign_need_temp (gfc_ + /* Make a new descriptor. */ + parmtype = gfc_get_element_type (TREE_TYPE (desc)); + parmtype = gfc_get_array_type_bounds (parmtype, loop.dimen, +- loop.from, loop.to, 1); ++ loop.from, loop.to, 1, ++ GFC_ARRAY_UNKNOWN); + + /* Allocate temporary for nested forall construct. */ + tmp1 = allocate_temp_for_forall_nest (nested_forall_info, parmtype, +--- gcc/fortran/f95-lang.c.jj 2007-02-20 22:38:20.000000000 +0100 ++++ gcc/fortran/f95-lang.c 2007-11-24 15:03:19.000000000 +0100 +@@ -124,6 +124,7 @@ static HOST_WIDE_INT gfc_get_alias_set ( + #undef LANG_HOOKS_OMP_DISREGARD_VALUE_EXPR + #undef LANG_HOOKS_OMP_PRIVATE_DEBUG_CLAUSE + #undef LANG_HOOKS_OMP_FIRSTPRIVATIZE_TYPE_SIZES ++#undef LANG_HOOKS_GET_ARRAY_DESCR_INFO + + /* Define lang hooks. 
*/ + #define LANG_HOOKS_NAME "GNU F95" +@@ -150,6 +151,7 @@ static HOST_WIDE_INT gfc_get_alias_set ( + #define LANG_HOOKS_OMP_PRIVATE_DEBUG_CLAUSE gfc_omp_private_debug_clause + #define LANG_HOOKS_OMP_FIRSTPRIVATIZE_TYPE_SIZES \ + gfc_omp_firstprivatize_type_sizes ++#define LANG_HOOKS_GET_ARRAY_DESCR_INFO gfc_get_array_descr_info + + const struct lang_hooks lang_hooks = LANG_HOOKS_INITIALIZER; + +--- gcc/fortran/Make-lang.in.jj 2007-09-25 11:32:17.000000000 +0200 ++++ gcc/fortran/Make-lang.in 2007-11-24 15:02:36.000000000 +0100 +@@ -292,7 +292,7 @@ fortran/trans-decl.o: $(GFORTRAN_TRANS_D + cgraph.h $(TARGET_H) function.h $(FLAGS_H) $(RTL_H) tree-gimple.h \ + tree-dump.h + fortran/trans-types.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-types.h \ +- real.h toplev.h $(TARGET_H) $(FLAGS_H) ++ real.h toplev.h $(TARGET_H) $(FLAGS_H) dwarf2out.h + fortran/trans-const.o: $(GFORTRAN_TRANS_DEPS) + fortran/trans-expr.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h + fortran/trans-stmt.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h +--- gcc/fortran/trans-types.c.jj 2007-09-25 11:32:17.000000000 +0200 ++++ gcc/fortran/trans-types.c 2007-11-24 15:06:59.000000000 +0100 +@@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, + #include "trans-const.h" + #include "real.h" + #include "flags.h" ++#include "dwarf2out.h" + + + #if (GFC_MAX_DIMENSIONS < 10) +@@ -821,7 +822,8 @@ gfc_is_nodesc_array (gfc_symbol * sym) + /* Create an array descriptor type. 
*/ + + static tree +-gfc_build_array_type (tree type, gfc_array_spec * as) ++gfc_build_array_type (tree type, gfc_array_spec * as, ++ enum gfc_array_kind akind) + { + tree lbound[GFC_MAX_DIMENSIONS]; + tree ubound[GFC_MAX_DIMENSIONS]; +@@ -837,7 +839,9 @@ gfc_build_array_type (tree type, gfc_arr + ubound[n] = gfc_conv_array_bound (as->upper[n]); + } + +- return gfc_get_array_type_bounds (type, as->rank, lbound, ubound, 0); ++ if (as->type == AS_ASSUMED_SHAPE) ++ akind = GFC_ARRAY_ASSUMED_SHAPE; ++ return gfc_get_array_type_bounds (type, as->rank, lbound, ubound, 0, akind); + } + + /* Returns the struct descriptor_dimension type. */ +@@ -1015,7 +1019,7 @@ gfc_get_nodesc_array_type (tree etype, g + if (expr->expr_type == EXPR_CONSTANT) + { + tmp = gfc_conv_mpz_to_tree (expr->value.integer, +- gfc_index_integer_kind); ++ gfc_index_integer_kind); + } + else + { +@@ -1107,7 +1111,7 @@ gfc_get_nodesc_array_type (tree etype, g + /* In debug info represent packed arrays as multi-dimensional + if they have rank > 1 and with proper bounds, instead of flat + arrays. */ +- if (known_stride && write_symbols != NO_DEBUG) ++ if (known_offset && write_symbols != NO_DEBUG) + { + tree gtype = etype, rtype, type_decl; + +@@ -1193,7 +1197,8 @@ gfc_get_array_descriptor_base (int dimen + + tree + gfc_get_array_type_bounds (tree etype, int dimen, tree * lbound, +- tree * ubound, int packed) ++ tree * ubound, int packed, ++ enum gfc_array_kind akind) + { + char name[8 + GFC_RANK_DIGITS + GFC_MAX_SYMBOL_LEN]; + tree fat_type, base_type, arraytype, lower, upper, stride, tmp; +@@ -1220,6 +1225,7 @@ gfc_get_array_type_bounds (tree etype, i + + GFC_TYPE_ARRAY_RANK (fat_type) = dimen; + GFC_TYPE_ARRAY_DTYPE (fat_type) = NULL_TREE; ++ GFC_TYPE_ARRAY_AKIND (fat_type) = akind; + + /* Build an array descriptor record type. 
*/ + if (packed != 0) +@@ -1337,7 +1343,14 @@ gfc_sym_type (gfc_symbol * sym) + } + } + else +- type = gfc_build_array_type (type, sym->as); ++ { ++ enum gfc_array_kind akind = GFC_ARRAY_UNKNOWN; ++ if (sym->attr.pointer) ++ akind = GFC_ARRAY_POINTER; ++ else if (sym->attr.allocatable) ++ akind = GFC_ARRAY_ALLOCATABLE; ++ type = gfc_build_array_type (type, sym->as, akind); ++ } + } + else + { +@@ -1550,7 +1563,8 @@ gfc_get_derived_type (gfc_symbol * deriv + { + /* Pointers to arrays aren't actually pointer types. The + descriptors are separate, but the data is common. */ +- field_type = gfc_build_array_type (field_type, c->as); ++ field_type = gfc_build_array_type (field_type, c->as, ++ GFC_ARRAY_POINTER); + } + else + field_type = gfc_get_nodesc_array_type (field_type, c->as, 3); +@@ -1893,4 +1907,124 @@ gfc_signed_type (tree type) + return gfc_signed_or_unsigned_type (0, type); + } + ++/* Return TRUE if TYPE is a type with a hidden descriptor, fill in INFO ++ in that case. */ ++ ++bool ++gfc_get_array_descr_info (tree type, struct array_descr_info *info) ++{ ++ int rank, dim; ++ bool indirect = false; ++ tree etype, ptype, field, t, base_decl; ++ tree data_off, offset_off, dim_off, dim_size, elem_size; ++ tree lower_suboff, upper_suboff, stride_suboff; ++ ++ if (! GFC_DESCRIPTOR_TYPE_P (type)) ++ { ++ if (! POINTER_TYPE_P (type)) ++ return false; ++ type = TREE_TYPE (type); ++ if (! GFC_DESCRIPTOR_TYPE_P (type)) ++ return false; ++ indirect = true; ++ } ++ ++ rank = GFC_TYPE_ARRAY_RANK (type); ++ if (rank >= (int) (sizeof (info->dimen) / sizeof (info->dimen[0]))) ++ return false; ++ ++ etype = GFC_TYPE_ARRAY_DATAPTR_TYPE (type); ++ gcc_assert (POINTER_TYPE_P (etype)); ++ etype = TREE_TYPE (etype); ++ gcc_assert (TREE_CODE (etype) == ARRAY_TYPE); ++ etype = TREE_TYPE (etype); ++ /* Can't handle variable sized elements yet. */ ++ if (int_size_in_bytes (etype) <= 0) ++ return false; ++ /* Nor non-constant lower bounds in assumed shape arrays. 
*/ ++ if (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ASSUMED_SHAPE) ++ { ++ for (dim = 0; dim < rank; dim++) ++ if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE ++ || TREE_CODE (GFC_TYPE_ARRAY_LBOUND (type, dim)) != INTEGER_CST) ++ return false; ++ } ++ ++ memset (info, '\0', sizeof (*info)); ++ info->ndimensions = rank; ++ info->element_type = etype; ++ ptype = build_pointer_type (gfc_array_index_type); ++ if (indirect) ++ { ++ info->base_decl = build_decl (VAR_DECL, NULL_TREE, ++ build_pointer_type (ptype)); ++ base_decl = build1 (INDIRECT_REF, ptype, info->base_decl); ++ } ++ else ++ info->base_decl = base_decl = build_decl (VAR_DECL, NULL_TREE, ptype); ++ ++ elem_size = fold_convert (gfc_array_index_type, TYPE_SIZE_UNIT (etype)); ++ field = TYPE_FIELDS (TYPE_MAIN_VARIANT (type)); ++ data_off = byte_position (field); ++ field = TREE_CHAIN (field); ++ offset_off = byte_position (field); ++ field = TREE_CHAIN (field); ++ field = TREE_CHAIN (field); ++ dim_off = byte_position (field); ++ dim_size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (field))); ++ field = TYPE_FIELDS (TREE_TYPE (TREE_TYPE (field))); ++ stride_suboff = byte_position (field); ++ field = TREE_CHAIN (field); ++ lower_suboff = byte_position (field); ++ field = TREE_CHAIN (field); ++ upper_suboff = byte_position (field); ++ ++ t = base_decl; ++ if (!integer_zerop (data_off)) ++ t = build2 (PLUS_EXPR, ptype, t, data_off); ++ t = build1 (NOP_EXPR, build_pointer_type (ptr_type_node), t); ++ info->data_location = build1 (INDIRECT_REF, ptr_type_node, t); ++ if (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ALLOCATABLE) ++ info->allocated = build2 (NE_EXPR, boolean_type_node, ++ info->data_location, null_pointer_node); ++ else if (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER) ++ info->associated = build2 (NE_EXPR, boolean_type_node, ++ info->data_location, null_pointer_node); ++ ++ for (dim = 0; dim < rank; dim++) ++ { ++ t = build2 (PLUS_EXPR, ptype, base_decl, ++ size_binop (PLUS_EXPR, dim_off, 
lower_suboff)); ++ t = build1 (INDIRECT_REF, gfc_array_index_type, t); ++ info->dimen[dim].lower_bound = t; ++ t = build2 (PLUS_EXPR, ptype, base_decl, ++ size_binop (PLUS_EXPR, dim_off, upper_suboff)); ++ t = build1 (INDIRECT_REF, gfc_array_index_type, t); ++ info->dimen[dim].upper_bound = t; ++ if (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ASSUMED_SHAPE) ++ { ++ /* Assumed shape arrays have known lower bounds. */ ++ info->dimen[dim].upper_bound ++ = build2 (MINUS_EXPR, gfc_array_index_type, ++ info->dimen[dim].upper_bound, ++ info->dimen[dim].lower_bound); ++ info->dimen[dim].lower_bound ++ = fold_convert (gfc_array_index_type, ++ GFC_TYPE_ARRAY_LBOUND (type, dim)); ++ info->dimen[dim].upper_bound ++ = build2 (PLUS_EXPR, gfc_array_index_type, ++ info->dimen[dim].lower_bound, ++ info->dimen[dim].upper_bound); ++ } ++ t = build2 (PLUS_EXPR, ptype, base_decl, ++ size_binop (PLUS_EXPR, dim_off, stride_suboff)); ++ t = build1 (INDIRECT_REF, gfc_array_index_type, t); ++ t = build2 (MULT_EXPR, gfc_array_index_type, t, elem_size); ++ info->dimen[dim].stride = t; ++ dim_off = size_binop (PLUS_EXPR, dim_off, dim_size); ++ } ++ ++ return true; ++} ++ + #include "gt-fortran-trans-types.h" +--- gcc/fortran/trans-array.c.jj 2007-04-03 13:10:00.000000000 +0200 ++++ gcc/fortran/trans-array.c 2007-11-24 14:58:20.000000000 +0100 +@@ -612,7 +612,8 @@ gfc_trans_allocate_temp_array (stmtblock + + /* Initialize the descriptor. */ + type = +- gfc_get_array_type_bounds (eltype, info->dimen, loop->from, loop->to, 1); ++ gfc_get_array_type_bounds (eltype, info->dimen, loop->from, loop->to, 1, ++ GFC_ARRAY_UNKNOWN); + desc = gfc_create_var (type, "atmp"); + GFC_DECL_PACKED_ARRAY (desc) = 1; + +@@ -4345,7 +4346,8 @@ gfc_conv_expr_descriptor (gfc_se * se, g + /* Otherwise make a new one. 
*/ + parmtype = gfc_get_element_type (TREE_TYPE (desc)); + parmtype = gfc_get_array_type_bounds (parmtype, loop.dimen, +- loop.from, loop.to, 0); ++ loop.from, loop.to, 0, ++ GFC_ARRAY_UNKNOWN); + parm = gfc_create_var (parmtype, "parm"); + } + +--- gcc/fortran/trans-types.h.jj 2007-02-20 22:38:20.000000000 +0100 ++++ gcc/fortran/trans-types.h 2007-11-24 15:03:58.000000000 +0100 +@@ -79,7 +79,8 @@ tree gfc_signed_type (tree); + tree gfc_signed_or_unsigned_type (int, tree); + + tree gfc_get_element_type (tree); +-tree gfc_get_array_type_bounds (tree, int, tree *, tree *, int); ++tree gfc_get_array_type_bounds (tree, int, tree *, tree *, int, ++ enum gfc_array_kind); + tree gfc_get_nodesc_array_type (tree, gfc_array_spec *, int); + + /* Add a field of given name and type to a UNION_TYPE or RECORD_TYPE. */ +--- gcc/dwarf2.h.jj 2007-02-20 22:39:12.000000000 +0100 ++++ gcc/dwarf2.h 2007-11-24 14:58:20.000000000 +0100 +@@ -275,7 +275,8 @@ enum dwarf_attribute + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, +- DW_AT_stride_size = 0x2e, ++ DW_AT_bit_stride = 0x2e, ++ DW_AT_stride_size = DW_AT_bit_stride, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, +@@ -310,7 +311,8 @@ enum dwarf_attribute + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, +- DW_AT_stride = 0x51, ++ DW_AT_byte_stride = 0x51, ++ DW_AT_stride = DW_AT_byte_stride, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, +--- gcc/langhooks.h.jj 2007-11-23 17:41:15.000000000 +0100 ++++ gcc/langhooks.h 2007-11-24 15:01:32.000000000 +0100 +@@ -28,6 +28,8 @@ struct diagnostic_info; + + struct gimplify_omp_ctx; + ++struct array_descr_info; ++ + /* A print hook for print_tree (). */ + typedef void (*lang_print_tree_hook) (FILE *, tree, int indent); + +@@ -153,6 +155,10 @@ struct lang_hooks_for_types + Called only after doing all language independent checks. 
*/ + bool (*type_hash_eq) (tree, tree); + ++ /* Return TRUE if TYPE uses a hidden descriptor and fills in information ++ for the debugger about the array bounds, strides, etc. */ ++ bool (*get_array_descr_info) (tree, struct array_descr_info *); ++ + /* Nonzero if types that are identical are to be hashed so that only + one copy is kept. If a language requires unique types for each + user-specified type, such as Ada, this should be set to TRUE. */ +--- gcc/langhooks-def.h.jj 2007-11-23 17:41:15.000000000 +0100 ++++ gcc/langhooks-def.h 2007-11-24 15:00:47.000000000 +0100 +@@ -223,6 +223,7 @@ extern tree lhd_make_node (enum tree_cod + #define LANG_HOOKS_OMP_FIRSTPRIVATIZE_TYPE_SIZES \ + lhd_omp_firstprivatize_type_sizes + #define LANG_HOOKS_TYPE_HASH_EQ lhd_type_hash_eq ++#define LANG_HOOKS_GET_ARRAY_DESCR_INFO NULL + #define LANG_HOOKS_HASH_TYPES true + + #define LANG_HOOKS_FOR_TYPES_INITIALIZER { \ +@@ -238,6 +239,7 @@ extern tree lhd_make_node (enum tree_cod + LANG_HOOKS_TYPE_MAX_SIZE, \ + LANG_HOOKS_OMP_FIRSTPRIVATIZE_TYPE_SIZES, \ + LANG_HOOKS_TYPE_HASH_EQ, \ ++ LANG_HOOKS_GET_ARRAY_DESCR_INFO, \ + LANG_HOOKS_HASH_TYPES \ + } + +--- gcc/dwarf2out.c.jj 2007-11-23 18:10:20.000000000 +0100 ++++ gcc/dwarf2out.c 2007-11-24 14:58:20.000000000 +0100 +@@ -4146,6 +4146,7 @@ static tree member_declared_type (tree); + static const char *decl_start_label (tree); + #endif + static void gen_array_type_die (tree, dw_die_ref); ++static void gen_descr_array_type_die (tree, struct array_descr_info *, dw_die_ref); + #if 0 + static void gen_entry_point_die (tree, dw_die_ref); + #endif +@@ -4552,8 +4553,8 @@ dwarf_attr_name (unsigned int attr) + return "DW_AT_return_addr"; + case DW_AT_start_scope: + return "DW_AT_start_scope"; +- case DW_AT_stride_size: +- return "DW_AT_stride_size"; ++ case DW_AT_bit_stride: ++ return "DW_AT_bit_stride"; + case DW_AT_upper_bound: + return "DW_AT_upper_bound"; + case DW_AT_abstract_origin: +@@ -4621,8 +4622,8 @@ dwarf_attr_name (unsigned int attr) 
+ return "DW_AT_associated"; + case DW_AT_data_location: + return "DW_AT_data_location"; +- case DW_AT_stride: +- return "DW_AT_stride"; ++ case DW_AT_byte_stride: ++ return "DW_AT_byte_stride"; + case DW_AT_entry_pc: + return "DW_AT_entry_pc"; + case DW_AT_use_UTF8: +@@ -11169,6 +11170,159 @@ gen_array_type_die (tree type, dw_die_re + add_type_attribute (array_die, element_type, 0, 0, context_die); + } + ++static dw_loc_descr_ref ++descr_info_loc (tree val, tree base_decl) ++{ ++ HOST_WIDE_INT size; ++ dw_loc_descr_ref loc, loc2; ++ enum dwarf_location_atom op; ++ ++ if (val == base_decl) ++ return new_loc_descr (DW_OP_push_object_address, 0, 0); ++ ++ switch (TREE_CODE (val)) ++ { ++ case NOP_EXPR: ++ case CONVERT_EXPR: ++ return descr_info_loc (TREE_OPERAND (val, 0), base_decl); ++ case INTEGER_CST: ++ if (host_integerp (val, 0)) ++ return int_loc_descriptor (tree_low_cst (val, 0)); ++ break; ++ case INDIRECT_REF: ++ size = int_size_in_bytes (TREE_TYPE (val)); ++ if (size < 0) ++ break; ++ loc = descr_info_loc (TREE_OPERAND (val, 0), base_decl); ++ if (!loc) ++ break; ++ if (size == DWARF2_ADDR_SIZE) ++ add_loc_descr (&loc, new_loc_descr (DW_OP_deref, 0, 0)); ++ else ++ add_loc_descr (&loc, new_loc_descr (DW_OP_deref_size, size, 0)); ++ return loc; ++ case PLUS_EXPR: ++ if (host_integerp (TREE_OPERAND (val, 1), 1) ++ && (unsigned HOST_WIDE_INT) tree_low_cst (TREE_OPERAND (val, 1), 1) ++ < 16384) ++ { ++ loc = descr_info_loc (TREE_OPERAND (val, 0), base_decl); ++ if (!loc) ++ break; ++ add_loc_descr (&loc, ++ new_loc_descr (DW_OP_plus_uconst, ++ tree_low_cst (TREE_OPERAND (val, 1), ++ 1), 0)); ++ } ++ else ++ { ++ op = DW_OP_plus; ++ do_binop: ++ loc = descr_info_loc (TREE_OPERAND (val, 0), base_decl); ++ if (!loc) ++ break; ++ loc2 = descr_info_loc (TREE_OPERAND (val, 1), base_decl); ++ if (!loc2) ++ break; ++ add_loc_descr (&loc, loc2); ++ add_loc_descr (&loc2, new_loc_descr (op, 0, 0)); ++ } ++ return loc; ++ case MINUS_EXPR: ++ op = DW_OP_minus; ++ goto 
do_binop; ++ case MULT_EXPR: ++ op = DW_OP_mul; ++ goto do_binop; ++ case EQ_EXPR: ++ op = DW_OP_eq; ++ goto do_binop; ++ case NE_EXPR: ++ op = DW_OP_ne; ++ goto do_binop; ++ default: ++ break; ++ } ++ return NULL; ++} ++ ++static void ++add_descr_info_field (dw_die_ref die, enum dwarf_attribute attr, ++ tree val, tree base_decl) ++{ ++ dw_loc_descr_ref loc; ++ ++ if (host_integerp (val, 0)) ++ { ++ add_AT_unsigned (die, attr, tree_low_cst (val, 0)); ++ return; ++ } ++ ++ loc = descr_info_loc (val, base_decl); ++ if (!loc) ++ return; ++ ++ add_AT_loc (die, attr, loc); ++} ++ ++/* This routine generates DIE for array with hidden descriptor, details ++ are filled into *info by a langhook. */ ++ ++static void ++gen_descr_array_type_die (tree type, struct array_descr_info *info, ++ dw_die_ref context_die) ++{ ++ dw_die_ref scope_die = scope_die_for (type, context_die); ++ dw_die_ref array_die; ++ int dim; ++ ++ array_die = new_die (DW_TAG_array_type, scope_die, type); ++ add_name_attribute (array_die, type_tag (type)); ++ equate_type_number_to_die (type, array_die); ++ ++ if (info->data_location) ++ add_descr_info_field (array_die, DW_AT_data_location, info->data_location, ++ info->base_decl); ++ if (info->associated) ++ add_descr_info_field (array_die, DW_AT_associated, info->associated, ++ info->base_decl); ++ if (info->allocated) ++ add_descr_info_field (array_die, DW_AT_allocated, info->allocated, ++ info->base_decl); ++ ++ for (dim = 0; dim < info->ndimensions; dim++) ++ { ++ dw_die_ref subrange_die ++ = new_die (DW_TAG_subrange_type, array_die, NULL); ++ ++ if (info->dimen[dim].lower_bound) ++ { ++ /* If it is the default value, omit it. 
*/ ++ if ((is_c_family () || is_java ()) ++ && integer_zerop (info->dimen[dim].lower_bound)) ++ ; ++ else if (is_fortran () ++ && integer_onep (info->dimen[dim].lower_bound)) ++ ; ++ else ++ add_descr_info_field (subrange_die, DW_AT_lower_bound, ++ info->dimen[dim].lower_bound, ++ info->base_decl); ++ } ++ if (info->dimen[dim].upper_bound) ++ add_descr_info_field (subrange_die, DW_AT_upper_bound, ++ info->dimen[dim].upper_bound, ++ info->base_decl); ++ if (info->dimen[dim].stride) ++ add_descr_info_field (subrange_die, DW_AT_byte_stride, ++ info->dimen[dim].stride, ++ info->base_decl); ++ } ++ ++ gen_type_die (info->element_type, context_die); ++ add_type_attribute (array_die, info->element_type, 0, 0, context_die); ++} ++ + #if 0 + static void + gen_entry_point_die (tree decl, dw_die_ref context_die) +@@ -12478,6 +12632,7 @@ static void + gen_type_die (tree type, dw_die_ref context_die) + { + int need_pop; ++ struct array_descr_info info; + + if (type == NULL_TREE || type == error_mark_node) + return; +@@ -12496,6 +12651,16 @@ gen_type_die (tree type, dw_die_ref cont + return; + } + ++ /* If this is an array type with hidden descriptor, handle it first. */ ++ if (!TREE_ASM_WRITTEN (type) ++ && lang_hooks.types.get_array_descr_info ++ && lang_hooks.types.get_array_descr_info (type, &info)) ++ { ++ gen_descr_array_type_die (type, &info, context_die); ++ TREE_ASM_WRITTEN (type) = 1; ++ return; ++ } ++ + /* We are going to output a DIE to represent the unqualified version + of this type (i.e. without any const or volatile qualifiers) so + get the main variant (i.e. 
the unqualified version) of this type +--- gcc/dwarf2out.h.jj 2007-02-20 22:39:12.000000000 +0100 ++++ gcc/dwarf2out.h 2007-11-24 15:00:14.000000000 +0100 +@@ -27,3 +27,19 @@ struct die_struct; + extern void debug_dwarf_die (struct die_struct *); + extern void dwarf2out_set_demangle_name_func (const char *(*) (const char *)); + extern void dwarf2out_add_library_unit_info (const char *, const char *); ++ ++struct array_descr_info ++{ ++ int ndimensions; ++ tree element_type; ++ tree base_decl; ++ tree data_location; ++ tree allocated; ++ tree associated; ++ struct array_descr_dimen ++ { ++ tree lower_bound; ++ tree upper_bound; ++ tree stride; ++ } dimen[10]; ++}; diff --git a/gcc41-rh407281.patch b/gcc41-rh407281.patch new file mode 100644 index 0000000..aabd073 --- /dev/null +++ b/gcc41-rh407281.patch @@ -0,0 +1,190 @@ +2007-12-02 Jakub Jelinek + + * gcc.c-torture/execute/20071202-1.c: New test. + +2007-02-19 Eric Botcazou + + * gimplify.c (gimplify_init_ctor_preeval_1): Detect potential overlap + due to calls to functions taking pointers as parameters. + + * gnat.dg/self_aggregate_with_call.adb: New test. + +2006-08-21 Olivier Hainque + + * gimplify.c (gimplify_init_constructor) : + Arrange for the temporary captures of components overlapping the lhs + to happen before the lhs is possibly cleared. + + * gnat.dg/self_aggregate_with_zeros.adb: New test. + * gnat.dg/self_aggregate_with_array.adb: New test. + +--- gcc/gimplify.c (revision 116299) ++++ gcc/gimplify.c (revision 116300) +@@ -2638,6 +2638,21 @@ gimplify_init_ctor_preeval_1 (tree *tp, + && alias_sets_conflict_p (data->lhs_alias_set, get_alias_set (t))) + return t; + ++ /* If the constructor component is a call, determine if it can hide a ++ potential overlap with the lhs through an INDIRECT_REF like above. 
*/ ++ if (TREE_CODE (t) == CALL_EXPR) ++ { ++ tree type, fntype = TREE_TYPE (TREE_TYPE (TREE_OPERAND (t, 0))); ++ ++ for (type = TYPE_ARG_TYPES (fntype); type; type = TREE_CHAIN (type)) ++ if (POINTER_TYPE_P (TREE_VALUE (type)) ++ && (!data->lhs_base_decl || TREE_ADDRESSABLE (data->lhs_base_decl)) ++ && alias_sets_conflict_p (data->lhs_alias_set, ++ get_alias_set ++ (TREE_TYPE (TREE_VALUE (type))))) ++ return t; ++ } ++ + if (IS_TYPE_OR_DECL_P (t)) + *walk_subtrees = 0; + return NULL; +@@ -3061,6 +3061,20 @@ gimplify_init_constructor (tree *expr_p, + } + } + ++ /* If there are nonzero elements, pre-evaluate to capture elements ++ overlapping with the lhs into temporaries. We must do this before ++ clearing to fetch the values before they are zeroed-out. */ ++ if (num_nonzero_elements > 0) ++ { ++ preeval_data.lhs_base_decl = get_base_address (object); ++ if (!DECL_P (preeval_data.lhs_base_decl)) ++ preeval_data.lhs_base_decl = NULL; ++ preeval_data.lhs_alias_set = get_alias_set (object); ++ ++ gimplify_init_ctor_preeval (&TREE_OPERAND (*expr_p, 1), ++ pre_p, post_p, &preeval_data); ++ } ++ + if (cleared) + { + /* Zap the CONSTRUCTOR element list, which simplifies this case. +@@ -3076,16 +3090,7 @@ gimplify_init_constructor (tree *expr_p, + elements in the constructor, add assignments to the individual + scalar fields of the object. 
*/ + if (!cleared || num_nonzero_elements > 0) +- { +- preeval_data.lhs_base_decl = get_base_address (object); +- if (!DECL_P (preeval_data.lhs_base_decl)) +- preeval_data.lhs_base_decl = NULL; +- preeval_data.lhs_alias_set = get_alias_set (object); +- +- gimplify_init_ctor_preeval (&TREE_OPERAND (*expr_p, 1), +- pre_p, post_p, &preeval_data); +- gimplify_init_ctor_eval (object, elts, pre_p, cleared); +- } ++ gimplify_init_ctor_eval (object, elts, pre_p, cleared); + + *expr_p = NULL_TREE; + } +--- gcc/testsuite/gcc.c-torture/execute/20071202-1.c.jj 2007-12-02 19:26:19.000000000 +0100 ++++ gcc/testsuite/gcc.c-torture/execute/20071202-1.c 2007-12-02 19:24:28.000000000 +0100 +@@ -0,0 +1,25 @@ ++extern void abort (void); ++struct T { int t; int r[8]; }; ++struct S { int a; int b; int c[6]; struct T d; }; ++ ++__attribute__((noinline)) void ++foo (struct S *s) ++{ ++ *s = (struct S) { s->b, s->a, { 0, 0, 0, 0, 0, 0 }, s->d }; ++} ++ ++int ++main (void) ++{ ++ struct S s = { 6, 12, { 1, 2, 3, 4, 5, 6 }, ++ { 7, { 8, 9, 10, 11, 12, 13, 14, 15 } } }; ++ foo (&s); ++ if (s.a != 12 || s.b != 6 ++ || s.c[0] || s.c[1] || s.c[2] || s.c[3] || s.c[4] || s.c[5]) ++ abort (); ++ if (s.d.t != 7 || s.d.r[0] != 8 || s.d.r[1] != 9 || s.d.r[2] != 10 ++ || s.d.r[3] != 11 || s.d.r[4] != 12 || s.d.r[5] != 13 ++ || s.d.r[6] != 14 || s.d.r[7] != 15) ++ abort (); ++ return 0; ++} +--- gcc/testsuite/gnat.dg/self_aggregate_with_zeros.adb (revision 0) ++++ gcc/testsuite/gnat.dg/self_aggregate_with_zeros.adb (revision 116300) +@@ -0,0 +1,19 @@ ++-- { dg-do run } ++ ++procedure self_aggregate_with_zeros is ++ ++ type Sensor is record ++ Value : Natural; ++ A, B, C, D, E, F, G, H, I, J, K, L, M : Natural; ++ end record; ++ ++ Pressure : Sensor; ++ ++begin ++ Pressure.Value := 256; ++ Pressure := (Pressure.Value, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); ++ ++ if Pressure.Value /= 256 then ++ raise Program_Error; ++ end if; ++end; +--- gcc/testsuite/gnat.dg/self_aggregate_with_array.adb (revision 0) 
++++ gcc/testsuite/gnat.dg/self_aggregate_with_array.adb (revision 116300) +@@ -0,0 +1,21 @@ ++-- { dg-do run } ++ ++procedure self_aggregate_with_array is ++ ++ type Value_Bounds is array (1 .. 2) of Natural; ++ ++ type Sensor is record ++ Value : Natural; ++ Bounds : Value_Bounds; ++ end record; ++ ++ Pressure : Sensor; ++ ++begin ++ Pressure.Value := 256; ++ Pressure := (Value => Pressure.Value, Bounds => (1, 2)); ++ ++ if Pressure.Value /= 256 then ++ raise Program_Error; ++ end if; ++end; +--- gcc/testsuite/gnat.dg/self_aggregate_with_call.adb (revision 0) ++++ gcc/testsuite/gnat.dg/self_aggregate_with_call.adb (revision 122134) +@@ -0,0 +1,30 @@ ++-- { dg-do run } ++-- { dg-options "-O2" } ++ ++procedure self_aggregate_with_call is ++ ++ type Values is array (1 .. 8) of Natural; ++ ++ type Vector is record ++ Components : Values; ++ end record; ++ ++ function Clone (Components: Values) return Values is ++ begin ++ return Components; ++ end; ++ ++ procedure Process (V : in out Vector) is ++ begin ++ V.Components (Values'First) := 1; ++ V := (Components => Clone (V.Components)); ++ ++ if V.Components (Values'First) /= 1 then ++ raise Program_Error; ++ end if; ++ end; ++ ++ V : Vector; ++begin ++ Process (V); ++end; diff --git a/gcc41-rh426846.patch b/gcc41-rh426846.patch new file mode 100644 index 0000000..2039a11 --- /dev/null +++ b/gcc41-rh426846.patch @@ -0,0 +1,538 @@ +2007-10-10 Jason Merrill + + * libsupc++/guard.cc (struct mutex_wrapper): Move into + anonymous namespace. + +2007-10-09 Zhou Drangon + + PR libstdc++/33682 + * libsupc++/guard.cc: Make single conditional variable + implementation dependent to __GTHREAD_HAS_COND. + +2007-10-05 Hans-Peter Nilsson + + * gthr-single.h: Revert last change. + +2007-10-04 Doug Kwan + + * gthr-posix.h (__gthread_cond_broadcast, __gthread_cond_wait, + __gthread_cond_wait_recursive): Add to extend interface for POSIX + conditional variables. 
(__GTHREAD_HAS_COND): Macro defined to signify + support of conditional variables. + * gthr-posix95.h (__gthread_cond_broadcast, __gthread_cond_wait, + __gthread_cond_wait_recursive): Add to extend interface for POSIX + conditional variables. (__GTHREAD_HAS_COND): Macro defined to signify + support of conditional variables. + * gthr-single.h (__gthread_cond_broadcast, __gthread_cond_wait, + __gthread_cond_wait_recursive): Add to extend interface for POSIX + conditional variables. + * gthr.h: Update comments to document new interface. + + * include/ext/concurrent.h (class __mutex, + class __recursive_mutex): Add new method gthread_mutex to access + inner gthread mutex. + [__GTHREAD_HAS_COND] (class __concurrence_broadcast_error, + class __concurrence_wait_error, class __cond): Add. + * libsupc++/guard.cc (recursive_push, recursive_pop): Delete. + (init_in_progress_flag, set_init_in_progress_flag): Add to + replace recursive_push and recursive_pop. + (throw_recursive_init_exception): Add. + (acquire, __cxa_guard_acquire, __cxa_guard_abort and + __cxa_guard_release): [__GTHREAD_HAS_COND] Use a conditional + for synchronization of static variable initialization. + The global mutex is only held briefly when guards are + accessed. [!__GTHREAD_HAS_COND] Fall back to the old code, + which deadlocks. + * testsuite/thread/guard.cc: Add new test. It deadlocks with the + old locking code in libstdc++-v3/libsupc++/guard.cc. + +--- gcc/gthr.h (revision 129029) ++++ gcc/gthr.h (revision 129030) +@@ -81,6 +81,24 @@ Software Foundation, 51 Franklin Street, + int __gthread_recursive_mutex_trylock (__gthread_recursive_mutex_t *mutex); + int __gthread_recursive_mutex_unlock (__gthread_recursive_mutex_t *mutex); + ++ The following are supported in POSIX threads only. They are required to ++ fix a deadlock in static initialization inside libsupc++. The header file ++ gthr-posix.h defines a symbol __GTHREAD_HAS_COND to signify that these extra ++ features are supported.
++ ++ Types: ++ __gthread_cond_t ++ ++ Macros: ++ __GTHREAD_COND_INIT ++ __GTHREAD_COND_INIT_FUNCTION ++ ++ Interface: ++ int __gthread_cond_broadcast (__gthread_cond_t *cond); ++ int __gthread_cond_wait (__gthread_cond_t *cond, __gthread_mutex_t *mutex); ++ int __gthread_cond_wait_recursive (__gthread_cond_t *cond, ++ __gthread_recursive_mutex_t *mutex); ++ + All functions returning int should return zero on success or the error + number. If the operation is not supported, -1 is returned. + +--- gcc/gthr-posix.h (revision 129029) ++++ gcc/gthr-posix.h (revision 129030) +@@ -47,6 +47,11 @@ typedef pthread_key_t __gthread_key_t; + typedef pthread_once_t __gthread_once_t; + typedef pthread_mutex_t __gthread_mutex_t; + typedef pthread_mutex_t __gthread_recursive_mutex_t; ++typedef pthread_cond_t __gthread_cond_t; ++ ++/* POSIX like conditional variables are supported. Please look at comments ++ in gthr.h for details. */ ++#define __GTHREAD_HAS_COND 1 + + #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER + #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT +@@ -57,6 +62,7 @@ typedef pthread_mutex_t __gthread_recurs + #else + #define __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION __gthread_recursive_mutex_init_function + #endif ++#define __GTHREAD_COND_INIT PTHREAD_COND_INITIALIZER + + #if SUPPORTS_WEAK && GTHREAD_USE_WEAK + # ifndef __gthrw_pragma +@@ -88,6 +94,8 @@ __gthrw3(pthread_mutex_lock) + __gthrw3(pthread_mutex_trylock) + __gthrw3(pthread_mutex_unlock) + __gthrw3(pthread_mutex_init) ++__gthrw3(pthread_cond_broadcast) ++__gthrw3(pthread_cond_wait) + #else + __gthrw(pthread_once) + __gthrw(pthread_getspecific) +@@ -98,6 +106,8 @@ __gthrw(pthread_mutex_lock) + __gthrw(pthread_mutex_trylock) + __gthrw(pthread_mutex_unlock) + __gthrw(pthread_mutex_init) ++__gthrw(pthread_cond_broadcast) ++__gthrw(pthread_cond_wait) + #endif + + __gthrw(pthread_key_create) +@@ -110,20 +120,16 @@ __gthrw(pthread_mutexattr_destroy) + #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK) + /* 
Objective-C. */ + #if defined(__osf__) && defined(_PTHREAD_USE_MANGLED_NAMES_) +-__gthrw3(pthread_cond_broadcast) + __gthrw3(pthread_cond_destroy) + __gthrw3(pthread_cond_init) + __gthrw3(pthread_cond_signal) +-__gthrw3(pthread_cond_wait) + __gthrw3(pthread_exit) + __gthrw3(pthread_mutex_destroy) + __gthrw3(pthread_self) + #else +-__gthrw(pthread_cond_broadcast) + __gthrw(pthread_cond_destroy) + __gthrw(pthread_cond_init) + __gthrw(pthread_cond_signal) +-__gthrw(pthread_cond_wait) + __gthrw(pthread_exit) + __gthrw(pthread_mutex_destroy) + __gthrw(pthread_self) +@@ -737,6 +743,25 @@ __gthread_recursive_mutex_unlock (__gthr + return __gthread_mutex_unlock (mutex); + } + ++static inline int ++__gthread_cond_broadcast (__gthread_cond_t *cond) ++{ ++ return __gthrw_(pthread_cond_broadcast) (cond); ++} ++ ++static inline int ++__gthread_cond_wait (__gthread_cond_t *cond, __gthread_mutex_t *mutex) ++{ ++ return __gthrw_(pthread_cond_wait) (cond, mutex); ++} ++ ++static inline int ++__gthread_cond_wait_recursive (__gthread_cond_t *cond, ++ __gthread_recursive_mutex_t *mutex) ++{ ++ return __gthread_cond_wait (cond, mutex); ++} ++ + #endif /* _LIBOBJC */ + + #endif /* ! GCC_GTHR_POSIX_H */ +--- gcc/gthr-posix95.h (revision 129029) ++++ gcc/gthr-posix95.h (revision 129030) +@@ -45,6 +45,11 @@ Software Foundation, 51 Franklin Street, + typedef pthread_key_t __gthread_key_t; + typedef pthread_once_t __gthread_once_t; + typedef pthread_mutex_t __gthread_mutex_t; ++typedef pthread_cond_t __gthread_cond_t; ++ ++/* POSIX like conditional variables are supported. Please look at comments ++ in gthr.h for details. 
*/ ++#define __GTHREAD_HAS_COND 1 + + typedef struct { + long depth; +@@ -55,6 +60,7 @@ typedef struct { + #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER + #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT + #define __GTHREAD_RECURSIVE_MUTEX_INIT_FUNCTION __gthread_recursive_mutex_init_function ++#define __GTHREAD_COND_INIT PTHREAD_COND_INITIALIZER + + #if SUPPORTS_WEAK && GTHREAD_USE_WEAK + # define __gthrw(name) \ +@@ -81,14 +87,14 @@ __gthrw(pthread_mutexattr_init) + __gthrw(pthread_mutexattr_destroy) + + __gthrw(pthread_mutex_init) ++__gthrw(pthread_cond_broadcast) ++__gthrw(pthread_cond_wait) + + #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK) + /* Objective-C. */ +-__gthrw(pthread_cond_broadcast) + __gthrw(pthread_cond_destroy) + __gthrw(pthread_cond_init) + __gthrw(pthread_cond_signal) +-__gthrw(pthread_cond_wait) + __gthrw(pthread_exit) + __gthrw(pthread_mutex_destroy) + #ifdef _POSIX_PRIORITY_SCHEDULING +@@ -719,6 +725,25 @@ __gthread_recursive_mutex_unlock (__gthr + return 0; + } + ++static inline int ++__gthread_cond_broadcast (__gthread_cond_t *cond) ++{ ++ return __gthrw_(pthread_cond_broadcast) (cond); ++} ++ ++static inline int ++__gthread_cond_wait (__gthread_cond_t *cond, __gthread_mutex_t *mutex) ++{ ++ return __gthrw_(pthread_cond_wait) (cond, mutex); ++} ++ ++static inline int ++__gthread_cond_wait_recursive (__gthread_cond_t *cond, ++ __gthread_recursive_mutex_t *mutex) ++{ ++ return __gthrw_(pthread_cond_wait) (cond, &mutex->actual); ++} ++ + #endif /* _LIBOBJC */ + + #endif /* ! GCC_GTHR_POSIX_H */ +--- libstdc++-v3/libsupc++/guard.cc (revision 129029) ++++ libstdc++-v3/libsupc++/guard.cc (revision 129218) +@@ -34,6 +34,9 @@ + #include + #include + #include ++#if defined(__GTHREAD_HAS_COND) && !defined(__GTHREAD_COND_INIT) ++#undef __GTHREAD_HAS_COND ++#endif + + // The IA64/generic ABI uses the first byte of the guard variable. + // The ARM EABI uses the least significant bit.
+@@ -46,7 +49,7 @@ namespace + // This is a static class--the need for a static initialization function + // to pass to __gthread_once precludes creating multiple instances, though + // I suppose you could achieve the same effect with a template. +- class static_mutex ++ struct static_mutex + { + static __gthread_recursive_mutex_t mutex; + +@@ -85,8 +88,30 @@ namespace + { + __gthread_recursive_mutex_unlock (&mutex); + } ++ ++ // Simple wrapper for exception safety. ++ struct mutex_wrapper ++ { ++ bool unlock; ++ mutex_wrapper() : unlock(true) ++ { static_mutex::lock(); } ++ ++ ~mutex_wrapper() ++ { ++ if (unlock) ++ static_mutex::unlock(); ++ } ++ }; + } + ++#ifdef __GTHREAD_HAS_COND ++namespace ++{ ++ // A single conditional variable controlling all static initializations. ++ static __gthread_cond_t static_cond = __GTHREAD_COND_INIT; ++} ++#endif ++ + #ifndef _GLIBCXX_GUARD_TEST_AND_ACQUIRE + inline bool + __test_and_acquire (__cxxabiv1::__guard *g) +@@ -135,19 +160,43 @@ namespace __gnu_cxx + recursive_init::~recursive_init() throw() { } + } + ++// ++// Here are C++ run-time routines for guarded initialization of static ++// variables. There are 4 scenarios under which these routines are called: ++// ++// 1. Threads not supported (__GTHREADS not defined) ++// 2. Threads are supported but not enabled at run-time. ++// 3. Threads enabled at run-time but __gthreads_* are not fully POSIX. ++// 4. Threads enabled at run-time and __gthreads_* support all POSIX threads ++// primitives we need here. ++// ++// The old code supported scenarios 1-3 but was broken since it used a global ++// mutex for all threads and had the mutex locked during the whole duration of ++// initialization of a guarded static variable. The following created a dead-lock ++// with the old code. ++// ++// Thread 1 acquires the global mutex. ++// Thread 1 starts initializing static variable. ++// Thread 1 creates thread 2 during initialization.
++// Thread 2 attempts to acquire mutex to initialize another variable. ++// Thread 2 blocks since thread 1 is locking the mutex. ++// Thread 1 waits for result from thread 2 and also blocks. A deadlock. ++// ++// The new code here can handle this situation and thus is more robust. However, ++// we need to use the POSIX thread conditional variable, which is not supported ++// in all platforms, notably older versions of Microsoft Windows. The gthr*.h ++// headers define a symbol __GTHREAD_HAS_COND for platforms that support POSIX ++// like conditional variables. For platforms that do not support conditional ++// variables, we need to fall back to the old code. + namespace __cxxabiv1 + { + static inline int +- recursion_push (__guard* g) +- { +- return ((char *)g)[1]++; +- } ++ init_in_progress_flag(__guard* g) ++ { return ((char *)g)[1]; } + + static inline void +- recursion_pop (__guard* g) +- { +- --((char *)g)[1]; +- } ++ set_init_in_progress_flag(__guard* g, int v) ++ { ((char *)g)[1] = v; } + + static int + acquire_1 (__guard *g) +@@ -155,7 +204,7 @@ namespace __cxxabiv1 + if (_GLIBCXX_GUARD_TEST (g)) + return 0; + +- if (recursion_push (g)) ++ if (init_in_progress_flag (g)) + { + #ifdef __EXCEPTIONS + throw __gnu_cxx::recursive_init(); +@@ -164,6 +213,7 @@ namespace __cxxabiv1 + __builtin_trap (); + #endif + } ++ set_init_in_progress_flag(g, 1); + return 1; + } + +@@ -179,28 +229,40 @@ namespace __cxxabiv1 + + if (__gthread_active_p ()) + { +- // Simple wrapper for exception safety. +- struct mutex_wrapper +- { +- bool unlock; +- mutex_wrapper (): unlock(true) +- { +- static_mutex::lock (); +- } +- ~mutex_wrapper () +- { +- if (unlock) +- static_mutex::unlock (); +- } +- } mw; ++ mutex_wrapper mw; + +- if (acquire_1 (g)) +- { +- mw.unlock = false; +- return 1; ++ while (1) // When this loop is executing, mutex is locked. ++ { ++#ifdef __GTHREAD_HAS_COND ++ // The static is already initialized.
++ if (_GLIBCXX_GUARD_TEST(g)) ++ return 0; // The mutex will be unlocked via wrapper ++ ++ if (init_in_progress_flag(g)) ++ { ++ // The guarded static is currently being initialized by ++ // another thread, so we release mutex and wait for the ++ // conditional variable. We will lock the mutex again after ++ // this. ++ __gthread_cond_wait_recursive(&static_cond, ++ &static_mutex::mutex); ++ } ++ else ++ { ++ set_init_in_progress_flag(g, 1); ++ return 1; // The mutex will be unlocked via wrapper. ++ } ++#else ++ // This provides compatibility with older systems not supporting ++ // POSIX like conditional variables. ++ if (acquire_1 (g)) ++ { ++ mw.unlock = false; ++ return 1; // The mutex still locked. ++ } ++ return 0; // The mutex will be unlocked via wrapper. ++#endif + } +- +- return 0; + } + #endif + +@@ -210,8 +272,24 @@ namespace __cxxabiv1 + extern "C" + void __cxa_guard_abort (__guard *g) + { +- recursion_pop (g); +-#ifdef __GTHREADS ++#ifdef __GTHREAD_HAS_COND ++ if (__gthread_active_p()) ++ { ++ mutex_wrapper mw; ++ ++ set_init_in_progress_flag(g, 0); ++ ++ // If we abort, we still need to wake up all other threads waiting for ++ // the conditional variable. ++ __gthread_cond_broadcast(&static_cond); ++ return; ++ } ++#endif ++ ++ set_init_in_progress_flag(g, 0); ++#if defined(__GTHREADS) && !defined(__GTHREAD_HAS_COND) ++ // This provides compatibility with older systems not supporting POSIX like ++ // conditional variables. 
+ if (__gthread_active_p ()) + static_mutex::unlock (); + #endif +@@ -220,11 +298,27 @@ namespace __cxxabiv1 + extern "C" + void __cxa_guard_release (__guard *g) + { +- recursion_pop (g); ++#ifdef __GTHREAD_HAS_COND ++ if (__gthread_active_p()) ++ { ++ mutex_wrapper mw; ++ ++ set_init_in_progress_flag(g, 0); ++ _GLIBCXX_GUARD_SET_AND_RELEASE(g); ++ ++ __gthread_cond_broadcast(&static_cond); ++ return; ++ } ++#endif ++ ++ set_init_in_progress_flag(g, 0); + _GLIBCXX_GUARD_SET_AND_RELEASE (g); +-#ifdef __GTHREADS +- if (__gthread_active_p ()) +- static_mutex::unlock (); ++ ++#if defined(__GTHREADS) && !defined(__GTHREAD_HAS_COND) ++ // This provides compatibility with older systems not supporting POSIX like ++ // conditional variables. ++ if (__gthread_active_p()) ++ static_mutex::unlock(); + #endif + } + } +--- libstdc++-v3/testsuite/thread/guard.cc (revision 0) ++++ libstdc++-v3/testsuite/thread/guard.cc (revision 129030) +@@ -0,0 +1,67 @@ ++// ++// Copyright (C) 2007 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 2, or (at your option) ++// any later version. ++// ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++// ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING. If not, write to the Free ++// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, ++// USA. 
++ ++// { dg-do run { target *-*-freebsd* *-*-netbsd* *-*-linux* *-*-solaris* *-*-cygwin *-*-darwin* alpha*-*-osf* } } ++// { dg-options "-pthread" { target *-*-freebsd* *-*-netbsd* *-*-linux* *-*-solaris* *-*-darwin* alpha*-*-osf* } } ++ ++#include ++#include ++ ++// This used to deadlock with the old libstdc++ because there is only one ++// global mutex guarding initialization of statics and it is held during by ++// the initializer thread of a static until the variable is completely ++// initialized. If the initializer thread creates and waits for another thread ++// which also initializes a static variable, there will be a deadlock because ++// the first thread is holding the mutex and waiting for the second thread, ++// which is blocked when it is acquiring the mutex. ++ ++int ++get_bar (void) ++{ ++ return 1; ++} ++ ++void* ++do_something (void *arg) ++{ ++ static int bar = get_bar (); ++ return NULL; ++} ++ ++int ++get_foo (void) ++{ ++ int status; ++ pthread_t new_thread; ++ ++ if (pthread_create (&new_thread, NULL, do_something, NULL) != 0) ++ std::abort (); ++ ++ if (pthread_join (new_thread, NULL) != 0) ++ std::abort (); ++ ++ return 1; ++} ++ ++int ++main (int argc, char **argv) ++{ ++ static int foo = get_foo (); ++ return 0; ++} diff --git a/gcc41-sse5-pperm.patch b/gcc41-sse5-pperm.patch new file mode 100644 index 0000000..076a0c6 --- /dev/null +++ b/gcc41-sse5-pperm.patch @@ -0,0 +1,113 @@ +2007-12-29 Jakub Jelinek + + * config/i386/sse.md (sse5_pperm, sse5_pperm_pack_v2di_v4si, + sse5_pperm_pack_v4si_v8hi, sse5_pperm_pack_v8hi_v16qi, + sse5_perm): Fix constraints. + + * gcc.target/i386/i386.exp (check_effective_target_sse5): Use __v8hi + rather than __v2di type. 
+ +--- gcc/config/i386/sse.md.jj 2007-12-29 20:58:15.000000000 +0100 ++++ gcc/config/i386/sse.md 2007-12-29 21:12:49.000000000 +0100 +@@ -8350,13 +8350,13 @@ + [(set_attr "type" "sseiadd1")]) + + ;; SSE5 permute instructions + (define_insn "sse5_pperm" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") +- (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,xm,xm") +- (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,0,x") +- (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")] ++ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") ++ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") ++ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] + UNSPEC_SSE5_PERMUTE))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) +@@ -8453,52 +8453,52 @@ + ;; SSE5 pack instructions that combine two vectors into a smaller vector + (define_insn "sse5_pperm_pack_v2di_v4si" + [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x") + (vec_concat:V4SI + (truncate:V2SI +- (match_operand:V2DI 1 "nonimmediate_operand" "0,0,xm,xm")) ++ (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm")) + (truncate:V2SI +- (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,0,x")))) +- (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))] ++ (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x")))) ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + + (define_insn "sse5_pperm_pack_v4si_v8hi" + [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") + (vec_concat:V8HI + (truncate:V4HI +- (match_operand:V4SI 1 "nonimmediate_operand" "0,0,xm,xm")) ++ (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm")) + 
(truncate:V4HI +- (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,0,x")))) +- (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))] ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x")))) ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + + (define_insn "sse5_pperm_pack_v8hi_v16qi" + [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") + (vec_concat:V16QI + (truncate:V8QI +- (match_operand:V8HI 1 "nonimmediate_operand" "0,0,xm,xm")) ++ (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm")) + (truncate:V8QI +- (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,0,x")))) +- (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))] ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x")))) ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + + ;; Floating point permutation (permps, permpd) + (define_insn "sse5_perm" + [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") + (unspec:SSEMODEF2P +- [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,xm,xm") +- (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,0,x") +- (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")] ++ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x") ++ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] + UNSPEC_SSE5_PERMUTE))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" + "perm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "")]) +--- gcc/testsuite/gcc.target/i386/i386.exp.jj 2007-09-14 11:54:26.000000000 +0200 
++++ gcc/testsuite/gcc.target/i386/i386.exp 2007-12-29 21:42:08.000000000 +0100 +@@ -68,13 +68,13 @@ proc check_effective_target_sse4a { } { + proc check_effective_target_sse5 { } { + return [check_no_compiler_messages sse5 object { + typedef long long __m128i __attribute__ ((__vector_size__ (16))); +- typedef long long __v2di __attribute__ ((__vector_size__ (16))); ++ typedef short __v8hi __attribute__ ((__vector_size__ (16))); + + __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) + { +- return (__m128i) __builtin_ia32_pmacssww ((__v2di)__A, +- (__v2di)__B, +- (__v2di)__C); ++ return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A, ++ (__v8hi)__B, ++ (__v8hi)__C); + } + } "-O2 -msse5" ] + } diff --git a/gcc41-sse5.patch b/gcc41-sse5.patch new file mode 100644 index 0000000..4398430 --- /dev/null +++ b/gcc41-sse5.patch @@ -0,0 +1,7735 @@ +2007-11-12 Dwarakanath Rajagopal + Michael Meissner + + * config/i386/i386.md (sse5_setcc): Use to + get the appropriate suffix for the coms* instruction. + + * config/i386/sse.md (sse5_pcmov_): Correct the operand + constraints to follow the mnemonics for the pcmov instruction + +2007-10-27 Uros Bizjak + + * g++.dg/other/i386-2a.C: New test. + * gcc.target/i386/sse-13.c: Also include mm3dnow.h. Add + "-march=k8 -m3dnow" to check 3dnow and 3dnowA intrinsics. + * gcc.target/i386/sse-14.c: Ditto. + +2007-09-22 Uros Bizjak + + * gcc.target/i386/sse5-hadduX.c: Remove target selector. + * gcc.target/i386/sse5-haddX.c: Ditto. + * gcc.target/i386/sse5-hsubX.c: Ditto. + * gcc.target/i386/sse5-maccXX.c: Ditto. + * gcc.target/i386/sse5-msubXX.c: Ditto. + * gcc.target/i386/sse5-nmaccXX.c: Ditto. + * gcc.target/i386/sse5-nmsubXX.c: Ditto. + * gcc.target/i386/sse5-permpX.c: Ditto. + * gcc.target/i386/sse5-fma.c: Remove target selector. Require lp64 + effective target. + * gcc.target/i386/sse5-fma-vector.c: Ditto. + * gcc.target/i386/sse5-ima-vector.c: Ditto. + * gcc.target/i386/sse5-pcmov.c: Ditto. 
+ * gcc.target/i386/sse5-pcmov2.c: Ditto. + +2007-09-12 Michael Meissner + Dwarakanath Rajagopal + Tony Linthicum + + * config/i386/i386.h (TARGET_ROUND): New macro for the round/ptest + instructions which are shared between SSE4.1 and SSE5. + (TARGET_FUSED_MADD): New macro for -mfused-madd switch. + (TARGET_CPU_CPP_BUILTINS): Add SSE5 support. + + * config/i386/i386.opt (-msse5): New switch for SSE5 support. + (-mfused-madd): New switch to give users control over whether the + compiler optimizes to use the multiply/add SSE5 instructions. + + * config/i386/i386.c (ix86_handle_option): Turn off -msse5 if + -mno-sse, -mno-sse2, -mno-sse3 or -mno-sse4a. + (override_options): Add SSE5 support. + (print_operand): %Y prints comparison codes for SSE5 com/pcom + instructions. + (ix86_expand_sse_movcc): Add SSE5 support. + (IX86_BUILTIN_ROUNDPD): New for SSE4.1/SSE5 intrinsic. + (IX86_BUILTIN_ROUNDPS): Likewise. + (IX86_BUILTIN_ROUNDSD): Likewise. + (IX86_BUILTIN_ROUNDSS): Likewise. + (IX86_BUILTIN_PTESTZ): Likewise. + (IX86_BUILTIN_PTESTC): Likewise. + (IX86_BUILTIN_PTESTNZC): Likewise. + (IX86_BUILTIN_FMADDSS): New for SSE5 intrinsic. + (IX86_BUILTIN_FMADDSD): Ditto. + (IX86_BUILTIN_FMADDPS): Ditto. + (IX86_BUILTIN_FMADDPD): Ditto. + (IX86_BUILTIN_FMSUBSS): Ditto. + (IX86_BUILTIN_FMSUBSD): Ditto. + (IX86_BUILTIN_FMSUBPS): Ditto. + (IX86_BUILTIN_FMSUBPD): Ditto. + (IX86_BUILTIN_FNMADDSS): Ditto. + (IX86_BUILTIN_FNMADDSD): Ditto. + (IX86_BUILTIN_FNMADDPS): Ditto. + (IX86_BUILTIN_FNMADDPD): Ditto. + (IX86_BUILTIN_FNMSUBSS): Ditto. + (IX86_BUILTIN_FNMSUBSD): Ditto. + (IX86_BUILTIN_FNMSUBPS): Ditto. + (IX86_BUILTIN_FNMSUBPD): Ditto. + (IX86_BUILTIN_PCMOV_V2DI): Ditto. + (IX86_BUILTIN_PCMOV_V4SI): Ditto. + (IX86_BUILTIN_PCMOV_V8HI): Ditto. + (IX86_BUILTIN_PCMOV_V16QI): Ditto. + (IX86_BUILTIN_PCMOV_V4SF): Ditto. + (IX86_BUILTIN_PCMOV_V2DF): Ditto. + (IX86_BUILTIN_PPERM): Ditto. + (IX86_BUILTIN_PERMPS): Ditto. + (IX86_BUILTIN_PERMPD): Ditto. + (IX86_BUILTIN_PMACSSWW): Ditto. 
+ (IX86_BUILTIN_PMACSWW): Ditto. + (IX86_BUILTIN_PMACSSWD): Ditto. + (IX86_BUILTIN_PMACSWD): Ditto. + (IX86_BUILTIN_PMACSSDD): Ditto. + (IX86_BUILTIN_PMACSDD): Ditto. + (IX86_BUILTIN_PMACSSDQL): Ditto. + (IX86_BUILTIN_PMACSSDQH): Ditto. + (IX86_BUILTIN_PMACSDQL): Ditto. + (IX86_BUILTIN_PMACSDQH): Ditto. + (IX86_BUILTIN_PMADCSSWD): Ditto. + (IX86_BUILTIN_PMADCSWD): Ditto. + (IX86_BUILTIN_PHADDBW): Ditto. + (IX86_BUILTIN_PHADDBD): Ditto. + (IX86_BUILTIN_PHADDBQ): Ditto. + (IX86_BUILTIN_PHADDWD): Ditto. + (IX86_BUILTIN_PHADDWQ): Ditto. + (IX86_BUILTIN_PHADDDQ): Ditto. + (IX86_BUILTIN_PHADDUBW): Ditto. + (IX86_BUILTIN_PHADDUBD): Ditto. + (IX86_BUILTIN_PHADDUBQ): Ditto. + (IX86_BUILTIN_PHADDUWD): Ditto. + (IX86_BUILTIN_PHADDUWQ): Ditto. + (IX86_BUILTIN_PHADDUDQ): Ditto. + (IX86_BUILTIN_PHSUBBW): Ditto. + (IX86_BUILTIN_PHSUBWD): Ditto. + (IX86_BUILTIN_PHSUBDQ): Ditto. + (IX86_BUILTIN_PROTB): Ditto. + (IX86_BUILTIN_PROTW): Ditto. + (IX86_BUILTIN_PROTD): Ditto. + (IX86_BUILTIN_PROTQ): Ditto. + (IX86_BUILTIN_PROTB_IMM): Ditto. + (IX86_BUILTIN_PROTW_IMM): Ditto. + (IX86_BUILTIN_PROTD_IMM): Ditto. + (IX86_BUILTIN_PROTQ_IMM): Ditto. + (IX86_BUILTIN_PSHLB): Ditto. + (IX86_BUILTIN_PSHLW): Ditto. + (IX86_BUILTIN_PSHLD): Ditto. + (IX86_BUILTIN_PSHLQ): Ditto. + (IX86_BUILTIN_PSHAB): Ditto. + (IX86_BUILTIN_PSHAW): Ditto. + (IX86_BUILTIN_PSHAD): Ditto. + (IX86_BUILTIN_PSHAQ): Ditto. + (IX86_BUILTIN_FRCZSS): Ditto. + (IX86_BUILTIN_FRCZSD): Ditto. + (IX86_BUILTIN_FRCZPS): Ditto. + (IX86_BUILTIN_FRCZPD): Ditto. + (IX86_BUILTIN_CVTPH2PS): Ditto. + (IX86_BUILTIN_CVTPS2PH): Ditto. + (IX86_BUILTIN_COMEQSS): Ditto. + (IX86_BUILTIN_COMNESS): Ditto. + (IX86_BUILTIN_COMLTSS): Ditto. + (IX86_BUILTIN_COMLESS): Ditto. + (IX86_BUILTIN_COMGTSS): Ditto. + (IX86_BUILTIN_COMGESS): Ditto. + (IX86_BUILTIN_COMUEQSS): Ditto. + (IX86_BUILTIN_COMUNESS): Ditto. + (IX86_BUILTIN_COMULTSS): Ditto. + (IX86_BUILTIN_COMULESS): Ditto. + (IX86_BUILTIN_COMUGTSS): Ditto. + (IX86_BUILTIN_COMUGESS): Ditto. 
+ (IX86_BUILTIN_COMORDSS): Ditto. + (IX86_BUILTIN_COMUNORDSS): Ditto. + (IX86_BUILTIN_COMFALSESS): Ditto. + (IX86_BUILTIN_COMTRUESS): Ditto. + (IX86_BUILTIN_COMEQSD): Ditto. + (IX86_BUILTIN_COMNESD): Ditto. + (IX86_BUILTIN_COMLTSD): Ditto. + (IX86_BUILTIN_COMLESD): Ditto. + (IX86_BUILTIN_COMGTSD): Ditto. + (IX86_BUILTIN_COMGESD): Ditto. + (IX86_BUILTIN_COMUEQSD): Ditto. + (IX86_BUILTIN_COMUNESD): Ditto. + (IX86_BUILTIN_COMULTSD): Ditto. + (IX86_BUILTIN_COMULESD): Ditto. + (IX86_BUILTIN_COMUGTSD): Ditto. + (IX86_BUILTIN_COMUGESD): Ditto. + (IX86_BUILTIN_COMORDSD): Ditto. + (IX86_BUILTIN_COMUNORDSD): Ditto. + (IX86_BUILTIN_COMFALSESD): Ditto. + (IX86_BUILTIN_COMTRUESD): Ditto. + (IX86_BUILTIN_COMEQPS): Ditto. + (IX86_BUILTIN_COMNEPS): Ditto. + (IX86_BUILTIN_COMLTPS): Ditto. + (IX86_BUILTIN_COMLEPS): Ditto. + (IX86_BUILTIN_COMGTPS): Ditto. + (IX86_BUILTIN_COMGEPS): Ditto. + (IX86_BUILTIN_COMUEQPS): Ditto. + (IX86_BUILTIN_COMUNEPS): Ditto. + (IX86_BUILTIN_COMULTPS): Ditto. + (IX86_BUILTIN_COMULEPS): Ditto. + (IX86_BUILTIN_COMUGTPS): Ditto. + (IX86_BUILTIN_COMUGEPS): Ditto. + (IX86_BUILTIN_COMORDPS): Ditto. + (IX86_BUILTIN_COMUNORDPS): Ditto. + (IX86_BUILTIN_COMFALSEPS): Ditto. + (IX86_BUILTIN_COMTRUEPS): Ditto. + (IX86_BUILTIN_COMEQPD): Ditto. + (IX86_BUILTIN_COMNEPD): Ditto. + (IX86_BUILTIN_COMLTPD): Ditto. + (IX86_BUILTIN_COMLEPD): Ditto. + (IX86_BUILTIN_COMGTPD): Ditto. + (IX86_BUILTIN_COMGEPD): Ditto. + (IX86_BUILTIN_COMUEQPD): Ditto. + (IX86_BUILTIN_COMUNEPD): Ditto. + (IX86_BUILTIN_COMULTPD): Ditto. + (IX86_BUILTIN_COMULEPD): Ditto. + (IX86_BUILTIN_COMUGTPD): Ditto. + (IX86_BUILTIN_COMUGEPD): Ditto. + (IX86_BUILTIN_COMORDPD): Ditto. + (IX86_BUILTIN_COMUNORDPD): Ditto. + (IX86_BUILTIN_COMFALSEPD): Ditto. + (IX86_BUILTIN_COMTRUEPD): Ditto. + (IX86_BUILTIN_PCOMEQUB): Ditto. + (IX86_BUILTIN_PCOMNEUB): Ditto. + (IX86_BUILTIN_PCOMLTUB): Ditto. + (IX86_BUILTIN_PCOMLEUB): Ditto. + (IX86_BUILTIN_PCOMGTUB): Ditto. + (IX86_BUILTIN_PCOMGEUB): Ditto. 
+ (IX86_BUILTIN_PCOMFALSEUB): Ditto. + (IX86_BUILTIN_PCOMTRUEUB): Ditto. + (IX86_BUILTIN_PCOMEQUW): Ditto. + (IX86_BUILTIN_PCOMNEUW): Ditto. + (IX86_BUILTIN_PCOMLTUW): Ditto. + (IX86_BUILTIN_PCOMLEUW): Ditto. + (IX86_BUILTIN_PCOMGTUW): Ditto. + (IX86_BUILTIN_PCOMGEUW): Ditto. + (IX86_BUILTIN_PCOMFALSEUW): Ditto. + (IX86_BUILTIN_PCOMTRUEUW): Ditto. + (IX86_BUILTIN_PCOMEQUD): Ditto. + (IX86_BUILTIN_PCOMNEUD): Ditto. + (IX86_BUILTIN_PCOMLTUD): Ditto. + (IX86_BUILTIN_PCOMLEUD): Ditto. + (IX86_BUILTIN_PCOMGTUD): Ditto. + (IX86_BUILTIN_PCOMGEUD): Ditto. + (IX86_BUILTIN_PCOMFALSEUD): Ditto. + (IX86_BUILTIN_PCOMTRUEUD): Ditto. + (IX86_BUILTIN_PCOMEQUQ): Ditto. + (IX86_BUILTIN_PCOMNEUQ): Ditto. + (IX86_BUILTIN_PCOMLTUQ): Ditto. + (IX86_BUILTIN_PCOMLEUQ): Ditto. + (IX86_BUILTIN_PCOMGTUQ): Ditto. + (IX86_BUILTIN_PCOMGEUQ): Ditto. + (IX86_BUILTIN_PCOMFALSEUQ): Ditto. + (IX86_BUILTIN_PCOMTRUEUQ): Ditto. + (IX86_BUILTIN_PCOMEQB): Ditto. + (IX86_BUILTIN_PCOMNEB): Ditto. + (IX86_BUILTIN_PCOMLTB): Ditto. + (IX86_BUILTIN_PCOMLEB): Ditto. + (IX86_BUILTIN_PCOMGTB): Ditto. + (IX86_BUILTIN_PCOMGEB): Ditto. + (IX86_BUILTIN_PCOMFALSEB): Ditto. + (IX86_BUILTIN_PCOMTRUEB): Ditto. + (IX86_BUILTIN_PCOMEQW): Ditto. + (IX86_BUILTIN_PCOMNEW): Ditto. + (IX86_BUILTIN_PCOMLTW): Ditto. + (IX86_BUILTIN_PCOMLEW): Ditto. + (IX86_BUILTIN_PCOMGTW): Ditto. + (IX86_BUILTIN_PCOMGEW): Ditto. + (IX86_BUILTIN_PCOMFALSEW): Ditto. + (IX86_BUILTIN_PCOMTRUEW): Ditto. + (IX86_BUILTIN_PCOMEQD): Ditto. + (IX86_BUILTIN_PCOMNED): Ditto. + (IX86_BUILTIN_PCOMLTD): Ditto. + (IX86_BUILTIN_PCOMLED): Ditto. + (IX86_BUILTIN_PCOMGTD): Ditto. + (IX86_BUILTIN_PCOMGED): Ditto. + (IX86_BUILTIN_PCOMFALSED): Ditto. + (IX86_BUILTIN_PCOMTRUED): Ditto. + (IX86_BUILTIN_PCOMEQQ): Ditto. + (IX86_BUILTIN_PCOMNEQ): Ditto. + (IX86_BUILTIN_PCOMLTQ): Ditto. + (IX86_BUILTIN_PCOMLEQ): Ditto. + (IX86_BUILTIN_PCOMGTQ): Ditto. + (IX86_BUILTIN_PCOMGEQ): Ditto. + (IX86_BUILTIN_PCOMFALSEQ): Ditto. + (IX86_BUILTIN_PCOMTRUEQ): Ditto. 
+ (bdesc_ptest): New table for SSE4.1/SSE5 intrinsic support. + (bdesc_sse_3arg): Likewise. + (bdesc_1arg): Add roundpd/roundps. + (enum multi_arg_type): New enum for describing the various SSE5 + intrinsic argument types. + (bdesc_multi_arg): New table for SSE5 intrinsics. + (ix86_init_mmx_sse_builtins): Add SSE5 intrinsic support. + (ix86_expand_sse_4_operands_builtin, ix86_expand_sse_ptest): Add + SSE4.1/SSE5 intrinsic support. + (ix86_expand_multi_arg_builtin): New function for creating SSE5 + intrinsics. + (ix86_expand_unop_builtin): Handle CODE_FOR_sse4_1_roundpd and + CODE_FOR_sse4_1_roundps. + (ix86_expand_builtin): Add SSE5 intrinsic support. + (ix86_sse5_valid_op_p): New function to validate SSE5 3 and 4 + operand instructions. + (ix86_expand_sse5_multiple_memory): New function to split the + second memory reference from SSE5 instructions. + (type_has_variadic_args_p): Delete in favor of stdarg_p. + + * config/i386/i386-protos.h (ix86_sse5_valid_op_p): Add declaration. + (ix86_expand_sse5_multiple_memory): Ditto. + + * config/i386/i386.md (UNSPEC_PTEST, UNSPEC_ROUND): Add new UNSPEC + constants for SSE4.1/SSE5 support. + (UNSPEC_SSE5_INTRINSIC): Add new UNSPEC constant for SSE5 support. + (UNSPEC_SSE5_UNSIGNED_CMP): Ditto. + (UNSPEC_SSE5_TRUEFALSE): Ditto. + (UNSPEC_SSE5_PERMUTE): Ditto. + (UNSPEC_SSE5_ASHIFT): Ditto. + (UNSPEC_SSE5_LSHIFT): Ditto. + (UNSPEC_FRCZ): Ditto. + (UNSPEC_CVTPH2PS): Ditto. + (UNSPEC_CVTPS2PH): Ditto. + (PCOM_FALSE): Add new constant for true/false SSE5 comparisons. + (PCOM_TRUE): Ditto. + (COM_FALSE_S): Ditto. + (COM_FALSE_P): Ditto. + (COM_TRUE_S): Ditto. + (COM_TRUE_P): Ditto. + (type attribute): Add ssemuladd, sseiadd1, ssecvt1, sse4arg types. + (unit attribute): Add support for ssemuladd, ssecvt1, sseiadd1 sse4arg + types. + (memory attribute): Ditto. + (MODEF): New mode macro. + (ssemodefsuffix): New mode attr. + (sse4_1_round2): New. + (sse_setccsf): Disable if SSE5. + (sse_setccdf): Ditto. 
+ (sse5_setcc): New support for SSE5 conditional move. + (sse5_pcmov_): Ditto. + + * config/i386/sse.md (SSEMODE1248): New mode iterator for SSE5. + (SSEMODEF4): Ditto. + (SSEMODEF2P): Ditto. + (ssemodesuffixf4): New mode attribute for SSE5. + (ssemodesuffixf2s): Ditto. + (ssemodesuffixf2c): Ditto. + (sserotatemax): Ditto. + (ssescalarmode): Ditto. + (sse5_fmadd4): Add SSE5 floating point multiply/add + instructions. + (sse5_vmfmadd4): Ditto. + (sse5_fmsub4): Ditto. + (sse5_vmfmsub4): Ditto. + (sse5_fnmadd4): Ditto. + (sse5_vmfnmadd4): Ditto. + (sse5_fnmsub4): Ditto. + (sse5_vmfnmsub4): Ditto. + (sse5i_fmadd4): Ditto. + (sse5i_fmsub4): Ditto. + (sse5i_fnmadd4): Ditto. + (sse5i_fnmsub4): Ditto. + (sse5i_vmfmadd4): Ditto. + (sse5i_vmfmsub4): Ditto. + (sse5i_vmfnmadd4): Ditto. + (sse5i_vmfnmsub4): Ditto. + (mulv4si3): Add SSE5 support. + (sse5_mulv4si3): New insn for 32-bit multiply support on SSE5. + (sse4_1_ptest): New. + (sse4_1_roundpd): Ditto. + (sse4_1_roundps): Ditto. + (sse4_1_roundsd): Ditto. + (sse4_1_roundss): Ditto. + (sse_maskcmpv4sf3): Disable if SSE5 so the SSE5 instruction will + be generated. + (sse_vmmaskcmpv4sf3): Ditto. + (sse2_maskcmpv2df3): Ditto. + (sse2_vmmaskcmpv2df3): Ditto. + (sse2_eq3): Ditto. + (sse2_gt3): Ditto. + (sse5_pcmov_): Add SSE5 support. + (vec_unpacku_hi_v16qi): Ditto. + (vec_unpacks_hi_v16qi): Ditto. + (vec_unpacku_lo_v16qi): Ditto. + (vec_unpacks_lo_v16qi): Ditto. + (vec_unpacku_hi_v8hi): Ditto. + (vec_unpacks_hi_v8hi): Ditto. + (vec_unpacku_lo_v8hi): Ditto. + (vec_unpacks_lo_v8hi): Ditto. + (vec_unpacku_hi_v4si): Ditto. + (vec_unpacks_hi_v4si): Ditto. + (vec_unpacku_lo_v4si): Ditto. + (vec_unpacks_lo_v4si): Ditto. + (sse5_pmacsww): New SSE5 intrinsic insn. + (sse5_pmacssww): Ditto. + (sse5_pmacsdd): Ditto. + (sse5_pmacssdd): Ditto. + (sse5_pmacssdql): Ditto. + (sse5_pmacssdqh): Ditto. + (sse5_pmacsdqh): Ditto. + (sse5_pmacsswd): Ditto. + (sse5_pmacswd): Ditto. + (sse5_pmadcsswd): Ditto. + (sse5_pmadcswd): Ditto. 
+ (sse5_pcmov_): Conditional move support on SSE5. + (sse5_phaddbw): New SSE5 intrinsic insn. + (sse5_phaddbd): Ditto. + (sse5_phaddbq): Ditto. + (sse5_phaddwd): Ditto. + (sse5_phaddwq): Ditto. + (sse5_phadddq): Ditto. + (sse5_phaddubw): Ditto. + (sse5_phaddubd): Ditto. + (sse5_phaddubq): Ditto. + (sse5_phadduwd): Ditto. + (sse5_phadduwq): Ditto. + (sse5_phaddudq): Ditto. + (sse5_phsubbw): Ditto. + (sse5_phsubwd): Ditto. + (sse5_phsubdq): Ditto. + (sse5_pperm): Ditto. + (sse5_pperm_sign_v16qi_v8hi): New insns for pack/unpack with SSE5. + (sse5_pperm_zero_v16qi_v8hi): Ditto. + (sse5_pperm_sign_v8hi_v4si): Ditto. + (sse5_pperm_zero_v8hi_v4si): Ditto. + (sse5_pperm_sign_v4si_v2di): Ditto. + (sse5_pperm_sign_v4si_v2di): Ditto. + (sse5_pperm_pack_v2di_v4si): Ditto. + (sse5_pperm_pack_v4si_v8hi): Ditto. + (sse5_pperm_pack_v8hi_v16qi): Ditto. + (sse5_perm): New SSE5 intrinsic insn. + (rotl3): Ditto. + (sse5_rotl3): Ditto. + (sse5_ashl3): Ditto. + (sse5_lshl3): Ditto. + (sse5_frcz2): Ditto. + (sse5s_frcz2): Ditto. + (sse5_cvtph2ps): Ditto. + (sse5_cvtps2ph): Ditto. + (sse5_vmmaskcmp3): Ditto. + (sse5_com_tf3): Ditto. + (sse5_maskcmp3): Ditto. + (sse5_maskcmp_uns3): Ditto. + (sse5_maskcmp_uns23): Ditto. + (sse5_pcom_tf3): Ditto. + + * config/i386/predicates.md (const_0_to_31_operand): New predicate + to match 0..31. + (sse5_comparison_float_operator): New predicate to match the + comparison operators supported by the SSE5 com instruction. + (ix86_comparison_int_operator): New predicate to match just the + signed int comparisons. + (ix86_comparison_uns_operator): New predicate to match just the + unsigned int comparisons. + + * doc/invoke.texi (-msse5): Add documentation. + (-mfused-madd): Ditto. + + * doc/extend.texi (x86 intrinsics): Document new SSE5 intrinsics. + + * config.gcc (i[34567]86-*-*): Include bmmintrin.h and + mmintrin-common.h. + (x86_64-*-*): Ditto. + + * config/i386/bmmintrin.h: New file, provide common x86 compiler + intrinsics for SSE5. 
+ + * config/i386/mmintrin-common.h: New file, to contain common + instructions between SSE4.1 and SSE5. + + * gcc.dg/i386-cpuid.h (bit_SSE5): Define SSE5 bit. + + * gcc.target/i386/sse5-hadduX.c: Add support for SSE5 tests. + * gcc.target/i386/sse5-hsubX.c: Ditto. + * gcc.target/i386/sse5-permpX.c: Ditto. + * gcc.target/i386/sse5-haddX.c: Ditto. + * gcc.target/i386/sse5-maccXX.c: Ditto. + * gcc.target/i386/sse5-msubXX.c: Ditto. + * gcc.target/i386/sse5-nmaccXX.c: Ditto. + * gcc.target/i386/sse5-nmsubXX.c: Ditto. + + * gcc.target/i386/sse5-pcmov.c: New file to make sure the compiler + optimizes floating point conditional moves into the pcmov + instruction on SSE5. + * gcc.target/i386/sse5-pcmov2.c: Ditto. + + * gcc.target/i386/sse5-ima-vector.c: New file to make sure the + compiler optimizes vector 32-bit int (a*b)+c into pmacsdd on + SSE5. + + * gcc.target/i386/sse5-fma-vector.c: New file to make sure the + compiler optimizes vector (a*b)+c into fmadd on SSE5. + + * gcc.target/i386/sse5-fma.c: New file to make sure the compiler + optimizes (a*b)+c into fmadd on SSE5. + + * gcc.target/i386/i386.exp (check_effective_target_sse5): Check + whether the SSE5 instructions can be generated. + + * gcc.target/i386/sse5-check.h: New. Add support for + SSE5 tests. + + * gcc.target/i386/sse-12a.c: New test. + * gcc.target/i386/sse-13a.c: New test. + * gcc.target/i386/sse-14a.c: New test. + +2007-05-22 H.J. Lu + Richard Henderson + + * config/i386/i386-modes.def (V2QI): New. + +2007-05-20 Uros Bizjak + + * config/386/mm3dnow.h (_m_from_float): Use C++ compatible + assignment. + (_m_to_float): Use C89 compatible assignment. + +2006-05-23 Kazu Hirata + + PR target/27696 + * config/i386/i386.c (ix86_expand_builtin): Use + gen_sse3_monitor64 for TARGET_64BIT. 
+ +--- gcc/config.gcc.jj 2007-12-28 15:43:25.000000000 +0100 ++++ gcc/config.gcc 2007-12-28 20:04:35.000000000 +0100 +@@ -264,12 +264,14 @@ xscale-*-*) + i[34567]86-*-*) + cpu_type=i386 + extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h +- pmmintrin.h tmmintrin.h ammintrin.h" ++ pmmintrin.h tmmintrin.h ammintrin.h bmmintrin.h ++ mmintrin-common.h" + ;; + x86_64-*-*) + cpu_type=i386 + extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h +- pmmintrin.h tmmintrin.h ammintrin.h" ++ pmmintrin.h tmmintrin.h ammintrin.h bmmintrin.h ++ mmintrin-common.h" + need_64bit_hwint=yes + ;; + ia64-*-*) +--- gcc/config/i386/bmmintrin.h 2007-12-28 20:04:35.000000000 +0100 ++++ gcc/config/i386/bmmintrin.h 2007-12-31 10:19:52.000000000 +0100 +@@ -0,0 +1,1260 @@ ++/* Copyright (C) 2007 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING. If not, write to ++ the Free Software Foundation, 51 Franklin Street, Fifth Floor, ++ Boston, MA 02110-1301, USA. */ ++ ++/* As a special exception, if you include this header file into source ++ files compiled by GCC, this header file does not by itself cause ++ the resulting executable to be covered by the GNU General Public ++ License. This exception does not however invalidate any other ++ reasons why the executable file might be covered by the GNU General ++ Public License. 
*/ ++ ++#ifndef _BMMINTRIN_H_INCLUDED ++#define _BMMINTRIN_H_INCLUDED ++ ++#ifndef __SSE5__ ++# error "SSE5 instruction set not enabled" ++#else ++ ++/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */ ++#include ++#include ++ ++/* Floating point multiply/add type instructions */ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ 
return (__m128d) __builtin_ia32_fmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) ++{ ++ return (__m128) __builtin_ia32_fnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) ++{ ++ return (__m128d) __builtin_ia32_fnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C); ++} ++ ++/* Integer multiply/add intructions. 
*/ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return 
(__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); ++} ++ ++/* Packed Integer Horizontal Add and Subtract */ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddw_epi8(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddbw ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddd_epi8(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddbd ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddq_epi8(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddbq ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddd_epi16(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddwd ((__v8hi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddq_epi16(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddwq ((__v8hi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddq_epi32(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phadddq ((__v4si)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddw_epu8(__m128i __A) ++{ ++ 
return (__m128i) __builtin_ia32_phaddubw ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddd_epu8(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddubd ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddq_epu8(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddubq ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddd_epu16(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phadduwd ((__v8hi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddq_epu16(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phadduwq ((__v8hi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_haddq_epu32(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phaddudq ((__v4si)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_hsubw_epi8(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phsubbw ((__v16qi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_hsubd_epi16(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phsubwd ((__v8hi)__A); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_hsubq_epi32(__m128i __A) ++{ ++ return (__m128i) __builtin_ia32_phsubdq ((__v4si)__A); ++} ++ ++/* Vector conditional move and permute */ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pcmov (__A, __B, __C); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) ++{ ++ return (__m128i) __builtin_ia32_pperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); ++} ++ ++static __inline __m128 
__attribute__((__always_inline__, __artificial__)) ++_mm_perm_ps(__m128 __A, __m128 __B, __m128i __C) ++{ ++ return (__m128) __builtin_ia32_permps ((__m128)__A, (__m128)__B, (__v16qi)__C); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_perm_pd(__m128d __A, __m128d __B, __m128i __C) ++{ ++ return (__m128d) __builtin_ia32_permpd ((__m128d)__A, (__m128d)__B, (__v16qi)__C); ++} ++ ++/* Packed Integer Rotates and Shifts */ ++ ++/* Rotates - Non-Immediate form */ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_rot_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_rot_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_rot_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_rot_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_protq ((__v2di)__A, (__v2di)__B); ++} ++ ++ ++/* Rotates - Immediate form */ ++#ifdef __OPTIMIZE__ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_roti_epi8(__m128i __A, int __B) ++{ ++ return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_roti_epi16(__m128i __A, int __B) ++{ ++ return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_roti_epi32(__m128i __A, int __B) ++{ ++ return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, 
__artificial__)) ++_mm_roti_epi64(__m128i __A, int __B) ++{ ++ return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B); ++} ++#else ++#define _mm_roti_epi8(A, B) ((__m128i) __builtin_ia32_protbi ((__v16qi)(A), B)) ++#define _mm_roti_epi16(A, B) ((__m128i) __builtin_ia32_protwi ((__v8hi)(A), B)) ++#define _mm_roti_epi32(A, B) ((__m128i) __builtin_ia32_protdi ((__v4si)(A), B)) ++#define _mm_roti_epi64(A, B) ((__m128i) __builtin_ia32_protqi ((__v2di)(A), B)) ++#endif ++ ++/* pshl */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_shl_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_shl_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_shl_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_shl_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B); ++} ++ ++/* psha */ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_sha_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_sha_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_sha_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_sha_epi64(__m128i 
__A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B); ++} ++ ++/* Compare and Predicate Generation */ ++ ++/* com (floating point, packed single) */ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comeqps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comltps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comle_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comleps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comunord_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comunordps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comnlt_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comnle_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comord_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comordps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comueq_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 
__attribute__((__always_inline__, __artificial__)) ++_mm_comnge_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comngt_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comoneq_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comge_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_ps(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B); ++} ++ ++/* com (floating point, packed double) */ ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comeqpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comltpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comle_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comlepd ((__v2df)__A, 
(__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comunord_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comunordpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comnlt_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comnle_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B); ++} ++ ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comord_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comordpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comueq_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comnge_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comngt_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comoneq_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) 
__builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comge_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_pd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B); ++} ++ ++/* com (floating point, scalar single) */ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comeqss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comltss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comle_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comless ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comunord_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comunordss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comnlt_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comnle_ss(__m128 __A, 
__m128 __B) ++{ ++ return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B); ++} ++ ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comord_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comordss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comueq_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comnge_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comngt_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comoneq_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comge_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_ss(__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B); ++} ++ ++/* com (floating point, scalar double) */ ++ ++static __inline __m128d __attribute__((__always_inline__, 
__artificial__)) ++_mm_comeq_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comeqsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comltsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comle_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comlesd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comunord_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comunordsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comnlt_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comnle_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B); ++} ++ ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comord_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comordsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comueq_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comnge_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d 
__attribute__((__always_inline__, __artificial__)) ++_mm_comngt_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comoneq_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comge_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_sd(__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B); ++} ++ ++ ++/*pcom (integer, unsigned bytes) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) 
__builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epu8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B); ++} ++ ++/*pcom (integer, unsigned words) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i 
__attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epu16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++/*pcom (integer, unsigned double words) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epu32(__m128i __A, __m128i __B) ++{ 
++ return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epu32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B); ++} ++ ++/*pcom (integer, unsigned quad words) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epu64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B); ++} ++ ++/*pcom (integer, 
signed bytes) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epi8(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B); ++} ++ ++/*pcom (integer, signed words) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) 
++_mm_comle_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epi16(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B); ++} ++ ++/*pcom (integer, signed double words) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, 
(__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comeq_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epi32(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B); ++} ++ ++/*pcom (integer, signed quad words) */ ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comlt_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comle_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comgt_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comge_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) 
++_mm_comeq_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comneq_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comfalse_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B); ++} ++ ++static __inline __m128i __attribute__((__always_inline__, __artificial__)) ++_mm_comtrue_epi64(__m128i __A, __m128i __B) ++{ ++ return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B); ++} ++ ++/* FRCZ */ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_frcz_ps (__m128 __A) ++{ ++ return (__m128) __builtin_ia32_frczps ((__v4sf)__A); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_frcz_pd (__m128d __A) ++{ ++ return (__m128d) __builtin_ia32_frczpd ((__v2df)__A); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_frcz_ss (__m128 __A, __m128 __B) ++{ ++ return (__m128) __builtin_ia32_frczss ((__v4sf)__A, (__v4sf)__B); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_frcz_sd (__m128d __A, __m128d __B) ++{ ++ return (__m128d) __builtin_ia32_frczsd ((__v2df)__A, (__v2df)__B); ++} ++ ++#endif /* __SSE5__ */ ++ ++#endif /* _BMMINTRIN_H_INCLUDED */ +--- gcc/config/i386/i386-modes.def.jj 2007-02-20 22:38:59.000000000 +0100 ++++ gcc/config/i386/i386-modes.def 2007-12-28 20:04:35.000000000 +0100 +@@ -68,6 +68,7 @@ VECTOR_MODES (INT, 8); /* V + VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ + VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ + VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ ++VECTOR_MODE (INT, QI, 2); /* V2QI */ + VECTOR_MODE (INT, DI, 4); /* V4DI */ + VECTOR_MODE (INT, 
SI, 8); /* V8SI */ + VECTOR_MODE (INT, HI, 16); /* V16HI */ +--- gcc/config/i386/i386-protos.h.jj 2007-02-20 22:38:59.000000000 +0100 ++++ gcc/config/i386/i386-protos.h 2007-12-28 20:04:35.000000000 +0100 +@@ -48,6 +48,10 @@ extern bool x86_extended_QIreg_mentioned + extern bool x86_extended_reg_mentioned_p (rtx); + extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); + ++extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int); ++extern void ix86_expand_sse5_multiple_memory (rtx [], int, ++ enum machine_mode mode); ++ + extern int ix86_expand_movmem (rtx, rtx, rtx, rtx); + extern int ix86_expand_clrmem (rtx, rtx, rtx); + extern int ix86_expand_strlen (rtx, rtx, rtx, rtx); +--- gcc/config/i386/i386.c 2007-12-28 22:12:02.000000000 +0100 ++++ gcc/config/i386/i386.c 2007-12-31 10:09:34.000000000 +0100 +@@ -1636,24 +1636,32 @@ ix86_handle_option (size_t code, const c + case OPT_msse: + if (!value) + { +- target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A); +- target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A; ++ target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A | MASK_SSE5); ++ target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A | MASK_SSE5; + } + return true; + + case OPT_msse2: + if (!value) + { +- target_flags &= ~(MASK_SSE3 | MASK_SSE4A); +- target_flags_explicit |= MASK_SSE3 | MASK_SSE4A; ++ target_flags &= ~(MASK_SSE3 | MASK_SSE4A | MASK_SSE5); ++ target_flags_explicit |= MASK_SSE3 | MASK_SSE4A | MASK_SSE5; + } + return true; + + case OPT_msse3: + if (!value) + { +- target_flags &= ~MASK_SSE4A; +- target_flags_explicit |= MASK_SSE4A; ++ target_flags &= ~(MASK_SSE4A | MASK_SSE5); ++ target_flags_explicit |= MASK_SSE4A | MASK_SSE5; ++ } ++ return true; ++ ++ case OPT_msse4a: ++ if (!value) ++ { ++ target_flags &= ~MASK_SSE5; ++ target_flags_explicit |= MASK_SSE5; + } + return true; + +@@ -1728,7 +1736,8 @@ override_options (void) + PTA_CX16 = 512, + PTA_POPCNT = 1024, + PTA_ABM = 2048, +- PTA_SSE4A = 4096 ++ PTA_SSE4A = 
4096, ++ PTA_SSE5 = 8192 + } flags; + } + const processor_alias_table[] = +@@ -1948,6 +1957,9 @@ override_options (void) + if (processor_alias_table[i].flags & PTA_SSE4A + && !(target_flags_explicit & MASK_SSE4A)) + target_flags |= MASK_SSE4A; ++ if (processor_alias_table[i].flags & PTA_SSE5 ++ && !(target_flags_explicit & MASK_SSE5)) ++ target_flags |= MASK_SSE5; + if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) + error ("CPU you selected does not support x86-64 " + "instruction set"); +@@ -2139,6 +2151,10 @@ override_options (void) + if (TARGET_SSSE3) + target_flags |= MASK_SSE3; + ++ /* Turn on SSE4A builtins for -msse5. */ ++ if (TARGET_SSE5) ++ target_flags |= MASK_SSE4A; ++ + /* Turn on SSE3 builtins for -msse4a. */ + if (TARGET_SSE4A) + target_flags |= MASK_SSE3; +@@ -2476,6 +2492,171 @@ optimization_options (int level, int siz + #endif + } + ++/* Validate whether a SSE5 instruction is valid or not. ++ OPERANDS is the array of operands. ++ NUM is the number of operands. ++ USES_OC0 is true if the instruction uses OC0 and provides 4 variants. ++ NUM_MEMORY is the maximum number of memory operands to accept. */ ++bool ++ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num, bool uses_oc0, int num_memory) ++{ ++ int mem_mask; ++ int mem_count; ++ int i; ++ ++ /* Count the number of memory arguments */ ++ mem_mask = 0; ++ mem_count = 0; ++ for (i = 0; i < num; i++) ++ { ++ enum machine_mode mode = GET_MODE (operands[i]); ++ if (register_operand (operands[i], mode)) ++ ; ++ ++ else if (memory_operand (operands[i], mode)) ++ { ++ mem_mask |= (1 << i); ++ mem_count++; ++ } ++ ++ else ++ return false; ++ } ++ ++ /* If there were no memory operations, allow the insn */ ++ if (mem_mask == 0) ++ return true; ++ ++ /* Do not allow the destination register to be a memory operand. */ ++ else if (mem_mask & (1 << 0)) ++ return false; ++ ++ /* If there are too many memory operations, disallow the instruction. 
While ++ the hardware only allows 1 memory reference, before register allocation ++ for some insns, we allow two memory operations sometimes in order to allow ++ code like the following to be optimized: ++ ++ float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; } ++ ++ or similar cases that are vectorized into using the fmaddss ++ instruction. */ ++ else if (mem_count > num_memory) ++ return false; ++ ++ /* Don't allow more than one memory operation if not optimizing. */ ++ else if (mem_count > 1 && !optimize) ++ return false; ++ ++ else if (num == 4 && mem_count == 1) ++ { ++ /* formats (destination is the first argument), example fmaddss: ++ xmm1, xmm1, xmm2, xmm3/mem ++ xmm1, xmm1, xmm2/mem, xmm3 ++ xmm1, xmm2, xmm3/mem, xmm1 ++ xmm1, xmm2/mem, xmm3, xmm1 */ ++ if (uses_oc0) ++ return ((mem_mask == (1 << 1)) ++ || (mem_mask == (1 << 2)) ++ || (mem_mask == (1 << 3))); ++ ++ /* format, example pmacsdd: ++ xmm1, xmm2, xmm3/mem, xmm1 */ ++ else ++ return (mem_mask == (1 << 2)); ++ } ++ ++ else if (num == 4 && num_memory == 2) ++ { ++ /* If there are two memory operations, we can load one of the memory ops ++ into the destination register. This is for optimizing the ++ multiply/add ops, which the combiner has optimized both the multiply ++ and the add insns to have a memory operation. We have to be careful ++ that the destination doesn't overlap with the inputs. */ ++ rtx op0 = operands[0]; ++ ++ if (reg_mentioned_p (op0, operands[1]) ++ || reg_mentioned_p (op0, operands[2]) ++ || reg_mentioned_p (op0, operands[3])) ++ return false; ++ ++ /* formats (destination is the first argument), example fmaddss: ++ xmm1, xmm1, xmm2, xmm3/mem ++ xmm1, xmm1, xmm2/mem, xmm3 ++ xmm1, xmm2, xmm3/mem, xmm1 ++ xmm1, xmm2/mem, xmm3, xmm1 ++ ++ For the oc0 case, we will load either operands[1] or operands[3] into ++ operands[0], so any combination of 2 memory operands is ok. 
*/ ++ if (uses_oc0) ++ return true; ++ ++ /* format, example pmacsdd: ++ xmm1, xmm2, xmm3/mem, xmm1 ++ ++ For the integer multiply/add instructions be more restrictive and ++ require operands[2] and operands[3] to be the memory operands. */ ++ else ++ return (mem_mask == ((1 << 2) | (1 << 3))); ++ } ++ ++ else if (num == 3 && num_memory == 1) ++ { ++ /* formats, example protb: ++ xmm1, xmm2, xmm3/mem ++ xmm1, xmm2/mem, xmm3 */ ++ if (uses_oc0) ++ return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2))); ++ ++ /* format, example comeq: ++ xmm1, xmm2, xmm3/mem */ ++ else ++ return (mem_mask == (1 << 2)); ++ } ++ ++ else ++ gcc_unreachable (); ++ ++ return false; ++} ++ ++ ++/* Fixup an SSE5 instruction that has 2 memory input references into a form the ++ hardware will allow by using the destination register to load one of the ++ memory operations. Presently this is used by the multiply/add routines to ++ allow 2 memory references. */ ++ ++void ++ix86_expand_sse5_multiple_memory (rtx operands[], ++ int num, ++ enum machine_mode mode) ++{ ++ rtx op0 = operands[0]; ++ if (num != 4 ++ || memory_operand (op0, mode) ++ || reg_mentioned_p (op0, operands[1]) ++ || reg_mentioned_p (op0, operands[2]) ++ || reg_mentioned_p (op0, operands[3])) ++ gcc_unreachable (); ++ ++ /* For 2 memory operands, pick either operands[1] or operands[3] to move into ++ the destination register. */ ++ if (memory_operand (operands[1], mode)) ++ { ++ emit_move_insn (op0, operands[1]); ++ operands[1] = op0; ++ } ++ else if (memory_operand (operands[3], mode)) ++ { ++ emit_move_insn (op0, operands[3]); ++ operands[3] = op0; ++ } ++ else ++ gcc_unreachable (); ++ ++ return; ++} ++ ++ + /* Table of valid machine attributes. */ + const struct attribute_spec ix86_attribute_table[] = + { +@@ -7679,6 +7860,7 @@ get_some_local_dynamic_name_1 (rtx *px, + X -- don't print any sort of PIC '@' suffix for a symbol. + & -- print some in-use local-dynamic symbol name. 
+ H -- print a memory address offset by 8; used for sse high-parts ++ Y -- print condition for SSE5 com* instruction. + */ + + void +@@ -7950,6 +8132,61 @@ print_operand (FILE *file, rtx x, int co + } + return; + } ++ ++ case 'Y': ++ switch (GET_CODE (x)) ++ { ++ case NE: ++ fputs ("neq", file); ++ break; ++ case EQ: ++ fputs ("eq", file); ++ break; ++ case GE: ++ case GEU: ++ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); ++ break; ++ case GT: ++ case GTU: ++ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); ++ break; ++ case LE: ++ case LEU: ++ fputs ("le", file); ++ break; ++ case LT: ++ case LTU: ++ fputs ("lt", file); ++ break; ++ case UNORDERED: ++ fputs ("unord", file); ++ break; ++ case ORDERED: ++ fputs ("ord", file); ++ break; ++ case UNEQ: ++ fputs ("ueq", file); ++ break; ++ case UNGE: ++ fputs ("nlt", file); ++ break; ++ case UNGT: ++ fputs ("nle", file); ++ break; ++ case UNLE: ++ fputs ("ule", file); ++ break; ++ case UNLT: ++ fputs ("ult", file); ++ break; ++ case LTGT: ++ fputs ("une", file); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ return; ++ + default: + output_operand_lossage ("invalid operand code '%c'", code); + } +@@ -11510,6 +11747,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp + x = gen_rtx_AND (mode, x, op_false); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + } ++ else if (TARGET_SSE5) ++ { ++ rtx pcmov = gen_rtx_SET (mode, dest, ++ gen_rtx_IF_THEN_ELSE (mode, cmp, ++ op_true, ++ op_false)); ++ emit_insn (pcmov); ++ } + else + { + op_true = force_reg (mode, op_true); +@@ -14608,6 +14853,226 @@ enum ix86_builtins + IX86_BUILTIN_VEC_SET_V8HI, + IX86_BUILTIN_VEC_SET_V4HI, + ++ /* SSE4.1/SSE5 Common instructions */ ++ IX86_BUILTIN_ROUNDPD, ++ IX86_BUILTIN_ROUNDPS, ++ IX86_BUILTIN_ROUNDSD, ++ IX86_BUILTIN_ROUNDSS, ++ ++ IX86_BUILTIN_PTESTZ, ++ IX86_BUILTIN_PTESTC, ++ IX86_BUILTIN_PTESTNZC, ++ ++ /* SSE5 instructions */ ++ IX86_BUILTIN_FMADDSS, ++ IX86_BUILTIN_FMADDSD, ++ IX86_BUILTIN_FMADDPS, ++ 
IX86_BUILTIN_FMADDPD, ++ IX86_BUILTIN_FMSUBSS, ++ IX86_BUILTIN_FMSUBSD, ++ IX86_BUILTIN_FMSUBPS, ++ IX86_BUILTIN_FMSUBPD, ++ IX86_BUILTIN_FNMADDSS, ++ IX86_BUILTIN_FNMADDSD, ++ IX86_BUILTIN_FNMADDPS, ++ IX86_BUILTIN_FNMADDPD, ++ IX86_BUILTIN_FNMSUBSS, ++ IX86_BUILTIN_FNMSUBSD, ++ IX86_BUILTIN_FNMSUBPS, ++ IX86_BUILTIN_FNMSUBPD, ++ IX86_BUILTIN_PCMOV_V2DI, ++ IX86_BUILTIN_PCMOV_V4SI, ++ IX86_BUILTIN_PCMOV_V8HI, ++ IX86_BUILTIN_PCMOV_V16QI, ++ IX86_BUILTIN_PCMOV_V4SF, ++ IX86_BUILTIN_PCMOV_V2DF, ++ IX86_BUILTIN_PPERM, ++ IX86_BUILTIN_PERMPS, ++ IX86_BUILTIN_PERMPD, ++ IX86_BUILTIN_PMACSSWW, ++ IX86_BUILTIN_PMACSWW, ++ IX86_BUILTIN_PMACSSWD, ++ IX86_BUILTIN_PMACSWD, ++ IX86_BUILTIN_PMACSSDD, ++ IX86_BUILTIN_PMACSDD, ++ IX86_BUILTIN_PMACSSDQL, ++ IX86_BUILTIN_PMACSSDQH, ++ IX86_BUILTIN_PMACSDQL, ++ IX86_BUILTIN_PMACSDQH, ++ IX86_BUILTIN_PMADCSSWD, ++ IX86_BUILTIN_PMADCSWD, ++ IX86_BUILTIN_PHADDBW, ++ IX86_BUILTIN_PHADDBD, ++ IX86_BUILTIN_PHADDBQ, ++ IX86_BUILTIN_PHADDWD, ++ IX86_BUILTIN_PHADDWQ, ++ IX86_BUILTIN_PHADDDQ, ++ IX86_BUILTIN_PHADDUBW, ++ IX86_BUILTIN_PHADDUBD, ++ IX86_BUILTIN_PHADDUBQ, ++ IX86_BUILTIN_PHADDUWD, ++ IX86_BUILTIN_PHADDUWQ, ++ IX86_BUILTIN_PHADDUDQ, ++ IX86_BUILTIN_PHSUBBW, ++ IX86_BUILTIN_PHSUBWD, ++ IX86_BUILTIN_PHSUBDQ, ++ IX86_BUILTIN_PROTB, ++ IX86_BUILTIN_PROTW, ++ IX86_BUILTIN_PROTD, ++ IX86_BUILTIN_PROTQ, ++ IX86_BUILTIN_PROTB_IMM, ++ IX86_BUILTIN_PROTW_IMM, ++ IX86_BUILTIN_PROTD_IMM, ++ IX86_BUILTIN_PROTQ_IMM, ++ IX86_BUILTIN_PSHLB, ++ IX86_BUILTIN_PSHLW, ++ IX86_BUILTIN_PSHLD, ++ IX86_BUILTIN_PSHLQ, ++ IX86_BUILTIN_PSHAB, ++ IX86_BUILTIN_PSHAW, ++ IX86_BUILTIN_PSHAD, ++ IX86_BUILTIN_PSHAQ, ++ IX86_BUILTIN_FRCZSS, ++ IX86_BUILTIN_FRCZSD, ++ IX86_BUILTIN_FRCZPS, ++ IX86_BUILTIN_FRCZPD, ++ IX86_BUILTIN_CVTPH2PS, ++ IX86_BUILTIN_CVTPS2PH, ++ ++ IX86_BUILTIN_COMEQSS, ++ IX86_BUILTIN_COMNESS, ++ IX86_BUILTIN_COMLTSS, ++ IX86_BUILTIN_COMLESS, ++ IX86_BUILTIN_COMGTSS, ++ IX86_BUILTIN_COMGESS, ++ IX86_BUILTIN_COMUEQSS, ++ IX86_BUILTIN_COMUNESS, 
++ IX86_BUILTIN_COMULTSS, ++ IX86_BUILTIN_COMULESS, ++ IX86_BUILTIN_COMUGTSS, ++ IX86_BUILTIN_COMUGESS, ++ IX86_BUILTIN_COMORDSS, ++ IX86_BUILTIN_COMUNORDSS, ++ IX86_BUILTIN_COMFALSESS, ++ IX86_BUILTIN_COMTRUESS, ++ ++ IX86_BUILTIN_COMEQSD, ++ IX86_BUILTIN_COMNESD, ++ IX86_BUILTIN_COMLTSD, ++ IX86_BUILTIN_COMLESD, ++ IX86_BUILTIN_COMGTSD, ++ IX86_BUILTIN_COMGESD, ++ IX86_BUILTIN_COMUEQSD, ++ IX86_BUILTIN_COMUNESD, ++ IX86_BUILTIN_COMULTSD, ++ IX86_BUILTIN_COMULESD, ++ IX86_BUILTIN_COMUGTSD, ++ IX86_BUILTIN_COMUGESD, ++ IX86_BUILTIN_COMORDSD, ++ IX86_BUILTIN_COMUNORDSD, ++ IX86_BUILTIN_COMFALSESD, ++ IX86_BUILTIN_COMTRUESD, ++ ++ IX86_BUILTIN_COMEQPS, ++ IX86_BUILTIN_COMNEPS, ++ IX86_BUILTIN_COMLTPS, ++ IX86_BUILTIN_COMLEPS, ++ IX86_BUILTIN_COMGTPS, ++ IX86_BUILTIN_COMGEPS, ++ IX86_BUILTIN_COMUEQPS, ++ IX86_BUILTIN_COMUNEPS, ++ IX86_BUILTIN_COMULTPS, ++ IX86_BUILTIN_COMULEPS, ++ IX86_BUILTIN_COMUGTPS, ++ IX86_BUILTIN_COMUGEPS, ++ IX86_BUILTIN_COMORDPS, ++ IX86_BUILTIN_COMUNORDPS, ++ IX86_BUILTIN_COMFALSEPS, ++ IX86_BUILTIN_COMTRUEPS, ++ ++ IX86_BUILTIN_COMEQPD, ++ IX86_BUILTIN_COMNEPD, ++ IX86_BUILTIN_COMLTPD, ++ IX86_BUILTIN_COMLEPD, ++ IX86_BUILTIN_COMGTPD, ++ IX86_BUILTIN_COMGEPD, ++ IX86_BUILTIN_COMUEQPD, ++ IX86_BUILTIN_COMUNEPD, ++ IX86_BUILTIN_COMULTPD, ++ IX86_BUILTIN_COMULEPD, ++ IX86_BUILTIN_COMUGTPD, ++ IX86_BUILTIN_COMUGEPD, ++ IX86_BUILTIN_COMORDPD, ++ IX86_BUILTIN_COMUNORDPD, ++ IX86_BUILTIN_COMFALSEPD, ++ IX86_BUILTIN_COMTRUEPD, ++ ++ IX86_BUILTIN_PCOMEQUB, ++ IX86_BUILTIN_PCOMNEUB, ++ IX86_BUILTIN_PCOMLTUB, ++ IX86_BUILTIN_PCOMLEUB, ++ IX86_BUILTIN_PCOMGTUB, ++ IX86_BUILTIN_PCOMGEUB, ++ IX86_BUILTIN_PCOMFALSEUB, ++ IX86_BUILTIN_PCOMTRUEUB, ++ IX86_BUILTIN_PCOMEQUW, ++ IX86_BUILTIN_PCOMNEUW, ++ IX86_BUILTIN_PCOMLTUW, ++ IX86_BUILTIN_PCOMLEUW, ++ IX86_BUILTIN_PCOMGTUW, ++ IX86_BUILTIN_PCOMGEUW, ++ IX86_BUILTIN_PCOMFALSEUW, ++ IX86_BUILTIN_PCOMTRUEUW, ++ IX86_BUILTIN_PCOMEQUD, ++ IX86_BUILTIN_PCOMNEUD, ++ IX86_BUILTIN_PCOMLTUD, ++ IX86_BUILTIN_PCOMLEUD, 
++ IX86_BUILTIN_PCOMGTUD, ++ IX86_BUILTIN_PCOMGEUD, ++ IX86_BUILTIN_PCOMFALSEUD, ++ IX86_BUILTIN_PCOMTRUEUD, ++ IX86_BUILTIN_PCOMEQUQ, ++ IX86_BUILTIN_PCOMNEUQ, ++ IX86_BUILTIN_PCOMLTUQ, ++ IX86_BUILTIN_PCOMLEUQ, ++ IX86_BUILTIN_PCOMGTUQ, ++ IX86_BUILTIN_PCOMGEUQ, ++ IX86_BUILTIN_PCOMFALSEUQ, ++ IX86_BUILTIN_PCOMTRUEUQ, ++ ++ IX86_BUILTIN_PCOMEQB, ++ IX86_BUILTIN_PCOMNEB, ++ IX86_BUILTIN_PCOMLTB, ++ IX86_BUILTIN_PCOMLEB, ++ IX86_BUILTIN_PCOMGTB, ++ IX86_BUILTIN_PCOMGEB, ++ IX86_BUILTIN_PCOMFALSEB, ++ IX86_BUILTIN_PCOMTRUEB, ++ IX86_BUILTIN_PCOMEQW, ++ IX86_BUILTIN_PCOMNEW, ++ IX86_BUILTIN_PCOMLTW, ++ IX86_BUILTIN_PCOMLEW, ++ IX86_BUILTIN_PCOMGTW, ++ IX86_BUILTIN_PCOMGEW, ++ IX86_BUILTIN_PCOMFALSEW, ++ IX86_BUILTIN_PCOMTRUEW, ++ IX86_BUILTIN_PCOMEQD, ++ IX86_BUILTIN_PCOMNED, ++ IX86_BUILTIN_PCOMLTD, ++ IX86_BUILTIN_PCOMLED, ++ IX86_BUILTIN_PCOMGTD, ++ IX86_BUILTIN_PCOMGED, ++ IX86_BUILTIN_PCOMFALSED, ++ IX86_BUILTIN_PCOMTRUED, ++ IX86_BUILTIN_PCOMEQQ, ++ IX86_BUILTIN_PCOMNEQ, ++ IX86_BUILTIN_PCOMLTQ, ++ IX86_BUILTIN_PCOMLEQ, ++ IX86_BUILTIN_PCOMGTQ, ++ IX86_BUILTIN_PCOMGEQ, ++ IX86_BUILTIN_PCOMFALSEQ, ++ IX86_BUILTIN_PCOMTRUEQ, ++ + IX86_BUILTIN_MAX + }; + +@@ -14663,6 +15128,22 @@ static const struct builtin_description + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, + }; + ++static const struct builtin_description bdesc_ptest[] = ++{ ++ /* SSE4.1/SSE5 */ ++ { MASK_SSE5, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 }, ++ { MASK_SSE5, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 }, ++ { MASK_SSE5, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 }, ++}; ++ ++/* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. 
*/ ++static const struct builtin_description bdesc_sse_3arg[] = ++{ ++ /* SSE4.1/SSE5 */ ++ { MASK_SSE5, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 }, ++ { MASK_SSE5, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 }, ++}; ++ + static const struct builtin_description bdesc_2arg[] = + { + /* SSE */ +@@ -15015,7 +15496,300 @@ static const struct builtin_description + { MASK_SSSE3, CODE_FOR_ssse3_pabsv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pabsv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 }, + { MASK_SSSE3, CODE_FOR_ssse3_pabsv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 }, +- { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 } ++ { MASK_SSSE3, CODE_FOR_ssse3_pabsv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 }, ++ ++ /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */ ++ { MASK_SSE5, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 }, ++ { MASK_SSE5, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 } ++}; ++ ++/* SSE5 */ ++enum multi_arg_type { ++ MULTI_ARG_UNKNOWN, ++ MULTI_ARG_3_SF, ++ MULTI_ARG_3_DF, ++ MULTI_ARG_3_DI, ++ MULTI_ARG_3_SI, ++ MULTI_ARG_3_SI_DI, ++ MULTI_ARG_3_HI, ++ MULTI_ARG_3_HI_SI, ++ MULTI_ARG_3_QI, ++ MULTI_ARG_3_PERMPS, ++ MULTI_ARG_3_PERMPD, ++ MULTI_ARG_2_SF, ++ MULTI_ARG_2_DF, ++ MULTI_ARG_2_DI, ++ MULTI_ARG_2_SI, ++ MULTI_ARG_2_HI, ++ MULTI_ARG_2_QI, ++ MULTI_ARG_2_DI_IMM, ++ MULTI_ARG_2_SI_IMM, ++ MULTI_ARG_2_HI_IMM, ++ MULTI_ARG_2_QI_IMM, ++ MULTI_ARG_2_SF_CMP, ++ MULTI_ARG_2_DF_CMP, ++ MULTI_ARG_2_DI_CMP, ++ MULTI_ARG_2_SI_CMP, ++ MULTI_ARG_2_HI_CMP, ++ MULTI_ARG_2_QI_CMP, ++ MULTI_ARG_2_DI_TF, ++ MULTI_ARG_2_SI_TF, ++ MULTI_ARG_2_HI_TF, ++ MULTI_ARG_2_QI_TF, ++ MULTI_ARG_2_SF_TF, ++ MULTI_ARG_2_DF_TF, ++ MULTI_ARG_1_SF, ++ MULTI_ARG_1_DF, ++ MULTI_ARG_1_DI, ++ MULTI_ARG_1_SI, ++ MULTI_ARG_1_HI, ++ MULTI_ARG_1_QI, ++ MULTI_ARG_1_SI_DI, ++ 
MULTI_ARG_1_HI_DI, ++ MULTI_ARG_1_HI_SI, ++ MULTI_ARG_1_QI_DI, ++ MULTI_ARG_1_QI_SI, ++ MULTI_ARG_1_QI_HI, ++ MULTI_ARG_1_PH2PS, ++ MULTI_ARG_1_PS2PH ++}; ++ ++static const struct builtin_description bdesc_multi_arg[] = ++{ ++ { MASK_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, 
(int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF }, ++ { MASK_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI }, ++ { MASK_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS }, ++ { MASK_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 
0, (int)MULTI_ARG_3_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI }, ++ { MASK_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM }, ++ { MASK_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM }, ++ { MASK_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM }, ++ { MASK_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM }, ++ { MASK_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI }, ++ { 
MASK_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI }, ++ { MASK_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI }, ++ { MASK_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF }, ++ { MASK_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF }, ++ { MASK_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF }, ++ { MASK_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF }, ++ { MASK_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS }, ++ { MASK_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddubw, 
"__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI }, ++ { MASK_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI }, ++ { MASK_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI }, ++ { MASK_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, 
UNEQ, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, 
CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, 
(int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", 
IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, 
CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, 
CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", 
IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, 
(int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, ++ { MASK_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", 
IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, ++ ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, ++ { MASK_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, + }; + + static void +@@ -15332,6 +16106,137 @@ ix86_init_mmx_sse_builtins (void) + tree v2di_ftype_v2di_v16qi + = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, + NULL_TREE); ++ tree v2df_ftype_v2df_v2df_v2df ++ = build_function_type_list (V2DF_type_node, ++ V2DF_type_node, 
V2DF_type_node, ++ V2DF_type_node, NULL_TREE); ++ tree v4sf_ftype_v4sf_v4sf_v4sf ++ = build_function_type_list (V4SF_type_node, ++ V4SF_type_node, V4SF_type_node, ++ V4SF_type_node, NULL_TREE); ++ tree v8hi_ftype_v16qi ++ = build_function_type_list (V8HI_type_node, V16QI_type_node, ++ NULL_TREE); ++ tree v4si_ftype_v16qi ++ = build_function_type_list (V4SI_type_node, V16QI_type_node, ++ NULL_TREE); ++ tree v2di_ftype_v16qi ++ = build_function_type_list (V2DI_type_node, V16QI_type_node, ++ NULL_TREE); ++ tree v4si_ftype_v8hi ++ = build_function_type_list (V4SI_type_node, V8HI_type_node, ++ NULL_TREE); ++ tree v2di_ftype_v8hi ++ = build_function_type_list (V2DI_type_node, V8HI_type_node, ++ NULL_TREE); ++ tree v2di_ftype_v4si ++ = build_function_type_list (V2DI_type_node, V4SI_type_node, ++ NULL_TREE); ++ tree v16qi_ftype_v16qi_v16qi_int ++ = build_function_type_list (V16QI_type_node, V16QI_type_node, ++ V16QI_type_node, integer_type_node, ++ NULL_TREE); ++ tree v8hi_ftype_v8hi_v8hi_int ++ = build_function_type_list (V8HI_type_node, V8HI_type_node, ++ V8HI_type_node, integer_type_node, ++ NULL_TREE); ++ tree v4si_ftype_v4si_v4si_int ++ = build_function_type_list (V4SI_type_node, V4SI_type_node, ++ V4SI_type_node, integer_type_node, ++ NULL_TREE); ++ tree int_ftype_v2di_v2di ++ = build_function_type_list (integer_type_node, ++ V2DI_type_node, V2DI_type_node, ++ NULL_TREE); ++ tree v16qi_ftype_v16qi_v16qi_v16qi ++ = build_function_type_list (V16QI_type_node, V16QI_type_node, ++ V16QI_type_node, V16QI_type_node, ++ NULL_TREE); ++ /* SSE5 instructions */ ++ tree v2di_ftype_v2di_v2di_v2di ++ = build_function_type_list (V2DI_type_node, ++ V2DI_type_node, ++ V2DI_type_node, ++ V2DI_type_node, ++ NULL_TREE); ++ ++ tree v4si_ftype_v4si_v4si_v4si ++ = build_function_type_list (V4SI_type_node, ++ V4SI_type_node, ++ V4SI_type_node, ++ V4SI_type_node, ++ NULL_TREE); ++ ++ tree v4si_ftype_v4si_v4si_v2di ++ = build_function_type_list (V4SI_type_node, ++ V4SI_type_node, ++ 
V4SI_type_node, ++ V2DI_type_node, ++ NULL_TREE); ++ ++ tree v8hi_ftype_v8hi_v8hi_v8hi ++ = build_function_type_list (V8HI_type_node, ++ V8HI_type_node, ++ V8HI_type_node, ++ V8HI_type_node, ++ NULL_TREE); ++ ++ tree v8hi_ftype_v8hi_v8hi_v4si ++ = build_function_type_list (V8HI_type_node, ++ V8HI_type_node, ++ V8HI_type_node, ++ V4SI_type_node, ++ NULL_TREE); ++ ++ tree v2df_ftype_v2df_v2df_v16qi ++ = build_function_type_list (V2DF_type_node, ++ V2DF_type_node, ++ V2DF_type_node, ++ V16QI_type_node, ++ NULL_TREE); ++ ++ tree v4sf_ftype_v4sf_v4sf_v16qi ++ = build_function_type_list (V4SF_type_node, ++ V4SF_type_node, ++ V4SF_type_node, ++ V16QI_type_node, ++ NULL_TREE); ++ ++ tree v2di_ftype_v2di_si ++ = build_function_type_list (V2DI_type_node, ++ V2DI_type_node, ++ integer_type_node, ++ NULL_TREE); ++ ++ tree v4si_ftype_v4si_si ++ = build_function_type_list (V4SI_type_node, ++ V4SI_type_node, ++ integer_type_node, ++ NULL_TREE); ++ ++ tree v8hi_ftype_v8hi_si ++ = build_function_type_list (V8HI_type_node, ++ V8HI_type_node, ++ integer_type_node, ++ NULL_TREE); ++ ++ tree v16qi_ftype_v16qi_si ++ = build_function_type_list (V16QI_type_node, ++ V16QI_type_node, ++ integer_type_node, ++ NULL_TREE); ++ tree v4sf_ftype_v4hi ++ = build_function_type_list (V4SF_type_node, ++ V4HI_type_node, ++ NULL_TREE); ++ ++ tree v4hi_ftype_v4sf ++ = build_function_type_list (V4HI_type_node, ++ V4SF_type_node, ++ NULL_TREE); ++ ++ tree v2di_ftype_v2di ++ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); + + tree float80_type; + tree float128_type; +@@ -15358,6 +16263,48 @@ ix86_init_mmx_sse_builtins (void) + (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); + } + ++ /* Add all SSE builtins that are more or less simple operations on ++ three operands. */ ++ for (i = 0, d = bdesc_sse_3arg; ++ i < ARRAY_SIZE (bdesc_sse_3arg); ++ i++, d++) ++ { ++ /* Use one of the operands; the target can have a different mode for ++ mask-generating compares. 
*/ ++ enum machine_mode mode; ++ tree type; ++ ++ if (d->name == 0) ++ continue; ++ mode = insn_data[d->icode].operand[1].mode; ++ ++ switch (mode) ++ { ++ case V16QImode: ++ type = v16qi_ftype_v16qi_v16qi_int; ++ break; ++ case V8HImode: ++ type = v8hi_ftype_v8hi_v8hi_int; ++ break; ++ case V4SImode: ++ type = v4si_ftype_v4si_v4si_int; ++ break; ++ case V2DImode: ++ type = v2di_ftype_v2di_v2di_int; ++ break; ++ case V2DFmode: ++ type = v2df_ftype_v2df_v2df_int; ++ break; ++ case V4SFmode: ++ type = v4sf_ftype_v4sf_v4sf_int; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ def_builtin (d->mask, d->name, type, d->code); ++ } ++ + /* Add all builtins that are more or less simple operations on two + operands. */ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) +@@ -15483,6 +16430,10 @@ ix86_init_mmx_sse_builtins (void) + def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW); + def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD); + ++ /* ptest insns. */ ++ for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++) ++ def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code); ++ + /* comi/ucomi insns. */ + for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) + if (d->mask == MASK_SSE2) +@@ -15680,6 +16631,12 @@ ix86_init_mmx_sse_builtins (void) + def_builtin (MASK_SSE4A, "__builtin_ia32_insertq", + v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ); + ++ /* SSE4.1 and SSE5 */ ++ def_builtin (MASK_SSE5, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD); ++ def_builtin (MASK_SSE5, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS); ++ def_builtin (MASK_SSE5, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD); ++ def_builtin (MASK_SSE5, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS); ++ + /* Access to the vec_init patterns. 
*/ + ftype = build_function_type_list (V2SI_type_node, integer_type_node, + integer_type_node, NULL_TREE); +@@ -15750,6 +16707,72 @@ ix86_init_mmx_sse_builtins (void) + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi", + ftype, IX86_BUILTIN_VEC_SET_V4HI); ++ ++ ++ /* Add SSE5 multi-arg argument instructions */ ++ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) ++ { ++ tree mtype = NULL_TREE; ++ ++ if (d->name == 0) ++ continue; ++ ++ switch ((enum multi_arg_type)d->flag) ++ { ++ case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break; ++ case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; ++ case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; ++ case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; ++ case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; ++ case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; ++ case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; ++ case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; ++ case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break; ++ case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break; ++ case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; ++ case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; ++ case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; ++ case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; ++ case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; ++ case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; ++ case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; ++ case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; ++ case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; ++ case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; ++ case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break; ++ case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break; ++ 
case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; ++ case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break; ++ case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; ++ case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; ++ case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; ++ case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; ++ case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; ++ case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; ++ case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; ++ case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; ++ case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; ++ case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; ++ case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; ++ case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; ++ case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; ++ case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; ++ case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; ++ case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; ++ case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; ++ case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; ++ case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; ++ case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; ++ case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break; ++ case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break; ++ case MULTI_ARG_UNKNOWN: ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (mtype) ++ def_builtin (d->mask, d->name, mtype, d->code); ++ } + } + + /* Errors in the source file can cause expand_expr to return const0_rtx +@@ -15763,6 +16786,65 @@ safe_vector_operand (rtx x, enum machine + return x; + } + ++/* Subroutine of ix86_expand_builtin to take care of SSE insns with ++ 4 operands. The third argument must be a constant smaller than 8 ++ bits or xmm0. 
*/ ++ ++static rtx ++ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree arglist, ++ rtx target) ++{ ++ rtx pat; ++ tree arg0 = TREE_VALUE (arglist); ++ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); ++ tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); ++ rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); ++ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); ++ rtx op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); ++ enum machine_mode tmode = insn_data[icode].operand[0].mode; ++ enum machine_mode mode1 = insn_data[icode].operand[1].mode; ++ enum machine_mode mode2 = insn_data[icode].operand[2].mode; ++ enum machine_mode mode3 = insn_data[icode].operand[3].mode; ++ /* Pass vector-mode operands through safe_vector_operand (expand_expr can return const0_rtx after a source error). */ ++ if (VECTOR_MODE_P (mode1)) ++ op0 = safe_vector_operand (op0, mode1); ++ if (VECTOR_MODE_P (mode2)) ++ op1 = safe_vector_operand (op1, mode2); ++ if (VECTOR_MODE_P (mode3)) ++ op2 = safe_vector_operand (op2, mode3); ++ /* Use a fresh pseudo for the result when optimizing, or when TARGET is missing, has the wrong mode, or fails operand 0's predicate. */ ++ if (optimize ++ || target == 0 ++ || GET_MODE (target) != tmode ++ || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ /* Copy the first two inputs into registers when they fail their operand predicates; the second is also copied when optimizing and it is not already a register. */ ++ if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) ++ op0 = copy_to_mode_reg (mode1, op0); ++ if ((optimize && !register_operand (op1, mode2)) ++ || !(*insn_data[icode].operand[2].predicate) (op1, mode2)) ++ op1 = copy_to_mode_reg (mode2, op1); ++ /* The third input is never copied to a register: if it fails operand 3's predicate, report an error and return const0_rtx. */ ++ if (! (*insn_data[icode].operand[3].predicate) (op2, mode3)) ++ switch (icode) ++ { ++ case CODE_FOR_sse4_1_roundsd: ++ case CODE_FOR_sse4_1_roundss: ++ error ("the third argument must be a 4-bit immediate"); ++ return const0_rtx; ++ ++ default: ++ error ("the third argument must be an 8-bit immediate"); ++ return const0_rtx; ++ } ++ /* Generate and emit the insn; return 0 if no pattern was produced. */ ++ pat = GEN_FCN (icode) (target, op0, op1, op2); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return target; ++} ++ + /* Subroutine of ix86_expand_builtin to take care of binop insns. 
*/ + + static rtx +@@ -15831,6 +16913,189 @@ ix86_expand_binop_builtin (enum insn_cod + return target; + } + ++/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. M_TYPE gives the number of arguments taken from ARGLIST (1 to 3) and whether the call is a comparison, a true/false form, or ends in an immediate. SUB_CODE is used as the rtx code of the comparison, or as the integer passed to true/false forms. NOTE(review): SUB_CODE is declared as enum insn_code although the caller passes d->comparison; confirm the intended type. */ ++ ++static rtx ++ix86_expand_multi_arg_builtin (enum insn_code icode, tree arglist, rtx target, ++ enum multi_arg_type m_type, ++ enum insn_code sub_code) ++{ ++ rtx pat; ++ int i; ++ int nargs; ++ bool comparison_p = false; ++ bool tf_p = false; ++ bool last_arg_constant = false; ++ int num_memory = 0; ++ struct { ++ rtx op; ++ enum machine_mode mode; ++ } args[4]; ++ ++ enum machine_mode tmode = insn_data[icode].operand[0].mode; ++ /* Decode M_TYPE into the number of arguments to read and the flags that select the call shape used below. */ ++ switch (m_type) ++ { ++ case MULTI_ARG_3_SF: ++ case MULTI_ARG_3_DF: ++ case MULTI_ARG_3_DI: ++ case MULTI_ARG_3_SI: ++ case MULTI_ARG_3_SI_DI: ++ case MULTI_ARG_3_HI: ++ case MULTI_ARG_3_HI_SI: ++ case MULTI_ARG_3_QI: ++ case MULTI_ARG_3_PERMPS: ++ case MULTI_ARG_3_PERMPD: ++ nargs = 3; ++ break; ++ ++ case MULTI_ARG_2_SF: ++ case MULTI_ARG_2_DF: ++ case MULTI_ARG_2_DI: ++ case MULTI_ARG_2_SI: ++ case MULTI_ARG_2_HI: ++ case MULTI_ARG_2_QI: ++ nargs = 2; ++ break; ++ ++ case MULTI_ARG_2_DI_IMM: ++ case MULTI_ARG_2_SI_IMM: ++ case MULTI_ARG_2_HI_IMM: ++ case MULTI_ARG_2_QI_IMM: ++ nargs = 2; ++ last_arg_constant = true; ++ break; ++ ++ case MULTI_ARG_1_SF: ++ case MULTI_ARG_1_DF: ++ case MULTI_ARG_1_DI: ++ case MULTI_ARG_1_SI: ++ case MULTI_ARG_1_HI: ++ case MULTI_ARG_1_QI: ++ case MULTI_ARG_1_SI_DI: ++ case MULTI_ARG_1_HI_DI: ++ case MULTI_ARG_1_HI_SI: ++ case MULTI_ARG_1_QI_DI: ++ case MULTI_ARG_1_QI_SI: ++ case MULTI_ARG_1_QI_HI: ++ case MULTI_ARG_1_PH2PS: ++ case MULTI_ARG_1_PS2PH: ++ nargs = 1; ++ break; ++ ++ case MULTI_ARG_2_SF_CMP: ++ case MULTI_ARG_2_DF_CMP: ++ case MULTI_ARG_2_DI_CMP: ++ case MULTI_ARG_2_SI_CMP: ++ case MULTI_ARG_2_HI_CMP: ++ case MULTI_ARG_2_QI_CMP: ++ nargs = 2; ++ comparison_p = true; ++ break; ++ ++ case MULTI_ARG_2_SF_TF: ++ case MULTI_ARG_2_DF_TF: ++ case MULTI_ARG_2_DI_TF: ++ case MULTI_ARG_2_SI_TF: ++ case
MULTI_ARG_2_HI_TF: ++ case MULTI_ARG_2_QI_TF: ++ nargs = 2; ++ tf_p = true; ++ break; ++ ++ case MULTI_ARG_UNKNOWN: ++ default: ++ gcc_unreachable (); ++ } ++ /* Use a fresh pseudo for the result when optimizing, or when TARGET is missing, has the wrong mode, or fails operand 0's predicate. */ ++ if (optimize || !target ++ || GET_MODE (target) != tmode ++ || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ /* args[] has room for four entries. */ ++ gcc_assert (nargs <= 4); ++ /* Expand each argument. For comparisons the insn's operand 1 is the comparison rtx, so argument modes and predicates are taken one operand later (ADJUST). */ ++ for (i = 0; i < nargs; i++) ++ { ++ int adjust = (comparison_p) ? 1 : 0; ++ enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; ++ tree arg; ++ rtx op; ++ ++ gcc_assert (arglist != NULL); ++ arg = TREE_VALUE (arglist); ++ ++ gcc_assert (arg != NULL); ++ op = expand_expr (arg, NULL_RTX, VOIDmode, 0); ++ /* A trailing immediate must expand to a CONST_INT and is then used as is; otherwise report the error and return a fresh pseudo. */ ++ if (last_arg_constant && i == nargs-1) ++ { ++ if (GET_CODE (op) != CONST_INT) ++ { ++ error ("last argument must be an immediate"); ++ return gen_reg_rtx (tmode); ++ } ++ } ++ else ++ { ++ if (VECTOR_MODE_P (mode)) ++ op = safe_vector_operand (op, mode); ++ ++ /* If we aren't optimizing, only allow one memory operand to be ++ generated. */ ++ if (memory_operand (op, mode)) ++ num_memory++; ++ ++ gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); ++ /* Force into a register when optimizing, when the operand predicate rejects OP, or when OP is a second memory operand. */ ++ if (optimize ++ || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode) ++ || num_memory > 1) ++ op = force_reg (mode, op); ++ } ++ ++ args[i].op = op; ++ args[i].mode = mode; /* Recorded only; not read again in this function. */ ++ arglist = TREE_CHAIN (arglist); ++ } ++ /* Build the pattern. True/false forms pass SUB_CODE as an extra integer operand; comparisons pass an rtx with SUB_CODE as its code ahead of the two inputs. */ ++ switch (nargs) ++ { ++ case 1: ++ pat = GEN_FCN (icode) (target, args[0].op); ++ break; ++ ++ case 2: ++ if (tf_p) ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, ++ GEN_INT ((int)sub_code)); ++ else if (!
comparison_p) ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); ++ else ++ { ++ rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), ++ args[0].op, ++ args[1].op); ++ ++ pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); ++ } ++ break; ++ ++ case 3: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ /* Return 0 if no pattern was generated; otherwise emit it and return the result register. */ ++ if (! pat) ++ return 0; ++ ++ emit_insn (pat); ++ return target; ++} ++ + /* Subroutine of ix86_expand_builtin to take care of stores. */ + + static rtx +@@ -15884,7 +17149,28 @@ ix86_expand_unop_builtin (enum insn_code + op0 = copy_to_mode_reg (mode0, op0); + } + +- pat = GEN_FCN (icode) (target, op0); ++ switch (icode) ++ { ++ case CODE_FOR_sse4_1_roundpd: ++ case CODE_FOR_sse4_1_roundps: ++ { ++ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); ++ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); ++ enum machine_mode mode1 = insn_data[icode].operand[2].mode; ++ ++ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) ++ { ++ error ("the second argument must be a 4-bit immediate"); ++ return const0_rtx; ++ } ++ pat = GEN_FCN (icode) (target, op0, op1); ++ } ++ break; ++ default: ++ pat = GEN_FCN (icode) (target, op0); ++ break; ++ } ++ + if (! pat) + return 0; + emit_insn (pat); +@@ -16033,6 +17319,52 @@ ix86_expand_sse_comi (const struct built + return SUBREG_REG (target); + } + ++/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ ++ ++static rtx ++ix86_expand_sse_ptest (const struct builtin_description *d, tree arglist, ++ rtx target) ++{ ++ rtx pat; ++ tree arg0 = TREE_VALUE (arglist); ++ tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); ++ rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); ++ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); ++ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; ++ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; ++ enum rtx_code comparison = d->comparison; ++ /* Pass vector-mode operands through safe_vector_operand (expand_expr can return const0_rtx after a source error). */ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ if (VECTOR_MODE_P (mode1)) ++ op1 = safe_vector_operand (op1, mode1); ++ /* The incoming TARGET is not used: the result is built in a fresh SImode pseudo that is zeroed first and then addressed through a QImode SUBREG at offset 0. */ ++ target = gen_reg_rtx (SImode); ++ emit_move_insn (target, const0_rtx); ++ target = gen_rtx_SUBREG (QImode, target, 0); ++ /* Copy each input into a register if it fails its operand predicate, or if optimizing and it is not already a register. */ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if ((optimize && !register_operand (op1, mode1)) ++ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ /* The insn takes only the two inputs. After emitting it, set the QImode part of the result (via STRICT_LOW_PART) to d->comparison applied to the insn's SET_DEST and zero. */ ++ pat = GEN_FCN (d->icode) (op0, op1); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ emit_insn (gen_rtx_SET (VOIDmode, ++ gen_rtx_STRICT_LOW_PART (VOIDmode, target), ++ gen_rtx_fmt_ee (comparison, QImode, ++ SET_DEST (pat), ++ const0_rtx))); ++ /* Return the SImode register underlying the SUBREG. */ ++ return SUBREG_REG (target); ++} ++ ++ ++ + /* Return the integer constant in ARG. Constrain it to be in the range + of the subparts of VEC_TYPE; issue an error if not. 
*/ + +@@ -16617,12 +17949,15 @@ ix86_expand_builtin (tree exp, rtx targe + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) +- op0 = copy_to_mode_reg (SImode, op0); ++ op0 = copy_to_mode_reg (Pmode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); +- emit_insn (gen_sse3_monitor (op0, op1, op2)); ++ if (!TARGET_64BIT) ++ emit_insn (gen_sse3_monitor (op0, op1, op2)); ++ else ++ emit_insn (gen_sse3_monitor64 (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_MWAIT: +@@ -16817,6 +18152,10 @@ ix86_expand_builtin (tree exp, rtx targe + break; + } + ++ for (i = 0, d = bdesc_sse_3arg; i < ARRAY_SIZE (bdesc_sse_3arg); i++, d++) ++ if (d->code == fcode) ++ return ix86_expand_sse_4_operands_builtin (d->icode, arglist, target); ++ + for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++) + if (d->code == fcode) + { +@@ -16838,6 +18177,16 @@ ix86_expand_builtin (tree exp, rtx targe + if (d->code == fcode) + return ix86_expand_sse_comi (d, arglist, target); + ++ for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++) ++ if (d->code == fcode) ++ return ix86_expand_sse_ptest (d, arglist, target); ++ ++ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) ++ if (d->code == fcode) ++ return ix86_expand_multi_arg_builtin (d->icode, arglist, target, ++ (enum multi_arg_type)d->flag, ++ d->comparison); ++ + gcc_unreachable (); + } + +--- gcc/config/i386/i386.h.jj 2007-06-26 13:38:46.000000000 +0200 ++++ gcc/config/i386/i386.h 2007-12-28 20:08:53.000000000 +0100 +@@ -143,6 +143,8 @@ extern const struct processor_costs *ix8 + #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) + #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10) + ++#define TARGET_ROUND TARGET_SSE5 ++ + #define TUNEMASK (1 << ix86_tune) + extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; 
+ extern const int x86_use_bit_test, x86_cmove, x86_fisttp, x86_deep_branch; +@@ -228,6 +230,7 @@ extern int x86_prefetch_sse, x86_cmpxchg + #define TARGET_USE_BT (x86_use_bt & TUNEMASK) + #define TARGET_USE_INCDEC (x86_use_incdec & TUNEMASK) + #define TARGET_PAD_RETURNS (x86_pad_returns & TUNEMASK) ++#define TARGET_FUSED_MADD x86_fused_muladd + + #define ASSEMBLER_DIALECT (ix86_asm_dialect) + +@@ -408,6 +411,8 @@ extern int x86_prefetch_sse, x86_cmpxchg + } \ + if (TARGET_SSE4A) \ + builtin_define ("__SSE4A__"); \ ++ if (TARGET_SSE5) \ ++ builtin_define ("__SSE5__"); \ + if (TARGET_SSE_MATH && TARGET_SSE) \ + builtin_define ("__SSE_MATH__"); \ + if (TARGET_SSE_MATH && TARGET_SSE2) \ +--- gcc/config/i386/i386.md.jj 2007-06-26 13:38:46.000000000 +0200 ++++ gcc/config/i386/i386.md 2007-12-28 20:12:10.000000000 +0100 +@@ -157,6 +157,21 @@ + (UNSPEC_EXTRQ 131) + (UNSPEC_INSERTQI 132) + (UNSPEC_INSERTQ 133) ++ ++ ; For SSE4.1/SSE5 support ++ (UNSPEC_PTEST 145) ++ (UNSPEC_ROUND 146) ++ ++ ;; For SSE5 ++ (UNSPEC_SSE5_INTRINSIC 150) ++ (UNSPEC_SSE5_UNSIGNED_CMP 151) ++ (UNSPEC_SSE5_TRUEFALSE 152) ++ (UNSPEC_SSE5_PERMUTE 153) ++ (UNSPEC_SSE5_ASHIFT 154) ++ (UNSPEC_SSE5_LSHIFT 155) ++ (UNSPEC_FRCZ 156) ++ (UNSPEC_CVTPH2PS 157) ++ (UNSPEC_CVTPS2PH 158) + ]) + + (define_constants +@@ -176,6 +191,16 @@ + (UNSPECV_LOCK 13) + ]) + ++;; Constants to represent pcomtrue/pcomfalse variants ++(define_constants ++ [(PCOM_FALSE 0) ++ (PCOM_TRUE 1) ++ (COM_FALSE_S 2) ++ (COM_FALSE_P 3) ++ (COM_TRUE_S 4) ++ (COM_TRUE_P 5) ++ ]) ++ + ;; Registers by name. 
+ (define_constants + [(BP_REG 6) +@@ -210,8 +235,9 @@ + push,pop,call,callv,leave, + str,bitmanip,cld, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, +- sselog,sselog1,sseiadd,sseishft,sseimul, +- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins, ++ sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, ++ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, ++ ssemuladd,sse4arg, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" + (const_string "other")) + +@@ -224,8 +250,9 @@ + (define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") + (const_string "i387") +- (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul, +- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins") ++ (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, ++ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt, ++ ssediv,sseins,ssemuladd,sse4arg") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") +@@ -417,12 +444,12 @@ + "!alu1,negnot,ishift1, + imov,imovx,icmp,test,bitmanip, + fmov,fcmp,fsgn, +- sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1, +- mmx,mmxmov,mmxcmp,mmxcvt") ++ sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1, ++ sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt") + (match_operand 2 "memory_operand" "")) + (const_string "load") +- (and (eq_attr "type" "icmov") +- (match_operand 3 "memory_operand" "")) ++ (and (eq_attr "type" "icmov,ssemuladd,sse4arg") ++ (match_operand 3 "memory_operand" "")) + (const_string "load") + ] + (const_string "none"))) +@@ -469,10 +496,14 @@ + + ;; All SSE floating point modes + (define_mode_macro SSEMODEF [SF DF]) ++(define_mode_macro MODEF [SF DF]) + + ;; All integer modes handled by SSE cvtts?2si* operators. 
+ (define_mode_macro SSEMODEI24 [SI DI]) + ++;; SSE asm suffix for floating point modes ++(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")]) ++ + + ;; Scheduling descriptions + +@@ -7417,6 +7448,9 @@ + (match_operand:SF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" + "") ++ ++;; SSE5 scalar multiply/add instructions are defined in sse.md. ++ + + ;; Divide instructions + +@@ -13429,7 +13463,7 @@ + (match_operator:SF 1 "sse_comparison_operator" + [(match_operand:SF 2 "register_operand" "0") + (match_operand:SF 3 "nonimmediate_operand" "xm")]))] +- "TARGET_SSE" ++ "TARGET_SSE && !TARGET_SSE5" + "cmp%D1ss\t{%3, %0|%0, %3}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) +@@ -13439,10 +13473,21 @@ + (match_operator:DF 1 "sse_comparison_operator" + [(match_operand:DF 2 "register_operand" "0") + (match_operand:DF 3 "nonimmediate_operand" "Ym")]))] +- "TARGET_SSE2" ++ "TARGET_SSE2 && !TARGET_SSE5" + "cmp%D1sd\t{%3, %0|%0, %3}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) ++ ++(define_insn "*sse5_setcc" ++ [(set (match_operand:MODEF 0 "register_operand" "=x") ++ (match_operator:MODEF 1 "sse5_comparison_float_operator" ++ [(match_operand:MODEF 2 "register_operand" "x") ++ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))] ++ "TARGET_SSE5" ++ "com%Y1s\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "")]) ++ + + ;; Basic conditional jump instructions. + ;; We ignore the overflow flag for signed branch instructions. 
+@@ -17208,6 +17253,15 @@ + operands[i] = gen_reg_rtx (XFmode); + }) + ++(define_insn "sse4_1_round2" ++ [(set (match_operand:MODEF 0 "register_operand" "=x") ++ (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x") ++ (match_operand:SI 2 "const_0_to_15_operand" "n")] ++ UNSPEC_ROUND))] ++ "TARGET_ROUND" ++ "rounds\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "")]) + + (define_insn "frndintxf2" + [(set (match_operand:XF 0 "register_operand" "=f") +@@ -19180,6 +19234,20 @@ + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + ++;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict ++;; the scalar versions to have only XMM registers as operands. ++ ++;; SSE5 conditional move ++(define_insn "*sse5_pcmov_" ++ [(set (match_operand:MODEF 0 "register_operand" "=x,x") ++ (if_then_else:MODEF ++ (match_operand:MODEF 1 "register_operand" "x,0") ++ (match_operand:MODEF 2 "register_operand" "0,x") ++ (match_operand:MODEF 3 "register_operand" "x,x")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" ++ [(set_attr "type" "sse4arg")]) ++ + ;; These versions of the min/max patterns are intentionally ignorant of + ;; their behavior wrt -0.0 and NaN (via the commutative operand mark). 
+ ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +--- gcc/config/i386/i386.opt.jj 2007-06-26 13:38:46.000000000 +0200 ++++ gcc/config/i386/i386.opt 2007-12-28 21:59:55.000000000 +0100 +@@ -209,6 +209,10 @@ msse4a + Target Report Mask(SSE4A) + Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation + ++msse5 ++Target Report Mask(SSE5) ++Support SSE5 built-in functions and code generation ++ + mpopcnt + Target Report Mask(POPCNT) + Support code generation of popcount instruction for popcount built-ins +@@ -247,3 +251,9 @@ Schedule code for given CPU + + ;; Support Athlon 3Dnow builtins + Mask(3DNOW_A) ++ ++mfused-madd ++Target Report Var(x86_fused_muladd) Init(1) ++Enable automatic generation of fused floating point multiply-add instructions ++if the ISA supports such instructions. The -mfused-madd option is on by ++default. +--- gcc/config/i386/mm3dnow.h.jj 2007-12-28 15:43:26.000000000 +0100 ++++ gcc/config/i386/mm3dnow.h 2007-12-30 22:13:34.000000000 +0100 +@@ -172,13 +172,17 @@ _m_prefetchw (void *__P) + static __inline __m64 + _m_from_float (float __A) + { +- return (__m64)(__v2sf){ __A, 0 }; ++ union { __v2sf v; float a[2]; } __tmp; ++ __tmp.a[0] = __A; ++ __tmp.a[1] = 0; ++ return (__m64)__tmp.v; + } + + static __inline float + _m_to_float (__m64 __A) + { +- union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A }; ++ union { __v2sf v; float a[2]; } __tmp; ++ __tmp.v = (__v2sf)__A; + return __tmp.a[0]; + } + +--- gcc/config/i386/mmintrin-common.h.jj 2007-12-28 20:04:35.000000000 +0100 ++++ gcc/config/i386/mmintrin-common.h 2007-12-28 20:04:35.000000000 +0100 +@@ -0,0 +1,156 @@ ++/* Copyright (C) 2007 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 2, or (at your option) ++ any later version. 
++ ++ GCC is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING. If not, write to ++ the Free Software Foundation, 51 Franklin Street, Fifth Floor, ++ Boston, MA 02110-1301, USA. */ ++ ++/* As a special exception, if you include this header file into source ++ files compiled by GCC, this header file does not by itself cause ++ the resulting executable to be covered by the GNU General Public ++ License. This exception does not however invalidate any other ++ reasons why the executable file might be covered by the GNU General ++ Public License. */ ++ ++/* Common definition of the ROUND and PTEST intrinsics that are shared ++ between SSE4.1 and SSE5. */ ++ ++#ifndef _MMINTRIN_COMMON_H_INCLUDED ++#define _MMINTRIN_COMMON_H_INCLUDED ++ ++#if !defined(__SSE5__) && !defined(__SSE4_1__) ++# error "SSE5 or SSE4.1 instruction set not enabled" ++#else ++ ++/* Rounding mode macros. */ ++#define _MM_FROUND_TO_NEAREST_INT 0x00 ++#define _MM_FROUND_TO_NEG_INF 0x01 ++#define _MM_FROUND_TO_POS_INF 0x02 ++#define _MM_FROUND_TO_ZERO 0x03 ++#define _MM_FROUND_CUR_DIRECTION 0x04 ++ ++#define _MM_FROUND_RAISE_EXC 0x00 ++#define _MM_FROUND_NO_EXC 0x08 ++ ++#define _MM_FROUND_NINT \ ++ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) ++#define _MM_FROUND_FLOOR \ ++ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) ++#define _MM_FROUND_CEIL \ ++ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) ++#define _MM_FROUND_TRUNC \ ++ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) ++#define _MM_FROUND_RINT \ ++ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) ++#define _MM_FROUND_NEARBYINT \ ++ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) ++ ++/* Test Instruction */ ++/* Packed integer 128-bit bitwise comparison. 
Return 1 if ++ (__V & __M) == 0. */ ++static __inline int __attribute__((__always_inline__, __artificial__)) ++_mm_testz_si128 (__m128i __M, __m128i __V) ++{ ++ return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V); ++} ++ ++/* Packed integer 128-bit bitwise comparison. Return 1 if ++ (__V & ~__M) == 0. */ ++static __inline int __attribute__((__always_inline__, __artificial__)) ++_mm_testc_si128 (__m128i __M, __m128i __V) ++{ ++ return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V); ++} ++ ++/* Packed integer 128-bit bitwise comparison. Return 1 if ++ (__V & __M) != 0 && (__V & ~__M) != 0. */ ++static __inline int __attribute__((__always_inline__, __artificial__)) ++_mm_testnzc_si128 (__m128i __M, __m128i __V) ++{ ++ return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V); ++} ++ ++/* Macros for packed integer 128-bit comparison intrinsics. */ ++#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) ++ ++#define _mm_test_all_ones(V) \ ++ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V))) ++ ++#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) ++ ++/* Packed/scalar double precision floating point rounding. */ ++ ++#ifdef __OPTIMIZE__ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_round_pd (__m128d __V, const int __M) ++{ ++ return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M); ++} ++ ++static __inline __m128d __attribute__((__always_inline__, __artificial__)) ++_mm_round_sd(__m128d __D, __m128d __V, const int __M) ++{ ++ return (__m128d) __builtin_ia32_roundsd ((__v2df)__D, ++ (__v2df)__V, ++ __M); ++} ++#else ++#define _mm_round_pd(V, M) \ ++ ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M))) ++ ++#define _mm_round_sd(D, V, M) \ ++ ((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M))) ++#endif ++ ++/* Packed/scalar single precision floating point rounding. 
*/ ++ ++#ifdef __OPTIMIZE__ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_round_ps (__m128 __V, const int __M) ++{ ++ return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M); ++} ++ ++static __inline __m128 __attribute__((__always_inline__, __artificial__)) ++_mm_round_ss (__m128 __D, __m128 __V, const int __M) ++{ ++ return (__m128) __builtin_ia32_roundss ((__v4sf)__D, ++ (__v4sf)__V, ++ __M); ++} ++#else ++#define _mm_round_ps(V, M) \ ++ ((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M))) ++ ++#define _mm_round_ss(D, V, M) \ ++ ((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M))) ++#endif ++ ++/* Macros for ceil/floor intrinsics. */ ++#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) ++#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) ++ ++#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) ++#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) ++ ++#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) ++#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) ++ ++#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) ++#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) ++ ++#endif /* __SSE5__/__SSE4_1__ */ ++ ++#endif /* _MMINTRIN_COMMON_H_INCLUDED */ +--- gcc/config/i386/predicates.md.jj 2007-02-20 22:38:59.000000000 +0100 ++++ gcc/config/i386/predicates.md 2007-12-28 20:04:35.000000000 +0100 +@@ -566,6 +566,11 @@ + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 15"))) + ++;; Match 0 to 31. ++(define_predicate "const_0_to_31_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) ++ + ;; Match 0 to 63. 
+ (define_predicate "const_0_to_63_operand" + (and (match_code "const_int") +@@ -806,6 +811,18 @@ + (define_special_predicate "sse_comparison_operator" + (match_code "eq,lt,le,unordered,ne,unge,ungt,ordered")) + ++;; Return 1 if OP is a comparison operator that can be issued by sse predicate ++;; generation instructions ++(define_predicate "sse5_comparison_float_operator" ++ (and (match_test "TARGET_SSE5") ++ (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))) ++ ++(define_predicate "ix86_comparison_int_operator" ++ (match_code "ne,eq,ge,gt,le,lt")) ++ ++(define_predicate "ix86_comparison_uns_operator" ++ (match_code "ne,eq,geu,gtu,leu,ltu")) ++ + ;; Return 1 if OP is a valid comparison operator in valid mode. + (define_predicate "ix86_comparison_operator" + (match_operand 0 "comparison_operator") +--- gcc/config/i386/sse.md.jj 2007-07-23 12:11:56.000000000 +0200 ++++ gcc/config/i386/sse.md 2007-12-30 21:39:00.000000000 +0100 +@@ -33,10 +33,24 @@ + (define_mode_macro SSEMODE14 [V16QI V4SI]) + (define_mode_macro SSEMODE124 [V16QI V8HI V4SI]) + (define_mode_macro SSEMODE248 [V8HI V4SI V2DI]) ++(define_mode_macro SSEMODE1248 [V16QI V8HI V4SI V2DI]) ++(define_mode_macro SSEMODEF4 [SF DF V4SF V2DF]) ++(define_mode_macro SSEMODEF2P [V4SF V2DF]) + + ;; Mapping from integer vector mode to mnemonic suffix + (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) + ++;; Mapping of the sse5 suffix ++(define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") (V4SF "ps") (V2DF "pd")]) ++(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") (V4SF "ss") (V2DF "sd")]) ++(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) ++ ++;; Mapping of the max integer size for sse5 rotate immediate constraint ++(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) ++ ++;; Mapping of vector modes back to the scalar modes ++(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")]) ++ + ;; Patterns whose name 
begins with "sse{,2,3}_" are invoked by intrinsics. + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@@ -745,7 +759,7 @@ + (match_operator:V4SF 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))] +- "TARGET_SSE" ++ "TARGET_SSE && !TARGET_SSE5" + "cmp%D3ps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V4SF")]) +@@ -758,7 +772,7 @@ + (match_operand:V4SF 2 "register_operand" "x")]) + (match_dup 1) + (const_int 1)))] +- "TARGET_SSE" ++ "TARGET_SSE && !TARGET_SSE5" + "cmp%D3ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) +@@ -1386,6 +1400,563 @@ + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; ++;; SSE5 floating point multiply/accumulate instructions This includes the ++;; scalar version of the instructions as well as the vector ++;; ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; In order to match (*a * *b) + *c, particularly when vectorizing, allow ++;; combine to generate a multiply/add with two memory references. We then ++;; split this insn, into loading up the destination register with one of the ++;; memory operations. If we don't manage to split the insn, reload will ++;; generate the appropriate moves. The reason this is needed, is that combine ++;; has already folded one of the memory references into both the multiply and ++;; add insns, and it can't generate a new pseudo. 
I.e.: ++;; (set (reg1) (mem (addr1))) ++;; (set (reg2) (mult (reg1) (mem (addr2)))) ++;; (set (reg3) (plus (reg2) (mem (addr3)))) ++ ++(define_insn "sse5_fmadd4" ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") ++ (plus:SSEMODEF4 ++ (mult:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)" ++ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Split fmadd with two memory operands into a load and the fmadd. ++(define_split ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "") ++ (plus:SSEMODEF4 ++ (mult:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] ++ "TARGET_SSE5 ++ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1) ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2) ++ && !reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2]) ++ && !reg_mentioned_p (operands[0], operands[3])" ++ [(const_int 0)] ++{ ++ ix86_expand_sse5_multiple_memory (operands, 4, mode); ++ emit_insn (gen_sse5_fmadd4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++}) ++ ++;; For the scalar operations, use operand1 for the upper words that aren't ++;; modified, so restrict the forms that are generated. 
++;; Scalar version of fmadd ++(define_insn "sse5_vmfmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (vec_merge:SSEMODEF2P ++ (plus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) ++ (match_dup 1) ++ (const_int 1)))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Floating multiply and subtract ++;; Allow two memory operands the same as fmadd ++(define_insn "sse5_fmsub4" ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") ++ (minus:SSEMODEF4 ++ (mult:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)" ++ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Split fmsub with two memory operands into a load and the fmsub. 
++(define_split ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "") ++ (minus:SSEMODEF4 ++ (mult:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] ++ "TARGET_SSE5 ++ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1) ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2) ++ && !reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2]) ++ && !reg_mentioned_p (operands[0], operands[3])" ++ [(const_int 0)] ++{ ++ ix86_expand_sse5_multiple_memory (operands, 4, mode); ++ emit_insn (gen_sse5_fmsub4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++}) ++ ++;; For the scalar operations, use operand1 for the upper words that aren't ++;; modified, so restrict the forms that are generated. ++;; Scalar version of fmsub ++(define_insn "sse5_vmfmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) ++ (match_dup 1) ++ (const_int 1)))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Floating point negative multiply and add ++;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) ++;; Note operands are out of order to simplify call to ix86_sse5_valid_op_p ++;; Allow two memory operands to help in optimizing.
++(define_insn "sse5_fnmadd4" ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") ++ (minus:SSEMODEF4 ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0") ++ (mult:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)" ++ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Split fnmadd with two memory operands into a load and the fnmadd. ++(define_split ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "") ++ (minus:SSEMODEF4 ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "") ++ (mult:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))] ++ "TARGET_SSE5 ++ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1) ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2) ++ && !reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2]) ++ && !reg_mentioned_p (operands[0], operands[3])" ++ [(const_int 0)] ++{ ++ ix86_expand_sse5_multiple_memory (operands, 4, mode); ++ emit_insn (gen_sse5_fnmadd4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++}) ++ ++;; For the scalar operations, use operand1 for the upper words that aren't ++;; modified, so restrict the forms that are generated. 
++;; Scalar version of fnmadd ++(define_insn "sse5_vmfnmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) ++ (match_dup 1) ++ (const_int 1)))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Floating point negative multiply and subtract ++;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c ++;; Allow 2 memory operands to help with optimization ++(define_insn "sse5_fnmsub4" ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") ++ (minus:SSEMODEF4 ++ (mult:SSEMODEF4 ++ (neg:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0")) ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)" ++ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Split fnmsub with two memory operands into a load and the fnmsub.
++(define_split ++ [(set (match_operand:SSEMODEF4 0 "register_operand" "") ++ (minus:SSEMODEF4 ++ (mult:SSEMODEF4 ++ (neg:SSEMODEF4 ++ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")) ++ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) ++ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] ++ "TARGET_SSE5 ++ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1) ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2) ++ && !reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2]) ++ && !reg_mentioned_p (operands[0], operands[3])" ++ [(const_int 0)] ++{ ++ ix86_expand_sse5_multiple_memory (operands, 4, mode); ++ emit_insn (gen_sse5_fnmsub4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++}) ++ ++;; For the scalar operations, use operand1 for the upper words that aren't ++;; modified, so restrict the forms that are generated. ++;; Scalar version of fnmsub ++(define_insn "sse5_vmfnmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (neg:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")) ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) ++ (match_dup 1) ++ (const_int 1)))] ++ "TARGET_SSE5 && TARGET_FUSED_MADD ++ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2)" ++ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; The same instructions using an UNSPEC to allow the intrinsic to be used ++;; even if the user used -mno-fused-madd ++;; Parallel instructions. During instruction generation, just default ++;; to registers, and let combine later build the appropriate instruction. 
++(define_expand "sse5i_fmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(plus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "") ++ (match_operand:SSEMODEF2P 2 "register_operand" "")) ++ (match_operand:SSEMODEF2P 3 "register_operand" ""))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_fmadd4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_fmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") ++ (unspec:SSEMODEF2P ++ [(plus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++(define_expand "sse5i_fmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "") ++ (match_operand:SSEMODEF2P 2 "register_operand" "")) ++ (match_operand:SSEMODEF2P 3 "register_operand" ""))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_fmsub4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_fmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") ++ (unspec:SSEMODEF2P ++ [(minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "%0,0,x,xm") ++ 
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) ++;; Note operands are out of order to simplify call to ix86_sse5_valid_p ++(define_expand "sse5i_fnmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(minus:SSEMODEF2P ++ (match_operand:SSEMODEF2P 3 "register_operand" "") ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "") ++ (match_operand:SSEMODEF2P 2 "register_operand" "")))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_fnmadd4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_fnmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") ++ (unspec:SSEMODEF2P ++ [(minus:SSEMODEF2P ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0") ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c ++(define_expand "sse5i_fnmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (neg:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "")) ++ (match_operand:SSEMODEF2P 2 "register_operand" "")) ++ 
(match_operand:SSEMODEF2P 3 "register_operand" ""))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_fnmsub4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_fnmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") ++ (unspec:SSEMODEF2P ++ [(minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (neg:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")) ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Scalar instructions ++(define_expand "sse5i_vmfmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (plus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "") ++ (match_operand:SSEMODEF2P 2 "register_operand" "")) ++ (match_operand:SSEMODEF2P 3 "register_operand" "")) ++ (match_dup 1) ++ (const_int 0))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_vmfmadd4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++;; For the scalar operations, use operand1 for the upper words that aren't ++;; modified, so restrict the forms that are accepted. 
++(define_insn "*sse5i_vmfmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (plus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "0,0") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) ++ (match_dup 0) ++ (const_int 0))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++(define_expand "sse5i_vmfmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "") ++ (match_operand:SSEMODEF2P 2 "register_operand" "")) ++ (match_operand:SSEMODEF2P 3 "register_operand" "")) ++ (match_dup 0) ++ (const_int 1))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_vmfmsub4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_vmfmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) ++ (match_dup 1) ++ (const_int 1))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;; Note operands are out of order to simplify call to ix86_sse5_valid_p ++(define_expand "sse5i_vmfnmadd4" ++ [(set 
(match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (match_operand:SSEMODEF2P 3 "register_operand" "") ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "") ++ (match_operand:SSEMODEF2P 2 "register_operand" ""))) ++ (match_dup 1) ++ (const_int 1))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_vmfnmadd4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_vmfnmadd4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") ++ (mult:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) ++ (match_dup 1) ++ (const_int 1))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++(define_expand "sse5i_vmfnmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (neg:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "register_operand" "")) ++ (match_operand:SSEMODEF2P 2 "register_operand" "")) ++ (match_operand:SSEMODEF2P 3 "register_operand" "")) ++ (match_dup 1) ++ (const_int 1))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5" ++{ ++ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ ++ if (TARGET_FUSED_MADD) ++ { ++ emit_insn (gen_sse5_vmfnmsub4 (operands[0], operands[1], ++ operands[2], operands[3])); ++ DONE; ++ } ++}) ++ ++(define_insn "*sse5i_vmfnmsub4" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 
++ (unspec:SSEMODEF2P ++ [(vec_merge:SSEMODEF2P ++ (minus:SSEMODEF2P ++ (mult:SSEMODEF2P ++ (neg:SSEMODEF2P ++ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")) ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) ++ (match_dup 1) ++ (const_int 1))] ++ UNSPEC_SSE5_INTRINSIC))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "fnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "")]) ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; + ;; Parallel double-precision floating point arithmetic + ;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@@ -1716,7 +2287,7 @@ + (match_operator:V2DF 3 "sse_comparison_operator" + [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))] +- "TARGET_SSE2" ++ "TARGET_SSE2 && !TARGET_SSE5" + "cmp%D3pd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "V2DF")]) +@@ -1729,7 +2300,7 @@ + (match_operand:V2DF 2 "nonimmediate_operand" "xm")]) + (match_dup 1) + (const_int 1)))] +- "TARGET_SSE2" ++ "TARGET_SSE2 && !TARGET_SSE5" + "cmp%D3sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) +@@ -2677,6 +3248,31 @@ + [(set_attr "type" "sseiadd") + (set_attr "mode" "TI")]) + ++;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a ++;; multiply/add. In general, we expect the define_split to occur before ++;; register allocation, so we have to handle the corner case where the target ++;; is used as the base or index register in operands 1/2. 
++(define_insn_and_split "sse5_mulv4si3" ++ [(set (match_operand:V4SI 0 "register_operand" "=&x") ++ (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x") ++ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] ++ "TARGET_SSE5" ++ "#" ++ "&& (reload_completed ++ || (!reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2])))" ++ [(set (match_dup 0) ++ (match_dup 3)) ++ (set (match_dup 0) ++ (plus:V4SI (mult:V4SI (match_dup 1) ++ (match_dup 2)) ++ (match_dup 0)))] ++{ ++ operands[3] = CONST0_RTX (V4SImode); ++} ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ + (define_expand "mulv4si3" + [(set (match_operand:V4SI 0 "register_operand" "") + (mult:V4SI (match_operand:V4SI 1 "register_operand" "") +@@ -2686,6 +3282,13 @@ + rtx t1, t2, t3, t4, t5, t6, thirtytwo; + rtx op0, op1, op2; + ++ if (TARGET_SSE5) ++ { ++ ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); ++ emit_insn (gen_sse5_mulv4si3 (operands[0], operands[1], operands[2])); ++ DONE; ++ } ++ + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; +@@ -3013,7 +3616,8 @@ + (eq:SSEMODE124 + (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] +- "TARGET_SSE2 && ix86_binary_operator_ok (EQ, mode, operands)" ++ "TARGET_SSE2 && !TARGET_SSE5 ++ && ix86_binary_operator_ok (EQ, mode, operands)" + "pcmpeq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) +@@ -3023,7 +3627,7 @@ + (gt:SSEMODE124 + (match_operand:SSEMODE124 1 "register_operand" "0") + (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] +- "TARGET_SSE2" ++ "TARGET_SSE2 && !TARGET_SSE5" + "pcmpgt\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "TI")]) +@@ -4658,3 +5262,1245 @@ + "insertq\t{%2, %0|%0, %2}" + [(set_attr "type" "sseins") + (set_attr "mode" "TI")]) ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; ++;; Common SSE4.1/SSE5 
instructions ++;; ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG. ++;; But it is not a really compare instruction. ++(define_insn "sse4_1_ptest" ++ [(set (reg:CC FLAGS_REG) ++ (unspec:CC [(match_operand:V2DI 0 "register_operand" "x") ++ (match_operand:V2DI 1 "nonimmediate_operand" "xm")] ++ UNSPEC_PTEST))] ++ "TARGET_ROUND" ++ "ptest\t{%1, %0|%0, %1}" ++ [(set_attr "type" "ssecomi") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse4_1_roundpd" ++ [(set (match_operand:V2DF 0 "register_operand" "=x") ++ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm") ++ (match_operand:SI 2 "const_0_to_15_operand" "n")] ++ UNSPEC_ROUND))] ++ "TARGET_ROUND" ++ "roundpd\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "V2DF")]) ++ ++(define_insn "sse4_1_roundps" ++ [(set (match_operand:V4SF 0 "register_operand" "=x") ++ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm") ++ (match_operand:SI 2 "const_0_to_15_operand" "n")] ++ UNSPEC_ROUND))] ++ "TARGET_ROUND" ++ "roundps\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "V4SF")]) ++ ++(define_insn "sse4_1_roundsd" ++ [(set (match_operand:V2DF 0 "register_operand" "=x") ++ (vec_merge:V2DF ++ (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x") ++ (match_operand:SI 3 "const_0_to_15_operand" "n")] ++ UNSPEC_ROUND) ++ (match_operand:V2DF 1 "register_operand" "0") ++ (const_int 1)))] ++ "TARGET_ROUND" ++ "roundsd\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "V2DF")]) ++ ++(define_insn "sse4_1_roundss" ++ [(set (match_operand:V4SF 0 "register_operand" "=x") ++ (vec_merge:V4SF ++ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") ++ (match_operand:SI 3 "const_0_to_15_operand" "n")] ++ UNSPEC_ROUND) ++ (match_operand:V4SF 1 "register_operand" "0") ++ (const_int 1)))] ++ "TARGET_ROUND" ++ "roundss\t{%3, %2, %0|%0, 
%2, %3}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "V4SF")]) ++ ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++;; ++;; SSE5 instructions ++;; ++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++;; SSE5 parallel integer multiply/add instructions. ++;; Note the instruction does not allow the value being added to be a memory ++;; operation. However by pretending via the nonimmediate_operand predicate ++;; that it does and splitting it later allows the following to be recognized: ++;; a[i] = b[i] * c[i] + d[i]; ++(define_insn "sse5_pmacsww" ++ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") ++ (plus:V8HI ++ (mult:V8HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) ++ (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)" ++ "@ ++ pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++;; Split pmacsww with two memory operands into a load and the pmacsww. 
++(define_split ++ [(set (match_operand:V8HI 0 "register_operand" "") ++ (plus:V8HI ++ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") ++ (match_operand:V8HI 2 "nonimmediate_operand" "")) ++ (match_operand:V8HI 3 "nonimmediate_operand" "")))] ++ "TARGET_SSE5 ++ && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1) ++ && ix86_sse5_valid_op_p (operands, insn, 4, false, 2) ++ && !reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2]) ++ && !reg_mentioned_p (operands[0], operands[3])" ++ [(const_int 0)] ++{ ++ ix86_expand_sse5_multiple_memory (operands, 4, V8HImode); ++ emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++}) ++ ++(define_insn "sse5_pmacssww" ++ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") ++ (ss_plus:V8HI ++ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) ++ (match_operand:V8HI 3 "nonimmediate_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++;; Note the instruction does not allow the value being added to be a memory ++;; operation. 
However by pretending via the nonimmediate_operand predicate ++;; that it does and splitting it later allows the following to be recognized: ++;; a[i] = b[i] * c[i] + d[i]; ++(define_insn "sse5_pmacsdd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") ++ (plus:V4SI ++ (mult:V4SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) ++ (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2)" ++ "@ ++ pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++;; Split pmacsdd with two memory operands into a load and the pmacsdd. ++(define_split ++ [(set (match_operand:V4SI 0 "register_operand" "") ++ (plus:V4SI ++ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") ++ (match_operand:V4SI 2 "nonimmediate_operand" "")) ++ (match_operand:V4SI 3 "nonimmediate_operand" "")))] ++ "TARGET_SSE5 ++ && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1) ++ && ix86_sse5_valid_op_p (operands, insn, 4, false, 2) ++ && !reg_mentioned_p (operands[0], operands[1]) ++ && !reg_mentioned_p (operands[0], operands[2]) ++ && !reg_mentioned_p (operands[0], operands[3])" ++ [(const_int 0)] ++{ ++ ix86_expand_sse5_multiple_memory (operands, 4, V4SImode); ++ emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2], ++ operands[3])); ++ DONE; ++}) ++ ++(define_insn "sse5_pmacssdd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") ++ (ss_plus:V4SI ++ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) ++ (match_operand:V4SI 3 "nonimmediate_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, 
%3} ++ pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pmacssdql" ++ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") ++ (ss_plus:V2DI ++ (mult:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 1) ++ (const_int 3)]))) ++ (sign_extend:V2DI (vec_select:V2SI ;; both multiplicands are sign-extended, as in sse5_pmacsdql ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 1) ++ (const_int 3)])))) ++ (match_operand:V2DI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pmacssdqh" ++ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") ++ (ss_plus:V2DI ++ (mult:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 0) ++ (const_int 2)]))) ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 0) ++ (const_int 2)])))) ++ (match_operand:V2DI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pmacsdql" ++ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") ++ (plus:V2DI ++ (mult:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 1) ++ (const_int 3)]))) ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 1) ++ 
(const_int 3)])))) ++ (match_operand:V2DI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pmacsdqh" ++ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") ++ (plus:V2DI ++ (mult:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 0) ++ (const_int 2)]))) ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 0) ++ (const_int 2)])))) ++ (match_operand:V2DI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++;; SSE5 parallel integer multiply/add instructions for the intrinsics ++(define_insn "sse5_pmacsswd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") ++ (ss_plus:V4SI ++ (mult:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)])))) ++ (match_operand:V4SI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ 
++(define_insn "sse5_pmacswd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") ++ (plus:V4SI ++ (mult:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)])))) ++ (match_operand:V4SI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pmadcsswd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") ++ (ss_plus:V4SI ++ (plus:V4SI ++ (mult:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)])))) ++ (mult:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 2) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)]))))) ++ (match_operand:V4SI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pmadcswd" ++ [(set 
(match_operand:V4SI 0 "register_operand" "=x,x,x") ++ (plus:V4SI ++ (plus:V4SI ++ (mult:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)])))) ++ (mult:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 2) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)]))))) ++ (match_operand:V4SI 3 "register_operand" "0,0,0")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1)" ++ "@ ++ pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" ++ [(set_attr "type" "ssemuladd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pcmov_" ++ [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x") ++ (if_then_else:SSEMODE ++ (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x") ++ (match_operand:SSEMODE 1 "nonimmediate_operand" "x,xm,0,0") ++ (match_operand:SSEMODE 2 "nonimmediate_operand" "xm,x,x,xm")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "@ ++ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} ++ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "sse4arg")]) ++ ++;; SSE5 horizontal add/subtract instructions ++(define_insn "sse5_phaddbw" ++ [(set (match_operand:V8HI 0 "register_operand" "=x") ++ (plus:V8HI ++ (sign_extend:V8HI ++ (vec_select:V8QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ 
(const_int 6) ++ (const_int 8) ++ (const_int 10) ++ (const_int 12) ++ (const_int 14)]))) ++ (sign_extend:V8HI ++ (vec_select:V8QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7) ++ (const_int 9) ++ (const_int 11) ++ (const_int 13) ++ (const_int 15)])))))] ++ "TARGET_SSE5" ++ "phaddbw\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddbd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x") ++ (plus:V4SI ++ (plus:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 4) ++ (const_int 8) ++ (const_int 12)]))) ++ (sign_extend:V4SI ++ (vec_select:V4QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 5) ++ (const_int 9) ++ (const_int 13)])))) ++ (plus:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4QI ++ (match_dup 1) ++ (parallel [(const_int 2) ++ (const_int 6) ++ (const_int 10) ++ (const_int 14)]))) ++ (sign_extend:V4SI ++ (vec_select:V4QI ++ (match_dup 1) ++ (parallel [(const_int 3) ++ (const_int 7) ++ (const_int 11) ++ (const_int 15)]))))))] ++ "TARGET_SSE5" ++ "phaddbd\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddbq" ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (plus:V2DI ++ (plus:V2DI ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 4)]))) ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 5)])))) ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 2) ++ (const_int 6)]))) ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 3) ++ (const_int 7)]))))) ++ (plus:V2DI ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 8) ++ (const_int 12)]))) ++ (sign_extend:V2DI ++ 
(vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 9) ++ (const_int 13)])))) ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 10) ++ (const_int 14)]))) ++ (sign_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 11) ++ (const_int 15)])))))))] ++ "TARGET_SSE5" ++ "phaddbq\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddwd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x") ++ (plus:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)])))))] ++ "TARGET_SSE5" ++ "phaddwd\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddwq" ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (plus:V2DI ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 4)]))) ++ (sign_extend:V2DI ++ (vec_select:V2HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 5)])))) ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2HI ++ (match_dup 1) ++ (parallel [(const_int 2) ++ (const_int 6)]))) ++ (sign_extend:V2DI ++ (vec_select:V2HI ++ (match_dup 1) ++ (parallel [(const_int 3) ++ (const_int 7)]))))))] ++ "TARGET_SSE5" ++ "phaddwq\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phadddq" ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (plus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2)]))) ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3)])))))] ++ "TARGET_SSE5" ++ "phadddq\t{%1, %0|%0, %1}" 
++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddubw" ++ [(set (match_operand:V8HI 0 "register_operand" "=x") ++ (plus:V8HI ++ (zero_extend:V8HI ++ (vec_select:V8QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6) ++ (const_int 8) ++ (const_int 10) ++ (const_int 12) ++ (const_int 14)]))) ++ (zero_extend:V8HI ++ (vec_select:V8QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7) ++ (const_int 9) ++ (const_int 11) ++ (const_int 13) ++ (const_int 15)])))))] ++ "TARGET_SSE5" ++ "phaddubw\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddubd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x") ++ (plus:V4SI ++ (plus:V4SI ++ (zero_extend:V4SI ++ (vec_select:V4QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 4) ++ (const_int 8) ++ (const_int 12)]))) ++ (zero_extend:V4SI ++ (vec_select:V4QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 5) ++ (const_int 9) ++ (const_int 13)])))) ++ (plus:V4SI ++ (zero_extend:V4SI ++ (vec_select:V4QI ++ (match_dup 1) ++ (parallel [(const_int 2) ++ (const_int 6) ++ (const_int 10) ++ (const_int 14)]))) ++ (zero_extend:V4SI ++ (vec_select:V4QI ++ (match_dup 1) ++ (parallel [(const_int 3) ++ (const_int 7) ++ (const_int 11) ++ (const_int 15)]))))))] ++ "TARGET_SSE5" ++ "phaddubd\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddubq" ;; every byte group is zero-extended, as in sse5_phaddubw/sse5_phaddubd ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (plus:V2DI ++ (plus:V2DI ++ (plus:V2DI ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 4)]))) ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 5)])))) ++ (plus:V2DI ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 2) 
++ (const_int 6)]))) ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 3) ++ (const_int 7)]))))) ++ (plus:V2DI ++ (plus:V2DI ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 8) ++ (const_int 12)]))) ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 9) ++ (const_int 13)])))) ++ (plus:V2DI ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 10) ++ (const_int 14)]))) ++ (zero_extend:V2DI ++ (vec_select:V2QI ++ (match_dup 1) ++ (parallel [(const_int 11) ++ (const_int 15)])))))))] ++ "TARGET_SSE5" ++ "phaddubq\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phadduwd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x") ++ (plus:V4SI ++ (zero_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)]))) ++ (zero_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)])))))] ++ "TARGET_SSE5" ++ "phadduwd\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phadduwq" ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (plus:V2DI ++ (plus:V2DI ++ (zero_extend:V2DI ++ (vec_select:V2HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 4)]))) ++ (zero_extend:V2DI ++ (vec_select:V2HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 5)])))) ++ (plus:V2DI ++ (zero_extend:V2DI ++ (vec_select:V2HI ++ (match_dup 1) ++ (parallel [(const_int 2) ++ (const_int 6)]))) ++ (zero_extend:V2DI ++ (vec_select:V2HI ++ (match_dup 1) ++ (parallel [(const_int 3) ++ (const_int 7)]))))))] ++ "TARGET_SSE5" ++ "phadduwq\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phaddudq" ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (plus:V2DI ++ 
(zero_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2)]))) ++ (zero_extend:V2DI ++ (vec_select:V2SI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3)])))))] ++ "TARGET_SSE5" ++ "phaddudq\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phsubbw" ++ [(set (match_operand:V8HI 0 "register_operand" "=x") ++ (minus:V8HI ++ (sign_extend:V8HI ++ (vec_select:V8QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6) ++ (const_int 8) ++ (const_int 10) ++ (const_int 12) ++ (const_int 14)]))) ++ (sign_extend:V8HI ++ (vec_select:V8QI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7) ++ (const_int 9) ++ (const_int 11) ++ (const_int 13) ++ (const_int 15)])))))] ++ "TARGET_SSE5" ++ "phsubbw\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phsubwd" ++ [(set (match_operand:V4SI 0 "register_operand" "=x") ++ (minus:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2) ++ (const_int 4) ++ (const_int 6)]))) ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3) ++ (const_int 5) ++ (const_int 7)])))))] ++ "TARGET_SSE5" ++ "phsubwd\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++(define_insn "sse5_phsubdq" ++ [(set (match_operand:V2DI 0 "register_operand" "=x") ++ (minus:V2DI ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "xm") ++ (parallel [(const_int 0) ++ (const_int 2)]))) ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_dup 1) ++ (parallel [(const_int 1) ++ (const_int 3)])))))] ++ "TARGET_SSE5" ++ "phsubdq\t{%1, %0|%0, %1}" ++ [(set_attr "type" "sseiadd1")]) ++ ++;; SSE5 permute instructions ++(define_insn "sse5_pperm" ++ 
[(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") ++ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,xm,xm") ++ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,0,x") ++ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")] ++ UNSPEC_SSE5_PERMUTE))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "TI")]) ++ ++;; The following are for the various unpack insns which doesn't need the first ++;; source operand, so we can just use the output operand for the first operand. ++;; This allows either of the other two operands to be a memory operand. We ++;; can't just use the first operand as an argument to the normal pperm because ++;; then an output only argument, suddenly becomes an input operand. ++(define_insn "sse5_pperm_zero_v16qi_v8hi" ++ [(set (match_operand:V8HI 0 "register_operand" "=x,x") ++ (zero_extend:V8HI ++ (vec_select:V8QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") ++ (match_operand 2 "" "")))) ;; parallel with const_int's ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] ++ "TARGET_SSE5 ++ && (register_operand (operands[1], V16QImode) ++ || register_operand (operands[2], V16QImode))" ++ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" ++ [(set_attr "type" "sseadd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_sign_v16qi_v8hi" ++ [(set (match_operand:V8HI 0 "register_operand" "=x,x") ++ (sign_extend:V8HI ++ (vec_select:V8QI ++ (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") ++ (match_operand 2 "" "")))) ;; parallel with const_int's ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] ++ "TARGET_SSE5 ++ && (register_operand (operands[1], V16QImode) ++ || register_operand (operands[2], V16QImode))" ++ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" ++ [(set_attr "type" "sseadd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_zero_v8hi_v4si" ++ [(set 
(match_operand:V4SI 0 "register_operand" "=x,x") ++ (zero_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") ++ (match_operand 2 "" "")))) ;; parallel with const_int's ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] ++ "TARGET_SSE5 ++ && (register_operand (operands[1], V8HImode) ++ || register_operand (operands[2], V16QImode))" ++ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" ++ [(set_attr "type" "sseadd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_sign_v8hi_v4si" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x") ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") ++ (match_operand 2 "" "")))) ;; parallel with const_int's ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] ++ "TARGET_SSE5 ++ && (register_operand (operands[1], V8HImode) ++ || register_operand (operands[2], V16QImode))" ++ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" ++ [(set_attr "type" "sseadd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_zero_v4si_v2di" ++ [(set (match_operand:V2DI 0 "register_operand" "=x,x") ++ (zero_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") ++ (match_operand 2 "" "")))) ;; parallel with const_int's ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] ++ "TARGET_SSE5 ++ && (register_operand (operands[1], V4SImode) ++ || register_operand (operands[2], V16QImode))" ++ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" ++ [(set_attr "type" "sseadd") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_sign_v4si_v2di" ++ [(set (match_operand:V2DI 0 "register_operand" "=x,x") ++ (sign_extend:V2DI ++ (vec_select:V2SI ++ (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") ++ (match_operand 2 "" "")))) ;; parallel with const_int's ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] ++ "TARGET_SSE5 ++ && (register_operand (operands[1], V4SImode) ++ || register_operand (operands[2], 
V16QImode))" ++ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" ++ [(set_attr "type" "sseadd") ++ (set_attr "mode" "TI")]) ++ ++;; SSE5 pack instructions that combine two vectors into a smaller vector ++(define_insn "sse5_pperm_pack_v2di_v4si" ++ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x") ++ (vec_concat:V4SI ++ (truncate:V2SI ++ (match_operand:V2DI 1 "nonimmediate_operand" "0,0,xm,xm")) ++ (truncate:V2SI ++ (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,0,x")))) ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_pack_v4si_v8hi" ++ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") ++ (vec_concat:V8HI ++ (truncate:V4HI ++ (match_operand:V4SI 1 "nonimmediate_operand" "0,0,xm,xm")) ++ (truncate:V4HI ++ (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,0,x")))) ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_pperm_pack_v8hi_v16qi" ++ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") ++ (vec_concat:V16QI ++ (truncate:V8QI ++ (match_operand:V8HI 1 "nonimmediate_operand" "0,0,xm,xm")) ++ (truncate:V8QI ++ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,0,x")))) ++ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0"))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "TI")]) ++ ++;; Floating point permutation (permps, permpd) ++(define_insn "sse5_perm" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") ++ (unspec:SSEMODEF2P ++ [(match_operand:SSEMODEF2P 1 
"nonimmediate_operand" "0,0,xm,xm") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,0,x") ++ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x,0")] ++ UNSPEC_SSE5_PERMUTE))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)" ++ "perm\t{%3, %2, %1, %0|%0, %1, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "")]) ++ ++;; SSE5 packed rotate instructions ++(define_insn "rotl3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") ++ (rotate:SSEMODE1248 ++ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") ++ (match_operand:SI 2 "const_0_to__operand" "n")))] ++ "TARGET_SSE5" ++ "prot\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "sseishft") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_rotl3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") ++ (rotate:SSEMODE1248 ++ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") ++ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)" ++ "prot\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "sseishft") ++ (set_attr "mode" "TI")]) ++ ++;; SSE5 packed shift instructions. Note negative values for the shift amount ++;; convert this into a right shift instead of left shift. For now, model this ++;; with an UNSPEC instead of using ashift/lshift since the rest of the x86 does ++;; not have the concept of negating the shift amount. 
Also, there is no LSHIFT ++(define_insn "sse5_ashl3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") ++ (unspec:SSEMODE1248 ++ [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") ++ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")] ++ UNSPEC_SSE5_ASHIFT))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)" ++ "psha\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "sseishft") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_lshl3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") ++ (unspec:SSEMODE1248 ++ [(match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") ++ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")] ++ UNSPEC_SSE5_LSHIFT))] ++ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1)" ++ "pshl\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "sseishft") ++ (set_attr "mode" "TI")]) ++ ++;; SSE5 FRCZ support ++;; parallel insns ++(define_insn "sse5_frcz2" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") ++ (unspec:SSEMODEF2P ++ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] ++ UNSPEC_FRCZ))] ++ "TARGET_SSE5" ++ "frcz\t{%1, %0|%0, %1}" ++ [(set_attr "type" "ssecvt1") ++ (set_attr "mode" "")]) ++ ++;; scalar insns ++(define_insn "sse5_vmfrcz2" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") ++ (vec_merge:SSEMODEF2P ++ (unspec:SSEMODEF2P ++ [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] ++ UNSPEC_FRCZ) ++ (match_operand:SSEMODEF2P 1 "register_operand" "0") ++ (const_int 1)))] ++ "TARGET_SSE5" ++ "frcz\t{%2, %0|%0, %2}" ++ [(set_attr "type" "ssecvt1") ++ (set_attr "mode" "")]) ++ ++(define_insn "sse5_cvtph2ps" ++ [(set (match_operand:V4SF 0 "register_operand" "=x") ++ (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")] ++ UNSPEC_CVTPH2PS))] ++ "TARGET_SSE5" ++ "cvtph2ps\t{%1, %0|%0, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "V4SF")]) ++ ++(define_insn "sse5_cvtps2ph" ++ [(set 
(match_operand:V4HI 0 "nonimmediate_operand" "=xm") ++ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")] ++ UNSPEC_CVTPS2PH))] ++ "TARGET_SSE5" ++ "cvtps2ph\t{%1, %0|%0, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "mode" "V4SF")]) ++ ++;; Scalar versions of the com instructions that use vector types that are ++;; called from the intrinsics. Unlike the other s{s,d} instructions, the ++;; com instructions fill in 0's in the upper bits instead of leaving them ++;; unmodified, so we use const_vector of 0 instead of match_dup. ++(define_expand "sse5_vmmaskcmp3" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "") ++ (vec_merge:SSEMODEF2P ++ (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" ++ [(match_operand:SSEMODEF2P 2 "register_operand" "") ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")]) ++ (match_dup 4) ++ (const_int 1)))] ++ "TARGET_SSE5" ++{ ++ operands[4] = CONST0_RTX (mode); ++}) ++ ++(define_insn "*sse5_vmmaskcmp3" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") ++ (vec_merge:SSEMODEF2P ++ (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" ++ [(match_operand:SSEMODEF2P 2 "register_operand" "x") ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]) ++ (match_operand:SSEMODEF2P 4 "") ++ (const_int 1)))] ++ "TARGET_SSE5" ++ "com%Y1\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "")]) ++ ++;; We don't have a comparison operator that always returns true/false, so ++;; handle comfalse and comtrue specially. 
++(define_insn "sse5_com_tf3" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") ++ (unspec:SSEMODEF2P ++ [(match_operand:SSEMODEF2P 1 "register_operand" "x") ++ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") ++ (match_operand:SI 3 "const_int_operand" "n")] ++ UNSPEC_SSE5_TRUEFALSE))] ++ "TARGET_SSE5" ++{ ++ const char *ret = NULL; ++ ++ switch (INTVAL (operands[3])) ++ { ++ case COM_FALSE_S: ++ ret = \"comfalses\t{%2, %1, %0|%0, %1, %2}\"; ++ break; ++ ++ case COM_FALSE_P: ++ ret = \"comfalsep\t{%2, %1, %0|%0, %1, %2}\"; ++ break; ++ ++ case COM_TRUE_S: ++ ret = \"comtrues\t{%2, %1, %0|%0, %1, %2}\"; ++ break; ++ ++ case COM_TRUE_P: ++ ret = \"comtruep\t{%2, %1, %0|%0, %1, %2}\"; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return ret; ++} ++ [(set_attr "type" "ssecmp") ++ (set_attr "mode" "")]) ++ ++(define_insn "sse5_maskcmp3" ++ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") ++ (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" ++ [(match_operand:SSEMODEF2P 2 "register_operand" "x") ++ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))] ++ "TARGET_SSE5" ++ "com%Y1\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" "ssecmp") ++ (set_attr "mode" "")]) ++ ++(define_insn "sse5_maskcmp3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") ++ (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" ++ [(match_operand:SSEMODE1248 2 "register_operand" "x") ++ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] ++ "TARGET_SSE5" ++ "pcom%Y1\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" "sse4arg") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "sse5_maskcmp_uns3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") ++ (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" ++ [(match_operand:SSEMODE1248 2 "register_operand" "x") ++ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] ++ "TARGET_SSE5" ++ "pcom%Y1u\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" 
"ssecmp") ++ (set_attr "mode" "TI")]) ++ ++;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* ++;; and pcomneu* not to be converted to the signed ones in case somebody needs ++;; the exact instruction generated for the intrinsic. ++(define_insn "sse5_maskcmp_uns23" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") ++ (unspec:SSEMODE1248 ++ [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" ++ [(match_operand:SSEMODE1248 2 "register_operand" "x") ++ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] ++ UNSPEC_SSE5_UNSIGNED_CMP))] ++ "TARGET_SSE5" ++ "pcom%Y1u\t{%3, %2, %0|%0, %2, %3}" ++ [(set_attr "type" "ssecmp") ++ (set_attr "mode" "TI")]) ++ ++;; Pcomtrue and pcomfalse support. These are useless instructions, but are ++;; being added here to be complete. ++(define_insn "sse5_pcom_tf3" ++ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") ++ (unspec:SSEMODE1248 [(match_operand:SSEMODE1248 1 "register_operand" "x") ++ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") ++ (match_operand:SI 3 "const_int_operand" "n")] ++ UNSPEC_SSE5_TRUEFALSE))] ++ "TARGET_SSE5" ++{ ++ return ((INTVAL (operands[3]) != 0) ++ ? "pcomtrue\t{%2, %1, %0|%0, %1, %2}" ++ : "pcomfalse\t{%2, %1, %0|%0, %1, %2}"); ++} ++ [(set_attr "type" "ssecmp") ++ (set_attr "mode" "TI")]) +--- gcc/doc/extend.texi.jj 2007-12-28 15:43:26.000000000 +0100 ++++ gcc/doc/extend.texi 2007-12-28 20:04:35.000000000 +0100 +@@ -5970,6 +5970,222 @@ Similar to @code{__builtin_powi}, except + are @code{long double}. + @end deftypefn + ++The following built-in functions are available when @option{-msse5} is used. ++All of them generate the machine instruction that is part of the name ++with MMX registers. 
++ ++@smallexample ++v2df __builtin_ia32_comeqpd (v2df, v2df) ++v4sf __builtin_ia32_comeqps (v4sf, v4sf) ++v2df __builtin_ia32_comeqsd (v2df, v2df) ++v4sf __builtin_ia32_comeqss (v4sf, v4sf) ++v2df __builtin_ia32_comfalsepd (v2df, v2df) ++v4sf __builtin_ia32_comfalseps (v4sf, v4sf) ++v2df __builtin_ia32_comfalsesd (v2df, v2df) ++v4sf __builtin_ia32_comfalsess (v4sf, v4sf) ++v2df __builtin_ia32_comgepd (v2df, v2df) ++v4sf __builtin_ia32_comgeps (v4sf, v4sf) ++v2df __builtin_ia32_comgesd (v2df, v2df) ++v4sf __builtin_ia32_comgess (v4sf, v4sf) ++v2df __builtin_ia32_comgtpd (v2df, v2df) ++v4sf __builtin_ia32_comgtps (v4sf, v4sf) ++v2df __builtin_ia32_comgtsd (v2df, v2df) ++v4sf __builtin_ia32_comgtss (v4sf, v4sf) ++v2df __builtin_ia32_comlepd (v2df, v2df) ++v4sf __builtin_ia32_comleps (v4sf, v4sf) ++v2df __builtin_ia32_comlesd (v2df, v2df) ++v4sf __builtin_ia32_comless (v4sf, v4sf) ++v2df __builtin_ia32_comltpd (v2df, v2df) ++v4sf __builtin_ia32_comltps (v4sf, v4sf) ++v2df __builtin_ia32_comltsd (v2df, v2df) ++v4sf __builtin_ia32_comltss (v4sf, v4sf) ++v2df __builtin_ia32_comnepd (v2df, v2df) ++v4sf __builtin_ia32_comneps (v4sf, v4sf) ++v2df __builtin_ia32_comnesd (v2df, v2df) ++v4sf __builtin_ia32_comness (v4sf, v4sf) ++v2df __builtin_ia32_comordpd (v2df, v2df) ++v4sf __builtin_ia32_comordps (v4sf, v4sf) ++v2df __builtin_ia32_comordsd (v2df, v2df) ++v4sf __builtin_ia32_comordss (v4sf, v4sf) ++v2df __builtin_ia32_comtruepd (v2df, v2df) ++v4sf __builtin_ia32_comtrueps (v4sf, v4sf) ++v2df __builtin_ia32_comtruesd (v2df, v2df) ++v4sf __builtin_ia32_comtruess (v4sf, v4sf) ++v2df __builtin_ia32_comueqpd (v2df, v2df) ++v4sf __builtin_ia32_comueqps (v4sf, v4sf) ++v2df __builtin_ia32_comueqsd (v2df, v2df) ++v4sf __builtin_ia32_comueqss (v4sf, v4sf) ++v2df __builtin_ia32_comugepd (v2df, v2df) ++v4sf __builtin_ia32_comugeps (v4sf, v4sf) ++v2df __builtin_ia32_comugesd (v2df, v2df) ++v4sf __builtin_ia32_comugess (v4sf, v4sf) ++v2df __builtin_ia32_comugtpd (v2df, v2df) ++v4sf 
__builtin_ia32_comugtps (v4sf, v4sf) ++v2df __builtin_ia32_comugtsd (v2df, v2df) ++v4sf __builtin_ia32_comugtss (v4sf, v4sf) ++v2df __builtin_ia32_comulepd (v2df, v2df) ++v4sf __builtin_ia32_comuleps (v4sf, v4sf) ++v2df __builtin_ia32_comulesd (v2df, v2df) ++v4sf __builtin_ia32_comuless (v4sf, v4sf) ++v2df __builtin_ia32_comultpd (v2df, v2df) ++v4sf __builtin_ia32_comultps (v4sf, v4sf) ++v2df __builtin_ia32_comultsd (v2df, v2df) ++v4sf __builtin_ia32_comultss (v4sf, v4sf) ++v2df __builtin_ia32_comunepd (v2df, v2df) ++v4sf __builtin_ia32_comuneps (v4sf, v4sf) ++v2df __builtin_ia32_comunesd (v2df, v2df) ++v4sf __builtin_ia32_comuness (v4sf, v4sf) ++v2df __builtin_ia32_comunordpd (v2df, v2df) ++v4sf __builtin_ia32_comunordps (v4sf, v4sf) ++v2df __builtin_ia32_comunordsd (v2df, v2df) ++v4sf __builtin_ia32_comunordss (v4sf, v4sf) ++v2df __builtin_ia32_fmaddpd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fmaddps (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fmaddsd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fmaddss (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fmsubpd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fmsubps (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fmsubsd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fmsubss (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fnmaddpd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fnmaddps (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fnmaddsd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fnmaddss (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fnmsubpd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fnmsubps (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_fnmsubsd (v2df, v2df, v2df) ++v4sf __builtin_ia32_fnmsubss (v4sf, v4sf, v4sf) ++v2df __builtin_ia32_frczpd (v2df) ++v4sf __builtin_ia32_frczps (v4sf) ++v2df __builtin_ia32_frczsd (v2df, v2df) ++v4sf __builtin_ia32_frczss (v4sf, v4sf) ++v2di __builtin_ia32_pcmov (v2di, v2di, v2di) ++v2di __builtin_ia32_pcmov_v2di (v2di, v2di, v2di) ++v4si __builtin_ia32_pcmov_v4si (v4si, v4si, v4si) ++v8hi __builtin_ia32_pcmov_v8hi (v8hi, v8hi, v8hi) ++v16qi 
__builtin_ia32_pcmov_v16qi (v16qi, v16qi, v16qi) ++v2df __builtin_ia32_pcmov_v2df (v2df, v2df, v2df) ++v4sf __builtin_ia32_pcmov_v4sf (v4sf, v4sf, v4sf) ++v16qi __builtin_ia32_pcomeqb (v16qi, v16qi) ++v8hi __builtin_ia32_pcomeqw (v8hi, v8hi) ++v4si __builtin_ia32_pcomeqd (v4si, v4si) ++v2di __builtin_ia32_pcomeqq (v2di, v2di) ++v16qi __builtin_ia32_pcomequb (v16qi, v16qi) ++v4si __builtin_ia32_pcomequd (v4si, v4si) ++v2di __builtin_ia32_pcomequq (v2di, v2di) ++v8hi __builtin_ia32_pcomequw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomeqw (v8hi, v8hi) ++v16qi __builtin_ia32_pcomfalseb (v16qi, v16qi) ++v4si __builtin_ia32_pcomfalsed (v4si, v4si) ++v2di __builtin_ia32_pcomfalseq (v2di, v2di) ++v16qi __builtin_ia32_pcomfalseub (v16qi, v16qi) ++v4si __builtin_ia32_pcomfalseud (v4si, v4si) ++v2di __builtin_ia32_pcomfalseuq (v2di, v2di) ++v8hi __builtin_ia32_pcomfalseuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomfalsew (v8hi, v8hi) ++v16qi __builtin_ia32_pcomgeb (v16qi, v16qi) ++v4si __builtin_ia32_pcomged (v4si, v4si) ++v2di __builtin_ia32_pcomgeq (v2di, v2di) ++v16qi __builtin_ia32_pcomgeub (v16qi, v16qi) ++v4si __builtin_ia32_pcomgeud (v4si, v4si) ++v2di __builtin_ia32_pcomgeuq (v2di, v2di) ++v8hi __builtin_ia32_pcomgeuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomgew (v8hi, v8hi) ++v16qi __builtin_ia32_pcomgtb (v16qi, v16qi) ++v4si __builtin_ia32_pcomgtd (v4si, v4si) ++v2di __builtin_ia32_pcomgtq (v2di, v2di) ++v16qi __builtin_ia32_pcomgtub (v16qi, v16qi) ++v4si __builtin_ia32_pcomgtud (v4si, v4si) ++v2di __builtin_ia32_pcomgtuq (v2di, v2di) ++v8hi __builtin_ia32_pcomgtuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomgtw (v8hi, v8hi) ++v16qi __builtin_ia32_pcomleb (v16qi, v16qi) ++v4si __builtin_ia32_pcomled (v4si, v4si) ++v2di __builtin_ia32_pcomleq (v2di, v2di) ++v16qi __builtin_ia32_pcomleub (v16qi, v16qi) ++v4si __builtin_ia32_pcomleud (v4si, v4si) ++v2di __builtin_ia32_pcomleuq (v2di, v2di) ++v8hi __builtin_ia32_pcomleuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomlew (v8hi, v8hi) ++v16qi 
__builtin_ia32_pcomltb (v16qi, v16qi) ++v4si __builtin_ia32_pcomltd (v4si, v4si) ++v2di __builtin_ia32_pcomltq (v2di, v2di) ++v16qi __builtin_ia32_pcomltub (v16qi, v16qi) ++v4si __builtin_ia32_pcomltud (v4si, v4si) ++v2di __builtin_ia32_pcomltuq (v2di, v2di) ++v8hi __builtin_ia32_pcomltuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomltw (v8hi, v8hi) ++v16qi __builtin_ia32_pcomneb (v16qi, v16qi) ++v4si __builtin_ia32_pcomned (v4si, v4si) ++v2di __builtin_ia32_pcomneq (v2di, v2di) ++v16qi __builtin_ia32_pcomneub (v16qi, v16qi) ++v4si __builtin_ia32_pcomneud (v4si, v4si) ++v2di __builtin_ia32_pcomneuq (v2di, v2di) ++v8hi __builtin_ia32_pcomneuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomnew (v8hi, v8hi) ++v16qi __builtin_ia32_pcomtrueb (v16qi, v16qi) ++v4si __builtin_ia32_pcomtrued (v4si, v4si) ++v2di __builtin_ia32_pcomtrueq (v2di, v2di) ++v16qi __builtin_ia32_pcomtrueub (v16qi, v16qi) ++v4si __builtin_ia32_pcomtrueud (v4si, v4si) ++v2di __builtin_ia32_pcomtrueuq (v2di, v2di) ++v8hi __builtin_ia32_pcomtrueuw (v8hi, v8hi) ++v8hi __builtin_ia32_pcomtruew (v8hi, v8hi) ++v2df __builtin_ia32_permpd (v2df, v2df, v16qi) ++v4sf __builtin_ia32_permps (v4sf, v4sf, v16qi) ++v4si __builtin_ia32_phaddbd (v16qi) ++v2di __builtin_ia32_phaddbq (v16qi) ++v8hi __builtin_ia32_phaddbw (v16qi) ++v2di __builtin_ia32_phadddq (v4si) ++v4si __builtin_ia32_phaddubd (v16qi) ++v2di __builtin_ia32_phaddubq (v16qi) ++v8hi __builtin_ia32_phaddubw (v16qi) ++v2di __builtin_ia32_phaddudq (v4si) ++v4si __builtin_ia32_phadduwd (v8hi) ++v2di __builtin_ia32_phadduwq (v8hi) ++v4si __builtin_ia32_phaddwd (v8hi) ++v2di __builtin_ia32_phaddwq (v8hi) ++v8hi __builtin_ia32_phsubbw (v16qi) ++v2di __builtin_ia32_phsubdq (v4si) ++v4si __builtin_ia32_phsubwd (v8hi) ++v4si __builtin_ia32_pmacsdd (v4si, v4si, v4si) ++v2di __builtin_ia32_pmacsdqh (v4si, v4si, v2di) ++v2di __builtin_ia32_pmacsdql (v4si, v4si, v2di) ++v4si __builtin_ia32_pmacssdd (v4si, v4si, v4si) ++v2di __builtin_ia32_pmacssdqh (v4si, v4si, v2di) ++v2di 
__builtin_ia32_pmacssdql (v4si, v4si, v2di) ++v4si __builtin_ia32_pmacsswd (v8hi, v8hi, v4si) ++v8hi __builtin_ia32_pmacssww (v8hi, v8hi, v8hi) ++v4si __builtin_ia32_pmacswd (v8hi, v8hi, v4si) ++v8hi __builtin_ia32_pmacsww (v8hi, v8hi, v8hi) ++v4si __builtin_ia32_pmadcsswd (v8hi, v8hi, v4si) ++v4si __builtin_ia32_pmadcswd (v8hi, v8hi, v4si) ++v16qi __builtin_ia32_pperm (v16qi, v16qi, v16qi) ++v16qi __builtin_ia32_protb (v16qi, v16qi) ++v4si __builtin_ia32_protd (v4si, v4si) ++v2di __builtin_ia32_protq (v2di, v2di) ++v8hi __builtin_ia32_protw (v8hi, v8hi) ++v16qi __builtin_ia32_pshab (v16qi, v16qi) ++v4si __builtin_ia32_pshad (v4si, v4si) ++v2di __builtin_ia32_pshaq (v2di, v2di) ++v8hi __builtin_ia32_pshaw (v8hi, v8hi) ++v16qi __builtin_ia32_pshlb (v16qi, v16qi) ++v4si __builtin_ia32_pshld (v4si, v4si) ++v2di __builtin_ia32_pshlq (v2di, v2di) ++v8hi __builtin_ia32_pshlw (v8hi, v8hi) ++@end smallexample ++ ++The following built-in functions are available when @option{-msse5} ++is used. The second argument must be an integer constant; they generate ++the machine instruction that is part of the name with the @samp{_imm} ++suffix removed. ++ ++@smallexample ++v16qi __builtin_ia32_protb_imm (v16qi, int) ++v4si __builtin_ia32_protd_imm (v4si, int) ++v2di __builtin_ia32_protq_imm (v2di, int) ++v8hi __builtin_ia32_protw_imm (v8hi, int) ++@end smallexample + + @node Target Builtins + @section Built-in Functions Specific to Particular Target Machines +--- gcc/doc/invoke.texi.jj 2007-11-24 21:47:34.000000000 +0100 ++++ gcc/doc/invoke.texi 2007-12-28 20:06:54.000000000 +0100 +@@ -525,13 +525,14 @@ Objective-C and Objective-C++ Dialects}. 
+ -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol + -mno-wide-multiply -mrtd -malign-double @gol + -mpreferred-stack-boundary=@var{num} @gol +--mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol ++-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -msse5 -m3dnow -mpopcnt -mabm @gol + -mthreads -mno-align-stringops -minline-all-stringops @gol + -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol + -m96bit-long-double -mregparm=@var{num} -msseregparm @gol + -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol + -mcmodel=@var{code-model} @gol +--m32 -m64 -mlarge-data-threshold=@var{num}} ++-m32 -m64 -mlarge-data-threshold=@var{num} ++-mfused-madd -mno-fused-madd} + + @emph{IA-64 Options} + @gccoptlist{-mbig-endian -mlittle-endian -mgnu-as -mgnu-ld -mno-pic @gol +@@ -9122,7 +9123,7 @@ AMD K8 core based CPUs with x86-64 instr + MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.) + @item amdfam10 + AMD Family 10 core based CPUs with x86-64 instruction set support. (This +-supersets MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit ++supersets MMX, SSE, SSE2, SSE3, SSE4A, SSE5, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit + instruction set extensions.) + @item winchip-c6 + IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction +@@ -9403,6 +9404,8 @@ preferred alignment to @option{-mpreferr + @itemx -mno-ssse3 + @item -msse4a + @item -mno-sse4a ++@item -msse5 ++@item -mno-sse5 + @item -m3dnow + @itemx -mno-3dnow + @item -mpopcnt +@@ -9535,6 +9538,13 @@ building of shared libraries are not sup + Generate code for the large model: This model makes no assumptions + about addresses and sizes of sections. Currently GCC does not implement + this model. ++ ++@item -mfused-madd ++@itemx -mno-fused-madd ++@opindex mfused-madd ++Enable automatic generation of fused floating point multiply-add instructions ++if the ISA supports such instructions. 
The -mfused-madd option is on by ++default. + @end table + + @node IA-64 Options +--- gcc/testsuite/g++.dg/other/i386-2a.C.jj 2007-12-30 21:57:01.000000000 +0100 ++++ gcc/testsuite/g++.dg/other/i386-2a.C 2007-12-30 22:02:14.000000000 +0100 +@@ -0,0 +1,10 @@ ++/* Test that {,x,e,p,t,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are ++ usable with -O -pedantic-errors. */ ++/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ ++/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mssse3 -msse5" } */ ++ ++#include ++#include ++#include ++ ++int dummy; +--- gcc/testsuite/gcc.dg/i386-cpuid.h.jj 2007-06-26 13:38:46.000000000 +0200 ++++ gcc/testsuite/gcc.dg/i386-cpuid.h 2007-12-29 18:35:01.000000000 +0100 +@@ -15,6 +15,7 @@ + /* Extended Features */ + /* %ecx */ + #define bit_SSE4a (1 << 6) ++#define bit_SSE5 (1 << 11) + + #ifndef NOINLINE + #define NOINLINE __attribute__ ((noinline)) +--- gcc/testsuite/gcc.target/i386/i386.exp.jj 2007-02-20 22:35:34.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/i386.exp 2007-12-30 21:39:35.000000000 +0100 +@@ -24,6 +24,21 @@ if { ![istarget i?86*-*-*] && ![istarget + # Load support procs. + load_lib gcc-dg.exp + ++# Return 1 if sse5 instructions can be compiled. ++proc check_effective_target_sse5 { } { ++ return [check_no_compiler_messages sse5 object { ++ typedef long long __m128i __attribute__ ((__vector_size__ (16))); ++ typedef long long __v2di __attribute__ ((__vector_size__ (16))); ++ ++ __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) ++ { ++ return (__m128i) __builtin_ia32_pmacssww ((__v2di)__A, ++ (__v2di)__B, ++ (__v2di)__C); ++ } ++ } "-O2 -msse5" ] ++} ++ + # If a testcase doesn't have special options, use these. 
+ global DEFAULT_CFLAGS + if ![info exists DEFAULT_CFLAGS] then { +--- gcc/testsuite/gcc.target/i386/sse-12a.c.jj 2007-12-30 22:01:34.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse-12a.c 2007-12-30 22:02:40.000000000 +0100 +@@ -0,0 +1,10 @@ ++/* Test that {,x,e,p,t,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are ++ usable with -O -std=c89 -pedantic-errors. */ ++/* { dg-do compile } */ ++/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mssse3 -msse5" } */ ++ ++#include ++#include ++#include ++ ++int dummy; +--- gcc/testsuite/gcc.target/i386/sse-13a.c.jj 2007-12-30 22:01:37.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse-13a.c 2007-12-31 09:57:36.000000000 +0100 +@@ -0,0 +1,55 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=k8 -m3dnow -mssse3 -msse5" } */ ++ ++/* Test that the intrinsics compile with optimization. All of them are ++ defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h ++ that reference the proper builtin functions. Defining away "static" and ++ "__inline" results in all of them being compiled as proper functions. */ ++ ++#define static ++#define __inline ++ ++/* Following intrinsics require immediate arguments. 
*/ ++ ++/* ammintrin.h */ ++#define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1) ++#define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1) ++ ++/* mmintrin-common.h */ ++#define __builtin_ia32_roundpd(V, M) __builtin_ia32_roundpd(V, 1) ++#define __builtin_ia32_roundsd(D, V, M) __builtin_ia32_roundsd(D, V, 1) ++#define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1) ++#define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1) ++ ++/* tmmintrin.h */ ++#define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8) ++#define __builtin_ia32_palignr(X, Y, N) __builtin_ia32_palignr(X, Y, 8) ++ ++/* emmintrin.h */ ++#define __builtin_ia32_psrldqi128(A, B) __builtin_ia32_psrldqi128(A, 8) ++#define __builtin_ia32_pslldqi128(A, B) __builtin_ia32_pslldqi128(A, 8) ++#define __builtin_ia32_pshufhw(A, N) __builtin_ia32_pshufhw(A, 0) ++#define __builtin_ia32_pshuflw(A, N) __builtin_ia32_pshuflw(A, 0) ++#define __builtin_ia32_pshufd(A, N) __builtin_ia32_pshufd(A, 0) ++#define __builtin_ia32_vec_set_v8hi(A, D, N) \ ++ __builtin_ia32_vec_set_v8hi(A, D, 0) ++#define __builtin_ia32_vec_ext_v8hi(A, N) __builtin_ia32_vec_ext_v8hi(A, 0) ++#define __builtin_ia32_shufpd(A, B, N) __builtin_ia32_shufpd(A, B, 0) ++ ++/* xmmintrin.h */ ++#define __builtin_prefetch(P, A, I) __builtin_prefetch(P, A, _MM_HINT_NTA) ++#define __builtin_ia32_pshufw(A, N) __builtin_ia32_pshufw(A, 0) ++#define __builtin_ia32_vec_set_v4hi(A, D, N) \ ++ __builtin_ia32_vec_set_v4hi(A, D, 0) ++#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) ++#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) ++ ++/* bmmintrin.h */ ++#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1) ++#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1) ++#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) ++#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) ++ ++#include ++#include 
++#include +--- gcc/testsuite/gcc.target/i386/sse-14a.c.jj 2007-12-30 22:01:40.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse-14a.c 2007-12-30 22:04:01.000000000 +0100 +@@ -0,0 +1,75 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O0 -march=k8 -m3dnow -mssse3 -msse5" } */ ++ ++/* Test that the intrinsics compile without optimization. All of them are ++ defined as inline functions in {,x,e,p,t,a,b}mmintrin.h and mm3dnow.h ++ that reference the proper builtin functions. Defining away "static" and ++ "__inline" results in all of them being compiled as proper functions. */ ++ ++#define static ++#define __inline ++ ++#include ++#include ++#include ++ ++#define _CONCAT(x,y) x ## y ++ ++#define test_1(func, type, op1_type, imm) \ ++ type _CONCAT(_,func) (op1_type A, int const I) \ ++ { return func (A, imm); } ++ ++#define test_1x(func, type, op1_type, imm1, imm2) \ ++ type _CONCAT(_,func) (op1_type A, int const I, int const L) \ ++ { return func (A, imm1, imm2); } ++ ++#define test_2(func, type, op1_type, op2_type, imm) \ ++ type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \ ++ { return func (A, B, imm); } ++ ++#define test_2x(func, type, op1_type, op2_type, imm1, imm2) \ ++ type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ ++ { return func (A, B, imm1, imm2); } ++ ++#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \ ++ type _CONCAT(_,func) (op1_type A, op2_type B, \ ++ op3_type C, op4_type D, int const I) \ ++ { return func (A, B, C, D, imm); } ++ ++ ++/* Following intrinsics require immediate arguments. They ++ are defined as macros for non-optimized compilations. 
*/ ++ ++/* ammintrin.h */ ++test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) ++test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) ++ ++/* tmmintrin.h */ ++test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1) ++test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1) ++ ++/* emmintrin.h */ ++test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1) ++test_1 (_mm_srli_si128, __m128i, __m128i, 1) ++test_1 (_mm_slli_si128, __m128i, __m128i, 1) ++test_1 (_mm_extract_epi16, int, __m128i, 1) ++test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1) ++test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1) ++test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1) ++test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1) ++ ++/* xmmintrin.h */ ++test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1) ++test_1 (_mm_extract_pi16, int, __m64, 1) ++test_1 (_m_pextrw, int, __m64, 1) ++test_2 (_mm_insert_pi16, __m64, __m64, int, 1) ++test_2 (_m_pinsrw, __m64, __m64, int, 1) ++test_1 (_mm_shuffle_pi16, __m64, __m64, 1) ++test_1 (_m_pshufw, __m64, __m64, 1) ++test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA) ++ ++/* bmmintrin.h */ ++test_1 (_mm_roti_epi8, __m128i, __m128i, 1) ++test_1 (_mm_roti_epi16, __m128i, __m128i, 1) ++test_1 (_mm_roti_epi32, __m128i, __m128i, 1) ++test_1 (_mm_roti_epi64, __m128i, __m128i, 1) +--- gcc/testsuite/gcc.target/i386/sse5-check.h.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-check.h 2007-12-29 18:36:02.000000000 +0100 +@@ -0,0 +1,17 @@ ++#include ++ ++#include "../../gcc.dg/i386-cpuid.h" ++ ++static void sse5_test (void); ++ ++int ++main () ++{ ++ unsigned long cpu_facilities = i386_extended_cpuid_ecx (); ++ ++ /* Run SSE5 test only if host has SSE5 support. 
*/ ++ if (cpu_facilities & bit_SSE5) ++ sse5_test (); ++ ++ exit (0); ++} +--- gcc/testsuite/gcc.target/i386/sse5-fma-vector.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-fma-vector.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,93 @@ ++/* Test that the compiler properly optimizes floating point multiply and add ++ instructions vector into fmaddps on SSE5 systems. */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -msse5 -mfused-madd -ftree-vectorize" } */ ++ ++extern void exit (int); ++ ++typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++#define SIZE 10240 ++ ++union { ++ __m128 f_align; ++ __m128d d_align; ++ float f[SIZE]; ++ double d[SIZE]; ++} a, b, c, d; ++ ++void ++flt_mul_add (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.f[i] = (b.f[i] * c.f[i]) + d.f[i]; ++} ++ ++void ++dbl_mul_add (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.d[i] = (b.d[i] * c.d[i]) + d.d[i]; ++} ++ ++void ++flt_mul_sub (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.f[i] = (b.f[i] * c.f[i]) - d.f[i]; ++} ++ ++void ++dbl_mul_sub (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.d[i] = (b.d[i] * c.d[i]) - d.d[i]; ++} ++ ++void ++flt_neg_mul_add (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i]; ++} ++ ++void ++dbl_neg_mul_add (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i]; ++} ++ ++int main () ++{ ++ flt_mul_add (); ++ flt_mul_sub (); ++ flt_neg_mul_add (); ++ ++ dbl_mul_add (); ++ dbl_mul_sub (); ++ dbl_neg_mul_add (); ++ exit (0); ++} ++ ++/* { dg-final { scan-assembler "fmaddps" } } */ ++/* { dg-final { scan-assembler "fmaddpd" } } */ ++/* { dg-final { scan-assembler "fmsubps" } } */ ++/* { dg-final { scan-assembler "fmsubpd" } } */ ++/* { dg-final { 
scan-assembler "fnmaddps" } } */ ++/* { dg-final { scan-assembler "fnmaddpd" } } */ +--- gcc/testsuite/gcc.target/i386/sse5-fma.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-fma.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,82 @@ ++/* Test that the compiler properly optimizes floating point multiply and add ++ instructions into fmaddss, fmsubss, fnmaddss, fnmsubss on SSE5 systems. */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -msse5 -mfused-madd" } */ ++ ++extern void exit (int); ++ ++float ++flt_mul_add (float a, float b, float c) ++{ ++ return (a * b) + c; ++} ++ ++double ++dbl_mul_add (double a, double b, double c) ++{ ++ return (a * b) + c; ++} ++ ++float ++flt_mul_sub (float a, float b, float c) ++{ ++ return (a * b) - c; ++} ++ ++double ++dbl_mul_sub (double a, double b, double c) ++{ ++ return (a * b) - c; ++} ++ ++float ++flt_neg_mul_add (float a, float b, float c) ++{ ++ return (-(a * b)) + c; ++} ++ ++double ++dbl_neg_mul_add (double a, double b, double c) ++{ ++ return (-(a * b)) + c; ++} ++ ++float ++flt_neg_mul_sub (float a, float b, float c) ++{ ++ return (-(a * b)) - c; ++} ++ ++double ++dbl_neg_mul_sub (double a, double b, double c) ++{ ++ return (-(a * b)) - c; ++} ++ ++float f[10] = { 2, 3, 4 }; ++double d[10] = { 2, 3, 4 }; ++ ++int main () ++{ ++ f[3] = flt_mul_add (f[0], f[1], f[2]); ++ f[4] = flt_mul_sub (f[0], f[1], f[2]); ++ f[5] = flt_neg_mul_add (f[0], f[1], f[2]); ++ f[6] = flt_neg_mul_sub (f[0], f[1], f[2]); ++ ++ d[3] = dbl_mul_add (d[0], d[1], d[2]); ++ d[4] = dbl_mul_sub (d[0], d[1], d[2]); ++ d[5] = dbl_neg_mul_add (d[0], d[1], d[2]); ++ d[6] = dbl_neg_mul_sub (d[0], d[1], d[2]); ++ exit (0); ++} ++ ++/* { dg-final { scan-assembler "fmaddss" } } */ ++/* { dg-final { scan-assembler "fmaddsd" } } */ ++/* { dg-final { scan-assembler "fmsubss" } } */ ++/* { dg-final { scan-assembler "fmsubsd" } } */ ++/* { dg-final { scan-assembler "fnmaddss" } } 
*/ ++/* { dg-final { scan-assembler "fnmaddsd" } } */ ++/* { dg-final { scan-assembler "fnmsubss" } } */ ++/* { dg-final { scan-assembler "fnmsubsd" } } */ +--- gcc/testsuite/gcc.target/i386/sse5-haddX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-haddX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,208 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 10 ++ ++union ++{ ++ __m128i x[NUM]; ++ int8_t ssi[NUM * 16]; ++ int16_t si[NUM * 8]; ++ int32_t li[NUM * 4]; ++ int64_t lli[NUM * 2]; ++} dst, res, src1; ++ ++static void ++init_sbyte () ++{ ++ int i; ++ for (i=0; i < NUM * 16; i++) ++ src1.ssi[i] = i; ++} ++ ++static void ++init_sword () ++{ ++ int i; ++ for (i=0; i < NUM * 8; i++) ++ src1.si[i] = i; ++} ++ ++ ++static void ++init_sdword () ++{ ++ int i; ++ for (i=0; i < NUM * 4; i++) ++ src1.li[i] = i; ++} ++ ++static int ++check_sbyte2word () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ for (j = 0; j < 8; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ; ++ if (res.si[s] != dst.si[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static int ++check_sbyte2dword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ { ++ for (j = 0; j < 4; j++) ++ { ++ t = i + (4 * j); ++ s = (i / 4) + j; ++ res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] ++ + src1.ssi[t + 3]); ++ if (res.li[s] != dst.li[s]) ++ check_fails++; ++ } ++ } ++ return check_fails++; ++} ++ ++static int ++check_sbyte2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ { ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (8 * j); ++ s = (i / 8) + j; ++ res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] ++ + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5]) ++ + 
(src1.ssi[t + 6] + src1.ssi[t + 7])); ++ if (res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ } ++ return check_fails++; ++} ++ ++static int ++check_sword2dword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < (NUM * 8); i = i + 8) ++ for (j = 0; j < 4; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.li[s] = src1.si[t] + src1.si[t + 1] ; ++ if (res.li[s] != dst.li[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static int ++check_sword2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 8; i = i + 8) ++ { ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (4 * j); ++ s = (i / 4) + j; ++ res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2] ++ + src1.si[t + 3]); ++ if (res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ } ++ return check_fails++; ++} ++ ++static int ++check_dword2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < (NUM * 4); i = i + 4) ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.lli[s] = src1.li[t] + src1.li[t + 1] ; ++ if (res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check haddbw */ ++ init_sbyte (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddw_epi8 (src1.x[i]); ++ ++ if (check_sbyte2word()) ++ abort (); ++ ++ /* Check haddbd */ ++ for (i = 0; i < (NUM ); i++) ++ dst.x[i] = _mm_haddd_epi8 (src1.x[i]); ++ ++ if (check_sbyte2dword()) ++ abort (); ++ ++ /* Check haddbq */ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddq_epi8 (src1.x[i]); ++ ++ if (check_sbyte2qword()) ++ abort (); ++ ++ /* Check haddwd */ ++ init_sword (); ++ ++ for (i = 0; i < (NUM ); i++) ++ dst.x[i] = _mm_haddd_epi16 (src1.x[i]); ++ ++ if (check_sword2dword()) ++ abort (); ++ ++ /* Check haddwq */ ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddq_epi16 (src1.x[i]); ++ ++ if (check_sword2qword()) ++ abort (); ++ ++ /* Check hadddq */ ++ init_sdword (); ++ ++ for (i = 0; i < NUM; i++) ++ 
dst.x[i] = _mm_haddq_epi32 (src1.x[i]); ++ ++ if (check_dword2qword()) ++ abort (); ++} +--- gcc/testsuite/gcc.target/i386/sse5-hadduX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-hadduX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,207 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 10 ++ ++union ++{ ++ __m128i x[NUM]; ++ unsigned char ssi[NUM * 16]; ++ unsigned short si[NUM * 8]; ++ unsigned int li[NUM * 4]; ++ unsigned long long lli[NUM * 2]; ++} dst, res, src1; ++ ++static void ++init_byte () ++{ ++ int i; ++ for (i=0; i < NUM * 16; i++) ++ src1.ssi[i] = i; ++} ++ ++static void ++init_word () ++{ ++ int i; ++ for (i=0; i < NUM * 8; i++) ++ src1.si[i] = i; ++} ++ ++ ++static void ++init_dword () ++{ ++ int i; ++ for (i=0; i < NUM * 4; i++) ++ src1.li[i] = i; ++} ++ ++static int ++check_byte2word () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ for (j = 0; j < 8; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ; ++ if (res.si[s] != dst.si[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static int ++check_byte2dword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ { ++ for (j = 0; j < 4; j++) ++ { ++ t = i + (4 * j); ++ s = (i / 4) + j; ++ res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] ++ + src1.ssi[t + 3]); ++ if (res.li[s] != dst.li[s]) ++ check_fails++; ++ } ++ } ++ return check_fails++; ++} ++ ++static int ++check_byte2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ { ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (8 * j); ++ s = (i / 8) + j; ++ res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] ++ + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5]) ++ + (src1.ssi[t + 6] + src1.ssi[t + 7])); ++ if 
(res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ } ++ return check_fails++; ++} ++ ++static int ++check_word2dword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < (NUM * 8); i = i + 8) ++ for (j = 0; j < 4; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.li[s] = src1.si[t] + src1.si[t + 1] ; ++ if (res.li[s] != dst.li[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static int ++check_word2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 8; i = i + 8) ++ { ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (4 * j); ++ s = (i / 4) + j; ++ res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2] ++ + src1.si[t + 3]); ++ if (res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ } ++ return check_fails++; ++} ++ ++static int ++check_dword2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < (NUM * 4); i = i + 4) ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.lli[s] = src1.li[t] + src1.li[t + 1] ; ++ if (res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check haddubw */ ++ init_byte (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddw_epu8 (src1.x[i]); ++ ++ if (check_byte2word()) ++ abort (); ++ ++ /* Check haddubd */ ++ for (i = 0; i < (NUM ); i++) ++ dst.x[i] = _mm_haddd_epu8 (src1.x[i]); ++ ++ if (check_byte2dword()) ++ abort (); ++ ++ /* Check haddubq */ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddq_epu8 (src1.x[i]); ++ ++ if (check_byte2qword()) ++ abort (); ++ ++ /* Check hadduwd */ ++ init_word (); ++ ++ for (i = 0; i < (NUM ); i++) ++ dst.x[i] = _mm_haddd_epu16 (src1.x[i]); ++ ++ if (check_word2dword()) ++ abort (); ++ ++ /* Check hadduwq */ ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddq_epu16 (src1.x[i]); ++ ++ if (check_word2qword()) ++ abort (); ++ ++ /* Check haddudq */ ++ init_dword (); ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_haddq_epu32 (src1.x[i]); ++ ++ if 
(check_dword2qword()) ++ abort (); ++} +--- gcc/testsuite/gcc.target/i386/sse5-hsubX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-hsubX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,128 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 10 ++ ++union ++{ ++ __m128i x[NUM]; ++ int8_t ssi[NUM * 16]; ++ int16_t si[NUM * 8]; ++ int32_t li[NUM * 4]; ++ int64_t lli[NUM * 2]; ++} dst, res, src1; ++ ++static void ++init_sbyte () ++{ ++ int i; ++ for (i=0; i < NUM * 16; i++) ++ src1.ssi[i] = i; ++} ++ ++static void ++init_sword () ++{ ++ int i; ++ for (i=0; i < NUM * 8; i++) ++ src1.si[i] = i; ++} ++ ++ ++static void ++init_sdword () ++{ ++ int i; ++ for (i=0; i < NUM * 4; i++) ++ src1.li[i] = i; ++} ++ ++static int ++check_sbyte2word () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < NUM * 16; i = i + 16) ++ for (j = 0; j < 8; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.si[s] = src1.ssi[t] - src1.ssi[t + 1] ; ++ if (res.si[s] != dst.si[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static int ++check_sword2dword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < (NUM * 8); i = i + 8) ++ for (j = 0; j < 4; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.li[s] = src1.si[t] - src1.si[t + 1] ; ++ if (res.li[s] != dst.li[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static int ++check_dword2qword () ++{ ++ int i, j, s, t, check_fails = 0; ++ for (i = 0; i < (NUM * 4); i = i + 4) ++ for (j = 0; j < 2; j++) ++ { ++ t = i + (2 * j); ++ s = (i / 2) + j; ++ res.lli[s] = src1.li[t] - src1.li[t + 1] ; ++ if (res.lli[s] != dst.lli[s]) ++ check_fails++; ++ } ++ /* Report the mismatch count; sse5_test aborts when it is non-zero. */ ++ return check_fails; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check hsubbw */ ++ init_sbyte (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_hsubw_epi8 (src1.x[i]); ++ ++ if (check_sbyte2word()) ++ abort (); ++ 
++ ++ /* Check hsubwd */ ++ init_sword (); ++ ++ for (i = 0; i < (NUM ); i++) ++ dst.x[i] = _mm_hsubd_epi16 (src1.x[i]); ++ ++ if (check_sword2dword()) ++ abort (); ++ ++ /* Check hsubdq */ ++ init_sdword (); ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_hsubq_epi32 (src1.x[i]); ++ ++ if (check_dword2qword()) ++ abort (); ++} +--- gcc/testsuite/gcc.target/i386/sse5-ima-vector.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-ima-vector.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,34 @@ ++/* Test that the compiler properly optimizes vector 32-bit integer ++ multiply and add instructions into pmacsdd on SSE5 systems. */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -msse5 -ftree-vectorize" } */ ++ ++extern void exit (int); ++ ++typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++#define SIZE 10240 ++ ++union { ++ __m128i align; ++ int i[SIZE]; ++} a, b, c, d; ++ ++void ++int_mul_add (void) ++{ ++ int i; ++ ++ for (i = 0; i < SIZE; i++) ++ a.i[i] = (b.i[i] * c.i[i]) + d.i[i]; ++} ++ ++int main () ++{ ++ int_mul_add (); ++ exit (0); ++} ++ ++/* { dg-final { scan-assembler "pmacsdd" } } */ +--- gcc/testsuite/gcc.target/i386/sse5-maccXX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-maccXX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,140 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 20 ++ ++union ++{ ++ __m128 x[NUM]; ++ float f[NUM * 4]; ++ __m128d y[NUM]; ++ double d[NUM * 2]; ++} dst, res, src1, src2, src3; ++ ++ ++/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate ++ product is not rounded, only the addition is rounded. 
*/ ++ ++static void ++init_maccps () ++{ ++ int i; ++ for (i = 0; i < NUM * 4; i++) ++ { ++ src1.f[i] = i; ++ src2.f[i] = i + 10; ++ src3.f[i] = i + 20; ++ } ++} ++ ++static void ++init_maccpd () ++{ ++ int i; ++ for (i = 0; i < NUM * 2; i++) /* d[] has only NUM * 2 elements. */ ++ { ++ src1.d[i] = i; ++ src2.d[i] = i + 10; ++ src3.d[i] = i + 20; ++ } ++} ++ ++static int ++check_maccps () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ for (j = 0; j < 4; j++) ++ { ++ res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j]; ++ if (dst.f[i + j] != res.f[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_maccpd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ for (j = 0; j < 2; j++) ++ { ++ res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j]; ++ if (dst.d[i + j] != res.d[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++ ++static int ++check_maccss () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i= i + 4) ++ { ++ res.f[i] = (src1.f[i] * src2.f[i]) + src3.f[i]; ++ if (dst.f[i] != res.f[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_maccsd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ { ++ res.d[i] = (src1.d[i] * src2.d[i]) + src3.d[i]; ++ if (dst.d[i] != res.d[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check maccps */ ++ init_maccps (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_macc_ps (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_maccps ()) ++ abort (); ++ ++ /* check maccss */ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_macc_ss (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_maccss ()) ++ abort (); ++ ++ /* Check maccpd */ ++ init_maccpd (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_macc_pd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_maccpd ()) ++ abort (); ++ ++ /* Check maccsd */ ++ for (i = 0; 
i < NUM; i++) ++ dst.y[i] = _mm_macc_sd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_maccsd ()) ++ abort (); ++ ++} +--- gcc/testsuite/gcc.target/i386/sse5-msubXX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-msubXX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,139 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 20 ++ ++union ++{ ++ __m128 x[NUM]; ++ float f[NUM * 4]; ++ __m128d y[NUM]; ++ double d[NUM * 2]; ++} dst, res, src1, src2, src3; ++ ++/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate ++ product is not rounded, only the addition is rounded. */ ++ ++static void ++init_msubps () ++{ ++ int i; ++ for (i = 0; i < NUM * 4; i++) ++ { ++ src1.f[i] = i; ++ src2.f[i] = i + 10; ++ src3.f[i] = i + 20; ++ } ++} ++ ++static void ++init_msubpd () ++{ ++ int i; ++ for (i = 0; i < NUM * 2; i++) /* d[] has only NUM * 2 elements. */ ++ { ++ src1.d[i] = i; ++ src2.d[i] = i + 10; ++ src3.d[i] = i + 20; ++ } ++} ++ ++static int ++check_msubps () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ for (j = 0; j < 4; j++) ++ { ++ res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j]; ++ if (dst.f[i + j] != res.f[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_msubpd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ for (j = 0; j < 2; j++) ++ { ++ res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j]; ++ if (dst.d[i + j] != res.d[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++ ++static int ++check_msubss () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ { ++ res.f[i] = (src1.f[i] * src2.f[i]) - src3.f[i]; ++ if (dst.f[i] != res.f[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_msubsd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; 
i < NUM * 2; i = i + 2) ++ { ++ res.d[i] = (src1.d[i] * src2.d[i]) - src3.d[i]; ++ if (dst.d[i] != res.d[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check msubps */ ++ init_msubps (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_msub_ps (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_msubps ()) ++ abort (); ++ ++ /* check msubss */ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_msub_ss (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_msubss ()) ++ abort (); ++ ++ /* Check msubpd */ ++ init_msubpd (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_msub_pd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_msubpd ()) ++ abort (); ++ ++ /* Check msubsd */ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_msub_sd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_msubsd ()) ++ abort (); ++ ++} +--- gcc/testsuite/gcc.target/i386/sse5-nmaccXX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-nmaccXX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,139 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 20 ++ ++union ++{ ++ __m128 x[NUM]; ++ float f[NUM * 4]; ++ __m128d y[NUM]; ++ double d[NUM * 2]; ++} dst, res, src1, src2, src3; ++ ++/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate ++ product is not rounded, only the addition is rounded. 
*/ ++ ++static void ++init_nmaccps () ++{ ++ int i; ++ for (i = 0; i < NUM * 4; i++) ++ { ++ src1.f[i] = i; ++ src2.f[i] = i + 10; ++ src3.f[i] = i + 20; ++ } ++} ++ ++static void ++init_nmaccpd () ++{ ++ int i; ++ for (i = 0; i < NUM * 2; i++) /* d[] has only NUM * 2 elements. */ ++ { ++ src1.d[i] = i; ++ src2.d[i] = i + 10; ++ src3.d[i] = i + 20; ++ } ++} ++ ++static int ++check_nmaccps () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ for (j = 0; j < 4; j++) ++ { ++ res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j]; ++ if (dst.f[i + j] != res.f[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_nmaccpd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ for (j = 0; j < 2; j++) ++ { ++ res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j]; ++ if (dst.d[i + j] != res.d[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++ ++static int ++check_nmaccss () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ { ++ res.f[i] = - (src1.f[i] * src2.f[i]) + src3.f[i]; ++ if (dst.f[i] != res.f[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_nmaccsd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ { ++ res.d[i] = - (src1.d[i] * src2.d[i]) + src3.d[i]; ++ if (dst.d[i] != res.d[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check nmaccps */ ++ init_nmaccps (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_nmacc_ps (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_nmaccps ()) ++ abort (); ++ ++ /* check nmaccss */ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_nmacc_ss (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_nmaccss ()) ++ abort (); ++ ++ /* Check nmaccpd */ ++ init_nmaccpd (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_nmacc_pd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_nmaccpd ()) ++ abort (); ++ ++ /* 
Check nmaccsd */ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_nmacc_sd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_nmaccsd ()) ++ abort (); ++ ++} +--- gcc/testsuite/gcc.target/i386/sse5-nmsubXX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-nmsubXX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,139 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++#define NUM 20 ++ ++union ++{ ++ __m128 x[NUM]; ++ float f[NUM * 4]; ++ __m128d y[NUM]; ++ double d[NUM * 2]; ++} dst, res, src1, src2, src3; ++ ++/* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate ++ product is not rounded, only the addition is rounded. */ ++ ++static void ++init_nmsubps () ++{ ++ int i; ++ for (i = 0; i < NUM * 4; i++) ++ { ++ src1.f[i] = i; ++ src2.f[i] = i + 10; ++ src3.f[i] = i + 20; ++ } ++} ++ ++static void ++init_nmsubpd () ++{ ++ int i; ++ for (i = 0; i < NUM * 2; i++) /* d[] has only NUM * 2 elements. */ ++ { ++ src1.d[i] = i; ++ src2.d[i] = i + 10; ++ src3.d[i] = i + 20; ++ } ++} ++ ++static int ++check_nmsubps () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ for (j = 0; j < 4; j++) ++ { ++ res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j]; ++ if (dst.f[i + j] != res.f[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_nmsubpd () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ for (j = 0; j < 2; j++) ++ { ++ res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j]; ++ if (dst.d[i + j] != res.d[i + j]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++ ++static int ++check_nmsubss () ++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 4; i = i + 4) ++ { ++ res.f[i] = - (src1.f[i] * src2.f[i]) - src3.f[i]; ++ if (dst.f[i] != res.f[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_nmsubsd () 
++{ ++ int i, j, check_fails = 0; ++ for (i = 0; i < NUM * 2; i = i + 2) ++ { ++ res.d[i] = - (src1.d[i] * src2.d[i]) - src3.d[i]; ++ if (dst.d[i] != res.d[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ ++ /* Check nmsubps */ ++ init_nmsubps (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_nmsub_ps (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_nmsubps ()) /* The checkers take no arguments. */ ++ abort (); ++ ++ /* check nmsubss */ ++ for (i = 0; i < NUM; i++) ++ dst.x[i] = _mm_nmsub_ss (src1.x[i], src2.x[i], src3.x[i]); ++ ++ if (check_nmsubss ()) ++ abort (); ++ ++ /* Check nmsubpd */ ++ init_nmsubpd (); ++ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_nmsub_pd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_nmsubpd ()) ++ abort (); ++ ++ /* Check nmsubsd */ ++ for (i = 0; i < NUM; i++) ++ dst.y[i] = _mm_nmsub_sd (src1.y[i], src2.y[i], src3.y[i]); ++ ++ if (check_nmsubsd ()) ++ abort (); ++ ++} +--- gcc/testsuite/gcc.target/i386/sse5-pcmov.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-pcmov.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,23 @@ ++/* Test that the compiler properly optimizes conditional floating point moves ++ into the pcmov instruction on SSE5 systems. */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++extern void exit (int); ++ ++double dbl_test (double a, double b, double c, double d) ++{ ++ return (a > b) ? c : d; ++} ++ ++double dbl_a = 1, dbl_b = 2, dbl_c = 3, dbl_d = 4, dbl_e; ++ ++int main() ++{ ++ dbl_e = dbl_test (dbl_a, dbl_b, dbl_c, dbl_d); ++ exit (0); ++} ++ ++/* { dg-final { scan-assembler "pcmov" } } */ +--- gcc/testsuite/gcc.target/i386/sse5-pcmov2.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-pcmov2.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,23 @@ ++/* Test that the compiler properly optimizes conditional floating point moves ++ into the pcmov instruction on SSE5 systems. */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++extern void exit (int); ++ ++float flt_test (float a, float b, float c, float d) ++{ ++ return (a > b) ? 
c : d; ++} ++ ++double dbl_a = 1, dbl_b = 2, dbl_c = 3, dbl_d = 4, dbl_e; ++ ++int main() ++{ ++ dbl_e = dbl_test (dbl_a, dbl_b, dbl_c, dbl_d); ++ exit (0); ++} ++ ++/* { dg-final { scan-assembler "pcmov" } } */ +--- gcc/testsuite/gcc.target/i386/sse5-pcmov2.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-pcmov2.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,23 @@ ++/* Test that the compiler properly optimizes conditional floating point moves ++ into the pcmov instruction on SSE5 systems. */ ++ ++/* { dg-do compile } */ ++/* { dg-require-effective-target lp64 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++extern void exit (int); ++ ++float flt_test (float a, float b, float c, float d) ++{ ++ return (a > b) ? c : d; ++} ++ ++float flt_a = 1, flt_b = 2, flt_c = 3, flt_d = 4, flt_e; ++ ++int main() ++{ ++ flt_e = flt_test (flt_a, flt_b, flt_c, flt_d); ++ exit (0); ++} ++ ++/* { dg-final { scan-assembler "pcmov" } } */ +--- gcc/testsuite/gcc.target/i386/sse5-permpX.c.jj 2007-12-29 18:27:58.000000000 +0100 ++++ gcc/testsuite/gcc.target/i386/sse5-permpX.c 2007-09-22 23:16:19.000000000 +0200 +@@ -0,0 +1,120 @@ ++/* { dg-do run } */ ++/* { dg-require-effective-target sse5 } */ ++/* { dg-options "-O2 -msse5" } */ ++ ++#include "sse5-check.h" ++ ++#include ++#include ++ ++union ++{ ++ __m128 x[2]; ++ __m128d y[2]; ++ __m128i z[2]; ++ float f[8]; ++ double d[4]; ++ int i[8]; ++ long li[4]; ++} dst, res, src1, src2, src3; ++ ++ ++static void ++init_ddata () ++{ ++ int i; ++ for (i = 0; i < 4; i++) ++ { ++ src1.d[i] = i; ++ src2.d[i] = i + 2; ++ } ++ ++ src3.li[0] = 3; ++ src3.li[1] = 0; ++ src3.li[2] = 1; ++ src3.li[3] = 2; ++ ++ res.d[0] = 3.0; ++ res.d[1] = 0.0; ++ res.d[2] = 3.0; ++ res.d[3] = 4.0; ++} ++ ++ ++static void ++init_fdata () ++{ ++ int i; ++ for (i = 0; i < 8; i++) ++ { ++ src1.f[i] = i; ++ src2.f[i] = i + 2; ++ } ++ ++ src3.i[0] = 7; ++ src3.i[1] = 5; ++ src3.i[2] = 1; ++ src3.i[3] = 2; ++ src3.i[4] = 0; ++ src3.i[5] = 4; ++ 
src3.i[6] = 3; ++ src3.i[7] = 6; ++ ++ res.f[0] = 5.0; ++ res.f[1] = 3.0; ++ res.f[2] = 1.0; ++ res.f[3] = 2.0; ++ res.f[4] = 4.0; ++ res.f[5] = 6.0; ++ res.f[6] = 7.0; ++ res.f[7] = 8.0; ++} ++ ++static int ++check_permpd () ++{ ++ int i, check_fails = 0; ++ ++ for (i = 0; i < 4; i++) ++ { ++ if (res.d[i] != dst.d[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static int ++check_permps () ++{ ++ int i, check_fails = 0; ++ ++ for (i = 0; i < 8; i++) ++ { ++ if (res.f[i] != dst.f[i]) ++ check_fails++; ++ } ++ return check_fails++; ++} ++ ++static void ++sse5_test (void) ++{ ++ int i; ++ init_ddata(); ++ ++ for (i = 0; i < 2; i++) ++ dst.y[i] = _mm_perm_pd (src1.y[i], src2.y[i], src3.z[i]); ++ ++ if (check_permpd ()) ++ abort (); ++ ++ init_fdata(); ++ ++ for (i = 0; i < 2; i++) ++ dst.x[i] = _mm_perm_ps (src1.x[i], src2.x[i], src3.z[i]); ++ ++ if (check_permps ()) ++ abort (); ++} ++ ++ diff --git a/gcc41.spec b/gcc41.spec index b925b61..e3ac688 100644 --- a/gcc41.spec +++ b/gcc41.spec @@ -1,6 +1,6 @@ -%define DATE 20070925 +%define DATE 20071124 %define gcc_version 4.1.2 -%define gcc_release 33 +%define gcc_release 37 %define _unpackaged_files_terminate_build 0 %define multilib_64_archs sparc64 ppc64 s390x x86_64 %define include_gappletviewer 1 @@ -10,6 +10,12 @@ %define build_ada 0 %endif %define build_java 1 +# If you don't have already a usable gcc-java and libgcj for your arch, +# do on some arch which has it rpmbuild -bc --with java_tar gcc41.spec +# which creates libjava-classes-%{version}-%{release}.tar.bz2 +# With this then on the new arch do rpmbuild -ba -v --with java_bootstrap gcc41.spec +%define bootstrap_java %{?_with_java_bootstrap:%{build_java}}%{!?_with_java_bootstrap:0} +%define build_java_tar %{?_with_java_tar:%{build_java}}%{!?_with_java_tar:0} %ifarch s390x %define multilib_32_arch s390 %endif @@ -47,7 +53,12 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root BuildRequires: binutils >= 2.17.50.0.17-3 BuildRequires: 
zlib-devel, gettext, dejagnu, bison, flex, texinfo, sharutils %if %{build_java} -BuildRequires: gcc-java, libgcj, /usr/share/java/eclipse-ecj.jar, zip, unzip +BuildRequires: /usr/share/java/eclipse-ecj.jar, zip, unzip +%if %{bootstrap_java} +Source10: libjava-classes-%{version}-%{release}.tar.bz2 +%else +BuildRequires: gcc-java, libgcj +%endif %endif # Make sure pthread.h doesn't contain __thread tokens # Make sure glibc supports stack protector @@ -154,12 +165,57 @@ Patch37: gcc41-pr33136.patch Patch38: gcc41-pr33238.patch Patch39: gcc41-pr33619.patch Patch40: gcc41-pr33639.patch -Patch41: gcc41-pr33744.patch Patch42: gcc41-pr33763.patch Patch43: gcc41-rh317051.patch Patch44: gcc41-rh330771.patch Patch45: gcc41-rh341221.patch -Patch46: gcc41-ppc64-cr2-unwind.patch +Patch47: gcc41-java-arm1.patch +Patch48: gcc41-java-arm2.patch +Patch49: gcc41-java-arm3.patch +Patch50: gcc41-java-arm4.patch +Patch51: gcc41-java-arm5.patch +Patch52: gcc41-java-arm6.patch +Patch53: gcc41-java-arm7.patch +Patch54: gcc41-java-arm8.patch +Patch55: gcc41-pr23848.patch +Patch56: gcc41-pr29225.patch +Patch57: gcc41-pr29712.patch +Patch58: gcc41-pr30293.patch +Patch59: gcc41-pr30988.patch +Patch60: gcc41-pr32241.patch +Patch61: gcc41-pr32384.patch +Patch62: gcc41-pr33501.patch +Patch63: gcc41-pr33516.patch +Patch64: gcc41-pr33537.patch +Patch65: gcc41-pr33616.patch +Patch66: gcc41-pr33723.patch +Patch67: gcc41-pr33836.patch +Patch68: gcc41-pr33842.patch +Patch69: gcc41-pr33844.patch +Patch70: gcc41-pr33962.patch +Patch71: gcc41-pr34070.patch +Patch72: gcc41-pr34089.patch +Patch73: gcc41-pr34178.patch +Patch74: gcc41-pr34130.patch +Patch75: gcc41-pr34146.patch +Patch76: gcc41-rh364001.patch +Patch77: gcc41-pr34213.patch +Patch78: gcc41-pr34364.patch +Patch79: gcc41-pr34275.patch +Patch80: gcc41-rh407281.patch +Patch81: gcc41-pr34394.patch +Patch82: gcc41-debug-fortran-array.patch +Patch83: gcc41-omp-outer-ctx.patch +Patch84: gcc41-pr27643.patch +Patch85: gcc41-pr29978.patch +Patch86: 
gcc41-pr31483.patch +Patch87: gcc41-pr33890.patch +Patch88: gcc41-pr34506.patch +Patch89: gcc41-pr34513.patch +Patch90: gcc41-pr7081.patch +Patch91: gcc41-rh426846.patch +Patch92: gcc41-sse5.patch +Patch93: gcc41-sse5-pperm.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -483,12 +539,61 @@ which are required to run programs compiled with the GNAT. %patch38 -p0 -b .pr33238~ %patch39 -p0 -b .pr33619~ %patch40 -p0 -b .pr33639~ -%patch41 -p0 -b .pr33744~ %patch42 -p0 -b .pr33763~ %patch43 -p0 -b .rh317051~ %patch44 -p0 -b .rh330771~ %patch45 -p0 -b .rh341221~ -%patch46 -p0 -b .ppc64-cr2-unwind~ +%patch47 -p0 -b .java-arm1~ +%patch48 -p0 -b .java-arm2~ +%patch49 -p0 -b .java-arm3~ +%patch50 -p0 -b .java-arm4~ +%patch51 -p0 -b .java-arm5~ +%patch52 -p0 -b .java-arm6~ +%patch53 -p0 -b .java-arm7~ +%patch54 -p0 -b .java-arm8~ +%patch55 -p0 -b .pr23848~ +%patch56 -p0 -b .pr29225~ +%patch57 -p0 -b .pr29712~ +%patch58 -p0 -b .pr30293~ +%patch59 -p0 -b .pr30988~ +%patch60 -p0 -b .pr32241~ +%patch61 -p0 -b .pr32384~ +%patch62 -p0 -b .pr33501~ +%patch63 -p0 -b .pr33516~ +%patch64 -p0 -b .pr33537~ +%patch65 -p0 -b .pr33616~ +%patch66 -p0 -b .pr33723~ +%patch67 -p0 -b .pr33836~ +%patch68 -p0 -b .pr33842~ +%patch69 -p0 -b .pr33844~ +%patch70 -p0 -b .pr33962~ +%patch71 -p0 -b .pr34070~ +%patch72 -p0 -b .pr34089~ +%patch73 -p0 -b .pr34178~ +%patch74 -p0 -b .pr34130~ +%patch75 -p0 -b .pr34146~ +%patch76 -p0 -b .rh364001~ +%patch77 -p0 -b .pr34213~ +%patch78 -p0 -b .pr34364~ +%patch79 -p0 -b .pr34275~ +%patch80 -p0 -b .rh407281~ +%patch81 -p0 -b .pr34394~ +%patch82 -p0 -b .debug-fortran-array~ +%patch83 -p0 -b .omp-outer-ctx~ +%patch84 -p0 -b .pr27643~ +%patch85 -p0 -b .pr29978~ +%patch86 -p0 -b .pr31483~ +%patch87 -p0 -b .pr33890~ +%patch88 -p0 -b .pr34506~ +%patch89 -p0 -b .pr34513~ +%patch90 -p0 -b .pr7081~ +%patch91 -p0 -b .rh426846~ +%patch92 -p0 -b .sse5~ +%patch93 -p0 -b .sse5-pperm~ + +%if %{bootstrap_java} +tar xjf %{SOURCE10} +%endif sed 
-i -e 's/4\.1\.3/4.1.2/' gcc/BASE-VER gcc/version.c sed -i -e 's/" (Red Hat[^)]*)"/" (Red Hat %{version}-%{gcc_release})"/' gcc/version.c @@ -535,6 +640,7 @@ if [ ! -f /usr/lib/locale/de_DE/LC_CTYPE ]; then fi %if %{build_java} +%if !%{bootstrap_java} # If we don't have gjavah in $PATH, try to build it with the old gij mkdir java_hacks cd java_hacks @@ -561,6 +667,7 @@ chmod +x `pwd`/ecj1 export PATH=`pwd`${PATH:+:$PATH} cd .. %endif +%endif CC=gcc OPT_FLAGS=`echo $RPM_OPT_FLAGS|sed -e 's/\(-Wp,\)\?-D_FORTIFY_SOURCE=[12]//g'` @@ -611,6 +718,9 @@ CC="$CC" CFLAGS="$OPT_FLAGS" CXXFLAGS="$OPT_FLAGS" XCFLAGS="$OPT_FLAGS" TCFLAGS= --enable-libgcj-multifile --enable-java-maintainer-mode \ --with-ecj-jar=/usr/share/java/eclipse-ecj.jar \ %endif +%ifarch %{arm} + --disable-sjlj-exceptions \ +%endif %ifarch ppc ppc64 --enable-secureplt \ %endif @@ -711,6 +821,13 @@ cp -p libjava/LIBGCJ_LICENSE rpm.doc/libjava/ rm -f rpm.doc/changelogs/gcc/ChangeLog.[1-9] find rpm.doc -name \*ChangeLog\* | xargs bzip2 -9 +%if %{build_java_tar} +find libjava -name \*.h -type f | xargs grep -l '// DO NOT EDIT THIS FILE - it is machine generated' > libjava-classes.list +find libjava -name \*.class -type f >> libjava-classes.list +find libjava/testsuite -name \*.jar -type f >> libjava-classes.list +tar cf - -T libjava-classes.list | bzip2 -9 > $RPM_SOURCE_DIR/libjava-classes-%{version}-%{release}.tar.bz2 +%endif + %install rm -fr $RPM_BUILD_ROOT @@ -726,8 +843,10 @@ if [ ! 
-f /usr/lib/locale/de_DE/LC_CTYPE ]; then fi %if %{build_java} +%if !%{bootstrap_java} export PATH=`pwd`/java_hacks${PATH:+:$PATH} %endif +%endif TARGET_PLATFORM=%{gcc_target_platform} @@ -1643,6 +1762,61 @@ fi %doc rpm.doc/changelogs/libmudflap/ChangeLog* %changelog +* Mon Dec 31 2007 Jakub Jelinek 4.1.2-37 +- add SSE5 support (Michael Meissner, Dwarakanath Rajagopal, Tony Linthicum, + Uros Bizjak, #252998) +- java_mark_cni_decl_local fix (Andrew Haley, #414411, PR java/27643) +- i386 <= 0xNNffffffffLL comparison optimization (PR target/29978) +- fix Fortran alternate returns with dummy procedure (Paul Thomas, #399531, + PR fortran/31483) +- OpenMP fixes (PR c++/33890, c/34506, c++/34513) +- generate DW_TAG_class_type in debuginfo instead of DW_TAG_structure_type if + class keyword was used in the source rather than struct (Alexandre Oliva, + PR debug/7081, #371831) +- don't hold a global guard mutex across whole local static initialization + (Doug Kwan, #426846) + +* Wed Dec 12 2007 Jakub Jelinek 4.1.2-36 +- revert PR c++/34094 fix altogether, it was only accepts-invalid and + caused a bunch of valid or unclear cases to be rejected (#411871, #402521) +- fix OpenMP handling of global vars privatized in orphaned constructs + with #pragma omp parallel inside them +- -frepo fixes (#411741, PRs c++/34178, c++/34340) +- fix dynamic_cast in templates (PR c++/34364) +- fix error diagnostics involving ABS_EXPR (PR c++/34394) + +* Sun Dec 2 2007 Jakub Jelinek 4.1.2-35 +- two ctor preevaluation fixes (Olivier Hainque, + Eric Botcazou, #407281) +- slightly weaken diagnostics for declared, but undefined static data + members in anon ns classes (#402521, PR c++/34238) +- consider static data members and static member functions in anon ns + classes to be external for C++ linkage type handling (PR c++/34213) +- handle OBJ_TYPE_REF in C++ diagnostics (PR c++/34275) + +* Sat Nov 24 2007 Jakub Jelinek 4.1.2-34 +- update from gcc-4_1-branch (-r128736:130387) + - PRs 
middle-end/34030, rtl-optimization/28062, rtl-optimization/33822 + - fix if-conversion to avoid introducing races into threaded code + (Ian Lance Taylor, #391731) +- some C++ visibility fixes (Jason Merrill, PRs c++/32470, c++/33094, + c++/29365) +- arm Java support (Andrew Haley, #246800) +- add the possibility to bootstrap gcj on architectures where libgcj + isn't already available or is too old - build the rpm on some already + supported arch with --with java_tar, then bring the created + tarball to the new arch and build there --with java_bootstrap +- backport a bunch of bugfixes from GCC trunk + - PRs c++/29225, c++/30293, c++/30294, c++/30988, c++/32241, + c++/32384, c++/33501, c++/33516, c++/33616, c++/33836, + c++/33842, c++/33844, c++/33962, c++/34089, c++/34094, + c/34146, debug/33537, middle-end/23848, middle-end/34070, + testsuite/33978, tree-optimization/33723 +- fix abs optimization (Richard Guenther, #394271, PR middle-end/34130) +- fortran lbound/ubound fix (Paul Thomas, #391151, PR fortran/29712) +- generate proper fortran debuginfo for assumed-size, assumed-shape + and deferred arrays (#364001) + * Sun Oct 21 2007 Jakub Jelinek 4.1.2-33 - rebuild to fix multilib conflict between i386 and x86_64 libgcj, set java man page timestamp from the timestamp of *.texinfo rather diff --git a/sources b/sources index 64eed40..5fee11f 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -562ab2446c60a9145da385ac56cf7715 gcc-4.1.2-20070925.tar.bz2 +b0e332f1be680c13608e127c6ed9acf9 gcc-4.1.2-20071124.tar.bz2