From 6329a9d46dcd77f122d49ed2dc868cd6dc6ea22d Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@fedoraproject.org>
Date: Mon, 27 Aug 2007 11:08:53 +0000
Subject: [PATCH] 4.1.2-18.fc7

---
 .cvsignore                     |    2 +-
 gcc41-amdfam10.patch           | 3619 --------------------------------
 gcc41-build-id.patch           |   74 +
 gcc41-dtor-relro.patch         |   89 -
 gcc41-libgomp-ncpus.patch      |  186 --
 gcc41-multi32-hack.patch       |  104 -
 gcc41-objc-rh185398.patch      |   28 -
 gcc41-ppc-tramp.patch          |   34 +
 gcc41-pr22244.patch            |   63 +
 gcc41-pr24036-revert.patch     |  302 ---
 gcc41-pr27567.patch            |  439 ----
 gcc41-pr28482.patch            |   19 -
 gcc41-pr28690.patch            |  155 ++
 gcc41-pr28709.patch            |   84 -
 gcc41-pr29059.patch            |   76 -
 gcc41-pr29272.patch            |  110 -
 gcc41-pr29299.patch            |   93 -
 gcc41-pr31187.patch            |  113 -
 gcc41-pr31748.patch            |   43 -
 gcc41-pr32139.patch            |   58 +
 gcc41-pr32678.patch            |   63 +
 gcc41-pr32912.patch            |  138 ++
 gcc41-rh234515.patch           |   73 -
 gcc41-rh247256.patch           |   42 +
 gcc41-rh253102.patch           |   89 +
 gcc41-sparc-niagara.patch      |  519 +++++
 gcc41-strncat-chk.patch        |   93 -
 gcc41-tests.patch              |  392 ----
 gcc41-tls-data-alignment.patch |   33 -
 gcc41.spec                     |  173 +-
 libgcc_post_upgrade.c          |   29 +
 sources                        |    2 +-
 32 files changed, 1381 insertions(+), 5956 deletions(-)
 delete mode 100644 gcc41-amdfam10.patch
 create mode 100644 gcc41-build-id.patch
 delete mode 100644 gcc41-dtor-relro.patch
 delete mode 100644 gcc41-libgomp-ncpus.patch
 delete mode 100644 gcc41-multi32-hack.patch
 delete mode 100644 gcc41-objc-rh185398.patch
 create mode 100644 gcc41-ppc-tramp.patch
 create mode 100644 gcc41-pr22244.patch
 delete mode 100644 gcc41-pr24036-revert.patch
 delete mode 100644 gcc41-pr27567.patch
 delete mode 100644 gcc41-pr28482.patch
 create mode 100644 gcc41-pr28690.patch
 delete mode 100644 gcc41-pr28709.patch
 delete mode 100644 gcc41-pr29059.patch
 delete mode 100644 gcc41-pr29272.patch
 delete mode 100644 gcc41-pr29299.patch
 delete mode 100644 gcc41-pr31187.patch
 delete mode 100644 gcc41-pr31748.patch
 create mode 100644 gcc41-pr32139.patch
 create mode 100644 gcc41-pr32678.patch
 create mode 100644 gcc41-pr32912.patch
 delete mode 100644 gcc41-rh234515.patch
 create mode 100644 gcc41-rh247256.patch
 create mode 100644 gcc41-rh253102.patch
 create mode 100644 gcc41-sparc-niagara.patch
 delete mode 100644 gcc41-strncat-chk.patch
 delete mode 100644 gcc41-tests.patch
 delete mode 100644 gcc41-tls-data-alignment.patch

diff --git a/.cvsignore b/.cvsignore
index f922c4c..fd72f71 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -1 +1 @@
-gcc-4.1.2-20070503.tar.bz2
+gcc-4.1.2-20070821.tar.bz2
diff --git a/gcc41-amdfam10.patch b/gcc41-amdfam10.patch
deleted file mode 100644
index 00b6ac2..0000000
--- a/gcc41-amdfam10.patch
+++ /dev/null
@@ -1,3619 +0,0 @@
-2007-02-10  H.J. Lu  <hongjiu.lu@intel.com>
-
-	* gcc.target/i386/sse4a-extract.c: Add "LL" to 64bit constants.
-	* gcc.target/i386/sse4a-insert.c: Likewise.
-
-2007-02-08  Harsha Jagasia  <harsha.jagasia@amd.com>
-
-	* config/i386/xmmintrin.h: Make inclusion of emmintrin.h
-	conditional to __SSE2__.
-	* config/i386/emmintrin.h: Generate #error if __SSE2__ is not
-	defined.
-	* config/i386/pmmintrin.h: Generate #error if __SSE3__ is not
-	defined.
-	* config/i386/tmmintrin.h: Generate #error if __SSSE3__ is not
-	defined.
-
-2007-02-05  Harsha Jagasia  <harsha.jagasia@amd.com>
-
-	* config/i386/athlon.md (athlon_fldxf_k8, athlon_fld_k8,
-	athlon_fstxf_k8, athlon_fst_k8, athlon_fist, athlon_fmov,
-	athlon_fadd_load, athlon_fadd_load_k8, athlon_fadd, athlon_fmul,
-	athlon_fmul_load, athlon_fmul_load_k8, athlon_fsgn,
-	athlon_fdiv_load, athlon_fdiv_load_k8, athlon_fdiv_k8,
-	athlon_fpspc_load, athlon_fpspc, athlon_fcmov_load,
-	athlon_fcmov_load_k8, athlon_fcmov_k8, athlon_fcomi_load_k8,
-	athlon_fcomi, athlon_fcom_load_k8, athlon_fcom): Added amdfam10.
-
-	* config/i386/i386.md (x86_sahf_1, cmpfp_i_mixed, cmpfp_i_sse,
-	cmpfp_i_i387, cmpfp_iu_mixed, cmpfp_iu_sse, cmpfp_iu_387,
-	swapsi, swaphi_1, swapqi_1, swapdi_rex64, fix_truncsfdi_sse,
-	fix_truncdfdi_sse, fix_truncsfsi_sse, fix_truncdfsi_sse,
-	x86_fldcw_1, floatsisf2_mixed, floatsisf2_sse, floatdisf2_mixed,
-	floatdisf2_sse, floatsidf2_mixed, floatsidf2_sse,
-	floatdidf2_mixed, floatdidf2_sse, muldi3_1_rex64, mulsi3_1,
-	mulsi3_1_zext, mulhi3_1, mulqi3_1, umulqihi3_1, mulqihi3_insn,
-	umulditi3_insn, umulsidi3_insn, mulditi3_insn, mulsidi3_insn,
-	umuldi3_highpart_rex64, umulsi3_highpart_insn,
-	umulsi3_highpart_zext, smuldi3_highpart_rex64,
-	smulsi3_highpart_insn, smulsi3_highpart_zext, x86_64_shld,
-	x86_shld_1, x86_64_shrd, sqrtsf2_mixed, sqrtsf2_sse,
-	sqrtsf2_i387, sqrtdf2_mixed, sqrtdf2_sse, sqrtdf2_i387,
-	sqrtextendsfdf2_i387, sqrtxf2, sqrtextendsfxf2_i387,
-	sqrtextenddfxf2_i387): Added amdfam10_decode.
-	
-	* config/i386/athlon.md (athlon_idirect_amdfam10,
-	athlon_ivector_amdfam10, athlon_idirect_load_amdfam10,
-	athlon_ivector_load_amdfam10, athlon_idirect_both_amdfam10,
-	athlon_ivector_both_amdfam10, athlon_idirect_store_amdfam10,
-	athlon_ivector_store_amdfam10): New define_insn_reservation.
-	(athlon_idirect_loadmov, athlon_idirect_movstore): Added
-	amdfam10.
-
-	* config/i386/athlon.md (athlon_call_amdfam10,
-	athlon_pop_amdfam10, athlon_lea_amdfam10): New
-	define_insn_reservation.
-	(athlon_branch, athlon_push, athlon_leave_k8, athlon_imul_k8,
-	athlon_imul_k8_DI, athlon_imul_mem_k8, athlon_imul_mem_k8_DI,
-	athlon_idiv, athlon_idiv_mem, athlon_str): Added amdfam10.
-
-	* config/i386/athlon.md (athlon_sseld_amdfam10,
-	athlon_mmxld_amdfam10, athlon_ssest_amdfam10,
-	athlon_mmxssest_short_amdfam10): New define_insn_reservation.
-
-	* config/i386/athlon.md (athlon_sseins_amdfam10): New
-	define_insn_reservation.
-	* config/i386/i386.md (sseins): Added sseins to define_attr type
-	and define_attr unit.
-	* config/i386/sse.md: Set type attribute to sseins for insertq
-	and insertqi.
-
-	* config/i386/athlon.md (sselog_load_amdfam10, sselog_amdfam10,
-	ssecmpvector_load_amdfam10, ssecmpvector_amdfam10,
-	ssecomi_load_amdfam10, ssecomi_amdfam10,
-	sseaddvector_load_amdfam10, sseaddvector_amdfam10): New
-	define_insn_reservation.
-	(ssecmp_load_k8, ssecmp, sseadd_load_k8, seadd): Added amdfam10.
-
-	* config/i386/athlon.md (cvtss2sd_load_amdfam10,
-	cvtss2sd_amdfam10, cvtps2pd_load_amdfam10, cvtps2pd_amdfam10,
-	cvtsi2sd_load_amdfam10, cvtsi2ss_load_amdfam10,
-	cvtsi2sd_amdfam10, cvtsi2ss_amdfam10, cvtsd2ss_load_amdfam10,
-	cvtsd2ss_amdfam10, cvtpd2ps_load_amdfam10, cvtpd2ps_amdfam10,
-	cvtsX2si_load_amdfam10, cvtsX2si_amdfam10): New 
-	define_insn_reservation.
-
-	* config/i386/sse.md (cvtsi2ss, cvtsi2ssq, cvtss2si,
-	cvtss2siq, cvttss2si, cvttss2siq, cvtsi2sd, cvtsi2sdq,
-	cvtsd2si, cvtsd2siq, cvttsd2si, cvttsd2siq,
-	cvtpd2dq, cvttpd2dq, cvtsd2ss, cvtss2sd,
-	cvtpd2ps, cvtps2pd): Added amdfam10_decode attribute.
-
-	* config/i386/athlon.md (athlon_ssedivvector_amdfam10,
-	athlon_ssedivvector_load_amdfam10, athlon_ssemulvector_amdfam10,
-	athlon_ssemulvector_load_amdfam10): New define_insn_reservation.
-	(athlon_ssediv, athlon_ssediv_load_k8, athlon_ssemul,
-	athlon_ssemul_load_k8): Added amdfam10.
-
-	* config/i386/i386.h (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL): New macro.
-	(x86_sse_unaligned_move_optimal): New variable.
-	
-	* config/i386/i386.c (x86_sse_unaligned_move_optimal): Enable for  
-	m_AMDFAM10.
-	(ix86_expand_vector_move_misalign): Add code to generate movupd/movups
-	for unaligned vector SSE double/single precision loads for AMDFAM10.
-
-	* config/i386/i386.h (TARGET_AMDFAM10): New macro.
-	(TARGET_CPU_CPP_BUILTINS): Add code for amdfam10.
-	Define TARGET_CPU_DEFAULT_amdfam10.
-	(TARGET_CPU_DEFAULT_NAMES): Add amdfam10.
-	(processor_type): Add PROCESSOR_AMDFAM10.	
-	
-	* config/i386/i386.md: Add amdfam10 as a new cpu attribute to match
-	processor_type in config/i386/i386.h.
-	Enable imul peepholes for TARGET_AMDFAM10.
-	
-	* config.gcc: Add support for --with-cpu option for amdfam10.
-	
-	* config/i386/i386.c (amdfam10_cost): New variable.
-	(m_AMDFAM10): New macro.
-	(m_ATHLON_K8_AMDFAM10): New macro.
-	(x86_use_leave, x86_push_memory, x86_movx, x86_unroll_strlen,
-	x86_cmove, x86_3dnow_a, x86_deep_branch, x86_use_simode_fiop,
-	x86_promote_QImode, x86_integer_DFmode_moves,
-	x86_partial_reg_dependency, x86_memory_mismatch_stall, 
-	x86_accumulate_outgoing_args, x86_arch_always_fancy_math_387,
-	x86_sse_partial_reg_dependency, x86_sse_typeless_stores,
-	x86_use_ffreep, x86_use_incdec, x86_four_jump_limit,
-	x86_schedule, x86_use_bt, x86_cmpxchg16b, x86_pad_returns):
-	Enable/disable for amdfam10.
-	(override_options): Add amdfam10_cost to processor_target_table.
-	Set up PROCESSOR_AMDFAM10 for amdfam10 entry in 
-	processor_alias_table.
-	(ix86_issue_rate): Add PROCESSOR_AMDFAM10.
-	(ix86_adjust_cost): Add code for amdfam10.
-
-	* config/i386/i386.opt: Add new Advanced Bit Manipulation (-mabm)
-	instruction set feature flag. Add new (-mpopcnt) flag for popcnt 
-	instruction. Add new SSE4A (-msse4a) instruction set feature flag.
-	* config/i386/i386.h: Add builtin definition for SSE4A.
-	* config/i386/i386.md: Add support for ABM instructions 
-	(popcnt and lzcnt).
-	* config/i386/sse.md: Add support for SSE4A instructions
-	(movntss, movntsd, extrq, insertq).
-	* config/i386/i386.c: Add support for ABM and SSE4A builtins.
-	Add -march=amdfam10 flag.
-	* config/i386/ammintrin.h: Add support for SSE4A intrinsics.
-	* doc/invoke.texi: Add documentation on flags for sse4a, abm, popcnt
-	and amdfam10.
-	* doc/extend.texi: Add documentation for SSE4A builtins.
-
-2007-02-05  Dwarakanath Rajagopal  <dwarak.rajagopal@amd.com>
-
-	* gcc.dg/i386-cpuid.h: Test whether SSE4A is supported
-	for running tests.
-	* gcc.target/i386/sse4a-extract.c: New test.
-	* gcc.target/i386/sse4a-insert.c: New test.
-	* gcc.target/i386/sse4a-montsd.c: New test.
-	* gcc.target/i386/sse4a-montss.c: New test.
-
-2006-12-15  H.J. Lu  <hongjiu.lu@intel.com>
-
-	* gcc.dg/i386-cpuid.h (bit_SSSE3): New.
-
-2006-11-30  H.J. Lu  <hongjiu.lu@intel.com>
-
-	* gcc.dg/i386-cpuid.h (bit_SSE3): New.
-	(i386_get_cpuid): New function.
-	(i386_cpuid_ecx): Likewise.
-	(i386_cpuid_edx): Likewise.
-	(i386_cpuid): Updated to call i386_cpuid_edx.
-
---- gcc/doc/extend.texi.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/doc/extend.texi	2007-02-09 21:26:06.000000000 +0100
-@@ -6931,6 +6931,23 @@ v4si __builtin_ia32_pabsd128 (v4si)
- v8hi __builtin_ia32_pabsw128 (v8hi)
- @end smallexample
- 
-+The following built-in functions are available when @option{-msse4a} is used.
-+
-+@smallexample
-+void             _mm_stream_sd (double*,__m128d);
-+Generates the @code{movntsd} machine instruction.
-+void             _mm_stream_ss (float*,__m128);
-+Generates the @code{movntss} machine instruction.
-+__m128i          _mm_extract_si64 (__m128i, __m128i);
-+Generates the @code{extrq} machine instruction with only SSE register operands.
-+__m128i          _mm_extracti_si64 (__m128i, int, int);
-+Generates the @code{extrq} machine instruction with SSE register and immediate operands.
-+__m128i          _mm_insert_si64 (__m128i, __m128i);
-+Generates the @code{insertq} machine instruction with only SSE register operands.
-+__m128i          _mm_inserti_si64 (__m128i, __m128i, int, int);
-+Generates the @code{insertq} machine instruction with SSE register and immediate operands.
-+@end smallexample
-+
- The following built-in functions are available when @option{-m3dnow} is used.
- All of them generate the machine instruction that is part of the name.
- 
---- gcc/doc/invoke.texi.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/doc/invoke.texi	2007-02-09 21:56:44.000000000 +0100
-@@ -522,7 +522,7 @@ Objective-C and Objective-C++ Dialects}.
- -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
- -mno-wide-multiply  -mrtd  -malign-double @gol
- -mpreferred-stack-boundary=@var{num} @gol
---mmmx  -msse  -msse2 -msse3 -mssse3 -m3dnow @gol
-+-mmmx  -msse  -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
- -mthreads  -mno-align-stringops  -minline-all-stringops @gol
- -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
- -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
-@@ -9062,6 +9062,10 @@ instruction set support.
- @item k8, opteron, athlon64, athlon-fx
- AMD K8 core based CPUs with x86-64 instruction set support.  (This supersets
- MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)
-+@item amdfam10
-+AMD Family 10 core based CPUs with x86-64 instruction set support.  (This
-+supersets MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit
-+instruction set extensions.)
- @item winchip-c6
- IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction
- set support.
-@@ -9339,8 +9343,14 @@ preferred alignment to @option{-mpreferr
- @itemx -mno-sse3
- @item -mssse3
- @itemx -mno-ssse3
-+@item -msse4a
-+@item -mno-sse4a
- @item -m3dnow
- @itemx -mno-3dnow
-+@item -mpopcnt
-+@itemx -mno-popcnt
-+@item -mabm
-+@itemx -mno-abm
- @opindex mmmx
- @opindex mno-mmx
- @opindex msse
---- gcc/testsuite/gcc.target/i386/sse4a-insert.c.jj	2007-02-09 21:26:06.000000000 +0100
-+++ gcc/testsuite/gcc.target/i386/sse4a-insert.c	2007-02-09 21:26:06.000000000 +0100
-@@ -0,0 +1,110 @@
-+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
-+/* { dg-options "-O2 -msse4a" } */
-+#include <ammintrin.h>
-+#include <stdlib.h>
-+#include "../../gcc.dg/i386-cpuid.h"
-+
-+static void sse4a_test (void);
-+
-+typedef union
-+{
-+  long long i[2];
-+  __m128i vec;
-+} LI;
-+
-+int
-+main ()
-+{  
-+  unsigned long cpu_facilities;
-+
-+  cpu_facilities = i386_extended_cpuid_ecx ();
-+
-+  /* Run SSE4a test only if host has SSE4a support.  */
-+  if ((cpu_facilities & bit_SSE4a))
-+    sse4a_test ();
-+
-+  exit (0);
-+}
-+
-+static long long
-+sse4a_test_insert (long long in1, long long in2)
-+{
-+  __m128i v1,v2;
-+  long long index_length, pad;
-+  LI v_out;
-+  index_length = 0x0000000000000810LL;
-+  pad = 0x0;
-+  v1 = _mm_set_epi64x (pad, in1);
-+  v2 = _mm_set_epi64x (index_length, in2); 
-+  v_out.vec = _mm_insert_si64 (v1, v2);
-+  return (v_out.i[0]);
-+}
-+
-+static long long
-+sse4a_test_inserti (long long in1, long long in2)
-+{
-+  __m128i v1,v2;
-+  long long pad = 0x0;
-+  LI v_out;
-+  v1 = _mm_set_epi64x (pad, in1);
-+  v2 = _mm_set_epi64x (pad, in2); 
-+  v_out.vec = _mm_inserti_si64 (v1, v2, (unsigned int) 0x10, (unsigned int) 0x08);
-+  return (v_out.i[0]);  
-+}
-+
-+static chk (long long i1, long long i2)
-+{
-+  int n_fails =0;
-+  if (i1 != i2) 
-+    n_fails +=1;
-+  return n_fails;
-+}
-+
-+long long vals_in1[5] =
-+  {
-+    0x1234567887654321LL,
-+    0x1456782093002490LL,
-+    0x2340909123990390LL,
-+    0x9595959599595999LL,
-+    0x9099038798000029LL
-+  };
-+
-+long long vals_in2[5] =
-+  {
-+    0x9ABCDEF00FEDCBA9LL,
-+    0x234567097289672ALL,
-+    0x45476453097BD342LL,
-+    0x23569012AE586FF0LL,
-+    0x432567ABCDEF765DLL
-+  };
-+
-+long long vals_out[5] =
-+  {
-+    0x1234567887CBA921LL,
-+    0x1456782093672A90LL,
-+    0x2340909123D34290LL,
-+    0x95959595996FF099LL,
-+    0x9099038798765D29LL
-+  };
-+
-+static void
-+sse4a_test (void)
-+{
-+  int i;
-+  int fail = 0;
-+  long long out;
-+
-+  for (i = 0; i < 5; i += 1)
-+    {
-+      out = sse4a_test_insert (vals_in1[i], vals_in2[i]);
-+      fail += chk(out, vals_out[i]);
-+
-+      out = sse4a_test_inserti (vals_in1[i], vals_in2[i]);
-+      fail += chk(out, vals_out[i]);
-+    }
-+
-+  if (fail != 0)
-+    abort ();
-+
-+  exit (0);
-+}
---- gcc/testsuite/gcc.target/i386/sse4a-extract.c.jj	2007-02-09 21:26:06.000000000 +0100
-+++ gcc/testsuite/gcc.target/i386/sse4a-extract.c	2007-02-09 21:26:06.000000000 +0100
-@@ -0,0 +1,100 @@
-+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
-+/* { dg-options "-O2 -msse4a" } */
-+#include <ammintrin.h>
-+#include <stdlib.h>
-+#include "../../gcc.dg/i386-cpuid.h"
-+
-+static void sse4a_test (void);
-+
-+typedef union
-+{
-+  long long i[2];
-+  __m128i vec;
-+} LI;
-+
-+int
-+main ()
-+{  
-+  unsigned long cpu_facilities;
-+
-+  cpu_facilities = i386_extended_cpuid_ecx ();
-+
-+  /* Run SSE4a test only if host has SSE4a support.  */
-+  if ((cpu_facilities & bit_SSE4a))
-+    sse4a_test ();
-+
-+  exit (0);
-+}
-+
-+static long long 
-+sse4a_test_extrq (long long in)
-+{
-+  __m128i v1, v2;
-+  long long index_length, pad;
-+  LI v_out;
-+  index_length = 0x0000000000000810LL; 
-+  pad = 0x0;
-+  v1 = _mm_set_epi64x (pad, in);
-+  v2 = _mm_set_epi64x (pad, index_length); 
-+  v_out.vec = _mm_extract_si64 (v1, v2);
-+  return (v_out.i[0]); 
-+}
-+
-+static long long 
-+sse4a_test_extrqi (long long in)
-+{
-+  __m128i v1;
-+  long long pad =0x0;
-+  LI v_out;
-+  v1 = _mm_set_epi64x (pad, in);
-+  v_out.vec = _mm_extracti_si64 (v1, (unsigned int) 0x10,(unsigned int) 0x08);
-+  return (v_out.i[0]);
-+}
-+
-+static chk (long long i1, long long i2)
-+{
-+  int n_fails =0;
-+  if (i1 != i2) 
-+    n_fails +=1;
-+  return n_fails;
-+}
-+
-+long long vals_in[5] =
-+  {
-+    0x1234567887654321LL,
-+    0x1456782093002490LL,
-+    0x2340909123990390LL,
-+    0x9595959599595999LL,
-+    0x9099038798000029LL
-+  };
-+
-+long long vals_out[5] =
-+  {
-+    0x0000000000006543LL,
-+    0x0000000000000024LL,
-+    0x0000000000009903LL,
-+    0x0000000000005959LL,
-+    0x0000000000000000LL
-+  };
-+
-+static void
-+sse4a_test (void)
-+{
-+  int i;
-+  int fail = 0;
-+  long long out;
-+
-+  for (i = 0; i < 5; i += 1)
-+    {
-+      out = sse4a_test_extrq (vals_in[i]);
-+      fail += chk(out, vals_out[i]);
-+
-+      out = sse4a_test_extrqi (vals_in[i]);
-+      fail += chk(out, vals_out[i]);
-+    }
-+
-+  if (fail != 0)
-+    abort ();
-+
-+  exit (0);
-+}
---- gcc/testsuite/gcc.target/i386/sse4a-montss.c.jj	2007-02-09 21:26:06.000000000 +0100
-+++ gcc/testsuite/gcc.target/i386/sse4a-montss.c	2007-02-09 21:26:06.000000000 +0100
-@@ -0,0 +1,64 @@
-+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
-+/* { dg-options "-O2 -msse4a" } */
-+#include <ammintrin.h>
-+#include <stdlib.h>
-+#include "../../gcc.dg/i386-cpuid.h"
-+
-+static void sse4a_test (void);
-+
-+int
-+main ()
-+{  
-+  unsigned long cpu_facilities;
-+
-+  cpu_facilities = i386_extended_cpuid_ecx ();
-+
-+  /* Run SSE4a test only if host has SSE4a support.  */
-+  if ((cpu_facilities & bit_SSE4a))
-+    sse4a_test ();
-+
-+  exit (0);
-+}
-+
-+static void 
-+sse4a_test_movntss (float *out, float *in)
-+{
-+  __m128 in_v4sf = _mm_load_ss (in);
-+  _mm_stream_ss (out, in_v4sf);
-+}
-+
-+static int 
-+chk_ss (float *v1, float *v2)
-+{
-+  int n_fails = 0;
-+  if (v1[0] != v2[0])
-+    n_fails += 1;
-+  return n_fails;
-+}
-+
-+float vals[10] =
-+  {
-+    100.0,  200.0, 300.0, 400.0, 5.0, 
-+    -1.0, .345, -21.5, 9.32,  8.41
-+  };
-+
-+static void
-+sse4a_test (void)
-+{
-+  int i;
-+  int fail = 0;
-+  float *out;
-+
-+  out = (float *) malloc (sizeof (float));
-+  for (i = 0; i < 10; i += 1)
-+    {
-+      sse4a_test_movntss (out, &vals[i]);
-+      
-+      fail += chk_ss (out, &vals[i]);
-+    }
-+
-+  if (fail != 0)
-+    abort ();
-+
-+  exit (0);
-+}
---- gcc/testsuite/gcc.target/i386/sse4a-montsd.c.jj	2007-02-09 21:26:06.000000000 +0100
-+++ gcc/testsuite/gcc.target/i386/sse4a-montsd.c	2007-02-09 21:26:06.000000000 +0100
-@@ -0,0 +1,64 @@
-+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
-+/* { dg-options "-O2 -msse4a" } */
-+#include <ammintrin.h>
-+#include <stdlib.h>
-+#include "../../gcc.dg/i386-cpuid.h"
-+
-+static void sse4a_test (void);
-+
-+int
-+main ()
-+{  
-+  unsigned long cpu_facilities;
-+
-+  cpu_facilities = i386_extended_cpuid_ecx ();
-+
-+  /* Run SSE4a test only if host has SSE4a support.  */
-+  if ((cpu_facilities & bit_SSE4a))
-+    sse4a_test ();
-+
-+  exit (0);
-+}
-+  
-+static void 
-+sse4a_test_movntsd (double *out, double *in)
-+{
-+  __m128d in_v2df = _mm_load_sd (in);
-+  _mm_stream_sd (out, in_v2df);
-+}
-+
-+static int 
-+chk_sd (double *v1, double *v2)
-+{
-+  int n_fails = 0;
-+  if (v1[0] != v2[0])
-+    n_fails += 1;
-+  return n_fails;
-+}
-+
-+double vals[10] =
-+  {
-+    100.0,  200.0, 300.0, 400.0, 5.0, 
-+    -1.0, .345, -21.5, 9.32,  8.41
-+  };
-+
-+static void
-+sse4a_test (void)
-+{
-+  int i;
-+  int fail = 0;
-+  double *out;
-+
-+  out = (double *) malloc (sizeof (double));
-+  for (i = 0; i < 10; i += 1)
-+    {
-+      sse4a_test_movntsd (out, &vals[i]);
-+      
-+      fail += chk_sd (out, &vals[i]);
-+    }
-+
-+  if (fail != 0)
-+    abort ();
-+
-+  exit (0);
-+}
---- gcc/testsuite/gcc.dg/i386-cpuid.h.jj	2006-10-05 00:26:53.000000000 +0200
-+++ gcc/testsuite/gcc.dg/i386-cpuid.h	2007-02-07 13:07:08.000000000 +0100
-@@ -2,23 +2,32 @@
-    Used by 20020523-2.c and i386-sse-6.c, and possibly others.  */
- /* Plagarized from 20020523-2.c.  */
- 
-+/* %ecx */
-+#define bit_SSE3 (1 << 0)
-+#define bit_SSSE3 (1 << 9)
-+
-+/* %edx */
- #define bit_CMOV (1 << 15)
- #define bit_MMX (1 << 23)
- #define bit_SSE (1 << 25)
- #define bit_SSE2 (1 << 26)
- 
-+/* Extended Features */
-+/* %ecx */
-+#define bit_SSE4a (1 << 6)
-+
- #ifndef NOINLINE
- #define NOINLINE __attribute__ ((noinline))
- #endif
- 
--unsigned int i386_cpuid (void) NOINLINE;
--
--unsigned int NOINLINE
--i386_cpuid (void)
-+static inline unsigned int
-+i386_get_cpuid (unsigned int *ecx, unsigned int *edx)
- {
--  int fl1, fl2;
-+  int fl1;
- 
- #ifndef __x86_64__
-+  int fl2;
-+
-   /* See if we can use cpuid.  On AMD64 we always can.  */
-   __asm__ ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
- 	   "pushl %0; popfl; pushfl; popl %0; popfl"
-@@ -42,15 +51,99 @@ i386_cpuid (void)
-   if (fl1 == 0)
-     return (0);
- 
--  /* Invoke CPUID(1), return %edx; caller can examine bits to
-+  /* Invoke CPUID(1), return %ecx and %edx; caller can examine bits to
-      determine what's supported.  */
- #ifdef __x86_64__
--  __asm__ ("pushq %%rcx; pushq %%rbx; cpuid; popq %%rbx; popq %%rcx"
--	   : "=d" (fl2), "=a" (fl1) : "1" (1) : "cc");
-+  __asm__ ("pushq %%rbx; cpuid; popq %%rbx"
-+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (1) : "cc");
- #else
--  __asm__ ("pushl %%ecx; pushl %%ebx; cpuid; popl %%ebx; popl %%ecx"
--	   : "=d" (fl2), "=a" (fl1) : "1" (1) : "cc");
-+  __asm__ ("pushl %%ebx; cpuid; popl %%ebx"
-+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (1) : "cc");
-+#endif
-+
-+  return 1;
-+}
-+
-+static inline unsigned int
-+i386_get_extended_cpuid (unsigned int *ecx, unsigned int *edx)
-+{
-+  int fl1;
-+  if (!(i386_get_cpuid (ecx, edx)))
-+    return 0;
-+
-+  /* Invoke CPUID(0x80000000) to get the highest supported extended function
-+     number */
-+#ifdef __x86_64__
-+  __asm__ ("cpuid"
-+	   : "=a" (fl1) : "0" (0x80000000) : "edx", "ecx", "ebx");
-+#else
-+  __asm__ ("pushl %%ebx; cpuid; popl %%ebx"
-+	   : "=a" (fl1) : "0" (0x80000000) : "edx", "ecx");
-+#endif
-+  /* Check if highest supported extended function used below are supported */
-+  if (fl1 < 0x80000001)
-+    return 0;  
-+
-+  /* Invoke CPUID(0x80000001), return %ecx and %edx; caller can examine bits to
-+     determine what's supported.  */
-+#ifdef __x86_64__
-+  __asm__ ("cpuid"
-+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (0x80000001) : "ebx");
-+#else
-+  __asm__ ("pushl %%ebx; cpuid; popl %%ebx"
-+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (0x80000001));
- #endif
-+  return 1;
-+}
-+
-+
-+unsigned int i386_cpuid_ecx (void) NOINLINE;
-+unsigned int i386_cpuid_edx (void) NOINLINE;
-+unsigned int i386_extended_cpuid_ecx (void) NOINLINE;
-+unsigned int i386_extended_cpuid_edx (void) NOINLINE;
-+
-+unsigned int NOINLINE
-+i386_cpuid_ecx (void)
-+{
-+  unsigned int ecx, edx;
-+  if (i386_get_cpuid (&ecx, &edx))
-+    return ecx;
-+  else
-+    return 0;
-+}
-+
-+unsigned int NOINLINE
-+i386_cpuid_edx (void)
-+{
-+  unsigned int ecx, edx;
-+  if (i386_get_cpuid (&ecx, &edx))
-+    return edx;
-+  else
-+    return 0;
-+}
- 
--  return fl2;
-+unsigned int NOINLINE
-+i386_extended_cpuid_ecx (void)
-+{
-+  unsigned int ecx, edx;
-+  if (i386_get_extended_cpuid (&ecx, &edx))
-+    return ecx;
-+  else
-+    return 0;
-+}
-+
-+unsigned int NOINLINE
-+i386_extended_cpuid_edx (void)
-+{
-+  unsigned int ecx, edx;
-+  if (i386_get_extended_cpuid (&ecx, &edx))
-+    return edx;
-+  else
-+    return 0;
-+}
-+
-+static inline unsigned int
-+i386_cpuid (void)
-+{
-+  return i386_cpuid_edx ();
- }
---- gcc/config.gcc.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/config.gcc	2007-02-09 21:26:06.000000000 +0100
-@@ -264,12 +264,12 @@ xscale-*-*)
- i[34567]86-*-*)
- 	cpu_type=i386
- 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
--		       pmmintrin.h tmmintrin.h"
-+		       pmmintrin.h tmmintrin.h ammintrin.h"
- 	;;
- x86_64-*-*)
- 	cpu_type=i386
- 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
--		       pmmintrin.h tmmintrin.h"
-+		       pmmintrin.h tmmintrin.h ammintrin.h"
- 	need_64bit_hwint=yes
- 	;;
- ia64-*-*)
-@@ -2396,6 +2396,9 @@ if test x$with_cpu = x ; then
-       ;;
-     i686-*-* | i786-*-*)
-       case ${target_noncanonical} in
-+        amdfam10-*)
-+          with_cpu=amdfam10
-+          ;;
-         k8-*|opteron-*|athlon_64-*)
-           with_cpu=k8
-           ;;
-@@ -2436,6 +2439,9 @@ if test x$with_cpu = x ; then
-       ;;
-     x86_64-*-*)
-       case ${target_noncanonical} in
-+        amdfam10-*)
-+          with_cpu=amdfam10
-+          ;;
-         k8-*|opteron-*|athlon_64-*)
-           with_cpu=k8
-           ;;
-@@ -2668,7 +2674,7 @@ case "${target}" in
- 				esac
- 				# OK
- 				;;
--			"" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
-+			"" | amdfam10 | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
- 				# OK
- 				;;
- 			*)
---- gcc/config/i386/i386.h.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/config/i386/i386.h	2007-02-09 21:29:00.000000000 +0100
-@@ -141,6 +141,7 @@ extern const struct processor_costs *ix8
- #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
- #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
- #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
-+#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
- 
- #define TUNEMASK (1 << ix86_tune)
- extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
-@@ -159,6 +160,7 @@ extern const int x86_accumulate_outgoing
- extern const int x86_epilogue_using_move, x86_decompose_lea;
- extern const int x86_arch_always_fancy_math_387, x86_shift1;
- extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
-+extern const int x86_sse_unaligned_move_optimal;
- extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
- extern const int x86_use_ffreep;
- extern const int x86_inter_unit_moves, x86_schedule;
-@@ -208,6 +210,8 @@ extern int x86_prefetch_sse, x86_cmpxchg
- #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
- #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
- 				      (x86_sse_partial_reg_dependency & TUNEMASK)
-+#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
-+				      (x86_sse_unaligned_move_optimal & TUNEMASK)
- #define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
- #define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
- #define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
-@@ -376,6 +380,8 @@ extern int x86_prefetch_sse, x86_cmpxchg
- 	}							\
-       else if (TARGET_K8)					\
- 	builtin_define ("__tune_k8__");				\
-+      else if (TARGET_AMDFAM10)					\
-+	builtin_define ("__tune_amdfam10__");			\
-       else if (TARGET_PENTIUM4)					\
- 	builtin_define ("__tune_pentium4__");			\
-       else if (TARGET_NOCONA)					\
-@@ -400,6 +406,8 @@ extern int x86_prefetch_sse, x86_cmpxchg
- 	  builtin_define ("__SSSE3__");				\
- 	  builtin_define ("__MNI__");				\
- 	}							\
-+      if (TARGET_SSE4A)						\
-+	builtin_define ("__SSE4A__");		                \
-       if (TARGET_SSE_MATH && TARGET_SSE)			\
- 	builtin_define ("__SSE_MATH__");			\
-       if (TARGET_SSE_MATH && TARGET_SSE2)			\
-@@ -455,6 +463,11 @@ extern int x86_prefetch_sse, x86_cmpxchg
- 	  builtin_define ("__k8");				\
- 	  builtin_define ("__k8__");				\
- 	}							\
-+      else if (ix86_arch == PROCESSOR_AMDFAM10)			\
-+	{							\
-+	  builtin_define ("__amdfam10");			\
-+	  builtin_define ("__amdfam10__");			\
-+	}							\
-       else if (ix86_arch == PROCESSOR_PENTIUM4)			\
- 	{							\
- 	  builtin_define ("__pentium4");			\
-@@ -493,13 +506,14 @@ extern int x86_prefetch_sse, x86_cmpxchg
- #define TARGET_CPU_DEFAULT_nocona 17
- #define TARGET_CPU_DEFAULT_core2 18
- #define TARGET_CPU_DEFAULT_generic 19
-+#define TARGET_CPU_DEFAULT_amdfam10 20
- 
- #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
- 				  "pentiumpro", "pentium2", "pentium3", \
- 				  "pentium4", "geode", "k6", "k6-2", "k6-3", \
- 				  "athlon", "athlon-4", "k8", \
- 				  "pentium-m", "prescott", "nocona", \
--				  "core2", "generic"}
-+				  "core2", "generic", "amdfam10"}
- 
- #ifndef CC1_SPEC
- #define CC1_SPEC "%(cc1_cpu) "
-@@ -2162,6 +2176,7 @@ enum processor_type
-   PROCESSOR_CORE2,
-   PROCESSOR_GENERIC32,
-   PROCESSOR_GENERIC64,
-+  PROCESSOR_AMDFAM10,
-   PROCESSOR_max
- };
- 
---- gcc/config/i386/i386.md.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/config/i386/i386.md	2007-02-10 19:33:43.000000000 +0100
-@@ -151,6 +151,12 @@
-    (UNSPEC_PSHUFB		120)
-    (UNSPEC_PSIGN		121)
-    (UNSPEC_PALIGNR		122)
-+
-+   ; For SSE4A support
-+   (UNSPEC_EXTRQI               130)
-+   (UNSPEC_EXTRQ                131)   
-+   (UNSPEC_INSERTQI             132)
-+   (UNSPEC_INSERTQ              133)
-   ])
- 
- (define_constants
-@@ -190,7 +196,8 @@
- 
- ;; Processor type.  This attribute must exactly match the processor_type
- ;; enumeration in i386.h.
--(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
-+(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
-+                    nocona,core2,generic32,generic64,amdfam10"
-   (const (symbol_ref "ix86_tune")))
- 
- ;; A basic instruction type.  Refinements due to arguments to be
-@@ -201,10 +208,10 @@
-    incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
-    icmp,test,ibr,setcc,icmov,
-    push,pop,call,callv,leave,
--   str,cld,
-+   str,bitmanip,cld,
-    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
-    sselog,sselog1,sseiadd,sseishft,sseimul,
--   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
-+   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
-    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
-   (const_string "other"))
- 
-@@ -218,7 +225,7 @@
-   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
- 	   (const_string "i387")
- 	 (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
--			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
-+			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
- 	   (const_string "sse")
- 	 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
- 	   (const_string "mmx")
-@@ -228,7 +235,8 @@
- 
- ;; The (bounding maximum) length of an instruction immediate.
- (define_attr "length_immediate" ""
--  (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
-+  (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave,
-+			  bitmanip")
- 	   (const_int 0)
- 	 (eq_attr "unit" "i387,sse,mmx")
- 	   (const_int 0)
-@@ -282,7 +290,7 @@
- ;; Set when 0f opcode prefix is used.
- (define_attr "prefix_0f" ""
-   (if_then_else 
--    (ior (eq_attr "type" "imovx,setcc,icmov")
-+    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
- 	 (eq_attr "unit" "sse,mmx"))
-     (const_int 1)
-     (const_int 0)))
-@@ -407,7 +415,7 @@
- 	   (const_string "load")
- 	 (and (eq_attr "type"
- 		 "!alu1,negnot,ishift1,
--		   imov,imovx,icmp,test,
-+		   imov,imovx,icmp,test,bitmanip,
- 		   fmov,fcmp,fsgn,
- 		   sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
- 		   mmx,mmxmov,mmxcmp,mmxcvt")
-@@ -961,10 +969,11 @@
-   "sahf"
-   [(set_attr "length" "1")
-    (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")
-    (set_attr "mode" "SI")])
- 
- ;; Pentium Pro can do steps 1 through 3 in one go.
--
-+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) 
- (define_insn "*cmpfp_i_mixed"
-   [(set (reg:CCFP FLAGS_REG)
- 	(compare:CCFP (match_operand 0 "register_operand" "f#x,x#f")
-@@ -978,7 +987,8 @@
-      (if_then_else (match_operand:SF 1 "" "")
-         (const_string "SF")
-         (const_string "DF")))
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_insn "*cmpfp_i_sse"
-   [(set (reg:CCFP FLAGS_REG)
-@@ -993,7 +1003,8 @@
-      (if_then_else (match_operand:SF 1 "" "")
-         (const_string "SF")
-         (const_string "DF")))
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_insn "*cmpfp_i_i387"
-   [(set (reg:CCFP FLAGS_REG)
-@@ -1012,7 +1023,8 @@
- 	      (const_string "DF")
- 	   ]
- 	   (const_string "XF")))
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_insn "*cmpfp_iu_mixed"
-   [(set (reg:CCFPU FLAGS_REG)
-@@ -1027,7 +1039,8 @@
-      (if_then_else (match_operand:SF 1 "" "")
-         (const_string "SF")
-         (const_string "DF")))
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_insn "*cmpfp_iu_sse"
-   [(set (reg:CCFPU FLAGS_REG)
-@@ -1042,7 +1055,8 @@
-      (if_then_else (match_operand:SF 1 "" "")
-         (const_string "SF")
-         (const_string "DF")))
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_insn "*cmpfp_iu_387"
-   [(set (reg:CCFPU FLAGS_REG)
-@@ -1061,7 +1075,8 @@
- 	      (const_string "DF")
- 	   ]
- 	   (const_string "XF")))
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- ;; Move instructions.
- 
-@@ -1267,7 +1282,8 @@
-   [(set_attr "type" "imov")
-    (set_attr "mode" "SI")
-    (set_attr "pent_pair" "np")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "double")])   
- 
- (define_expand "movhi"
-   [(set (match_operand:HI 0 "nonimmediate_operand" "")
-@@ -1384,8 +1400,10 @@
-   [(set_attr "type" "imov")
-    (set_attr "mode" "SI")
-    (set_attr "pent_pair" "np")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "double")])   
- 
-+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
- (define_insn "*swaphi_2"
-   [(set (match_operand:HI 0 "register_operand" "+r")
- 	(match_operand:HI 1 "register_operand" "+r"))
-@@ -1558,8 +1576,10 @@
-   [(set_attr "type" "imov")
-    (set_attr "mode" "SI")
-    (set_attr "pent_pair" "np")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "vector")])   
- 
-+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
- (define_insn "*swapqi_2"
-   [(set (match_operand:QI 0 "register_operand" "+q")
- 	(match_operand:QI 1 "register_operand" "+q"))
-@@ -2113,7 +2133,8 @@
-   [(set_attr "type" "imov")
-    (set_attr "mode" "DI")
-    (set_attr "pent_pair" "np")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "double")])   
- 
- (define_expand "movti"
-   [(set (match_operand:TI 0 "nonimmediate_operand" "")
-@@ -4122,7 +4143,8 @@
-   "cvttss2si{q}\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "SF")
--   (set_attr "athlon_decode" "double,vector")])
-+   (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")])
- 
- (define_insn "fix_truncdfdi_sse"
-   [(set (match_operand:DI 0 "register_operand" "=r,r")
-@@ -4131,7 +4153,8 @@
-   "cvttsd2si{q}\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
--   (set_attr "athlon_decode" "double,vector")])
-+   (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")])
- 
- (define_insn "fix_truncsfsi_sse"
-   [(set (match_operand:SI 0 "register_operand" "=r,r")
-@@ -4140,7 +4163,8 @@
-   "cvttss2si\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
--   (set_attr "athlon_decode" "double,vector")])
-+   (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")])
- 
- (define_insn "fix_truncdfsi_sse"
-   [(set (match_operand:SI 0 "register_operand" "=r,r")
-@@ -4149,7 +4173,8 @@
-   "cvttsd2si\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
--   (set_attr "athlon_decode" "double,vector")])
-+   (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")])
- 
- ;; Avoid vector decoded forms of the instruction.
- (define_peephole2
-@@ -4410,7 +4435,8 @@
-   [(set_attr "length" "2")
-    (set_attr "mode" "HI")
-    (set_attr "unit" "i387")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "vector")])   
- 
- ;; Conversion between fixed point and floating point.
- 
-@@ -4461,6 +4487,7 @@
-    (set_attr "mode" "SF")
-    (set_attr "unit" "*,i387,*,*")
-    (set_attr "athlon_decode" "*,*,vector,double")
-+   (set_attr "amdfam10_decode" "*,*,vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatsisf2_sse"
-@@ -4471,6 +4498,7 @@
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "SF")
-    (set_attr "athlon_decode" "vector,double")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatsisf2_i387"
-@@ -4504,6 +4532,7 @@
-    (set_attr "mode" "SF")
-    (set_attr "unit" "*,i387,*,*")
-    (set_attr "athlon_decode" "*,*,vector,double")
-+   (set_attr "amdfam10_decode" "*,*,vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatdisf2_sse"
-@@ -4514,6 +4543,7 @@
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "SF")
-    (set_attr "athlon_decode" "vector,double")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatdisf2_i387"
-@@ -4572,6 +4602,7 @@
-    (set_attr "mode" "DF")
-    (set_attr "unit" "*,i387,*,*")
-    (set_attr "athlon_decode" "*,*,double,direct")
-+   (set_attr "amdfam10_decode" "*,*,vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatsidf2_sse"
-@@ -4582,6 +4613,7 @@
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
-    (set_attr "athlon_decode" "double,direct")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatsidf2_i387"
-@@ -4615,6 +4647,7 @@
-    (set_attr "mode" "DF")
-    (set_attr "unit" "*,i387,*,*")
-    (set_attr "athlon_decode" "*,*,double,direct")
-+   (set_attr "amdfam10_decode" "*,*,vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatdidf2_sse"
-@@ -4625,6 +4658,7 @@
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
-    (set_attr "athlon_decode" "double,direct")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "fp_int_src" "true")])
- 
- (define_insn "*floatdidf2_i387"
-@@ -6832,6 +6866,14 @@
-   "TARGET_64BIT"
-   "")
- 
-+;; On AMDFAM10 
-+;; IMUL reg64, reg64, imm8 	Direct
-+;; IMUL reg64, mem64, imm8 	VectorPath
-+;; IMUL reg64, reg64, imm32 	Direct
-+;; IMUL reg64, mem64, imm32 	VectorPath 
-+;; IMUL reg64, reg64 		Direct
-+;; IMUL reg64, mem64 		Direct
-+
- (define_insn "*muldi3_1_rex64"
-   [(set (match_operand:DI 0 "register_operand" "=r,r,r")
- 	(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
-@@ -6854,6 +6896,11 @@
- 		    (match_operand 1 "memory_operand" ""))
- 		  (const_string "vector")]
- 	      (const_string "direct")))
-+   (set (attr "amdfam10_decode")
-+	(cond [(and (eq_attr "alternative" "0,1")
-+		    (match_operand 1 "memory_operand" ""))
-+		  (const_string "vector")]
-+	      (const_string "direct")))	      
-    (set_attr "mode" "DI")])
- 
- (define_expand "mulsi3"
-@@ -6864,6 +6911,14 @@
-   ""
-   "")
- 
-+;; On AMDFAM10 
-+;; IMUL reg32, reg32, imm8 	Direct
-+;; IMUL reg32, mem32, imm8 	VectorPath
-+;; IMUL reg32, reg32, imm32 	Direct
-+;; IMUL reg32, mem32, imm32 	VectorPath
-+;; IMUL reg32, reg32 		Direct
-+;; IMUL reg32, mem32 		Direct
-+
- (define_insn "*mulsi3_1"
-   [(set (match_operand:SI 0 "register_operand" "=r,r,r")
- 	(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
-@@ -6885,6 +6940,11 @@
- 		    (match_operand 1 "memory_operand" ""))
- 		  (const_string "vector")]
- 	      (const_string "direct")))
-+   (set (attr "amdfam10_decode")
-+	(cond [(and (eq_attr "alternative" "0,1")
-+		    (match_operand 1 "memory_operand" ""))
-+		  (const_string "vector")]
-+	      (const_string "direct")))	      
-    (set_attr "mode" "SI")])
- 
- (define_insn "*mulsi3_1_zext"
-@@ -6910,6 +6970,11 @@
- 		    (match_operand 1 "memory_operand" ""))
- 		  (const_string "vector")]
- 	      (const_string "direct")))
-+   (set (attr "amdfam10_decode")
-+	(cond [(and (eq_attr "alternative" "0,1")
-+		    (match_operand 1 "memory_operand" ""))
-+		  (const_string "vector")]
-+	      (const_string "direct")))	      
-    (set_attr "mode" "SI")])
- 
- (define_expand "mulhi3"
-@@ -6920,6 +6985,13 @@
-   "TARGET_HIMODE_MATH"
-   "")
- 
-+;; On AMDFAM10
-+;; IMUL reg16, reg16, imm8 	VectorPath
-+;; IMUL reg16, mem16, imm8 	VectorPath
-+;; IMUL reg16, reg16, imm16 	VectorPath
-+;; IMUL reg16, mem16, imm16 	VectorPath
-+;; IMUL reg16, reg16 		Direct
-+;; IMUL reg16, mem16 		Direct
- (define_insn "*mulhi3_1"
-   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
- 	(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
-@@ -6938,6 +7010,10 @@
- 	       (eq_attr "alternative" "1,2")
- 		  (const_string "vector")]
- 	      (const_string "direct")))
-+   (set (attr "amdfam10_decode")
-+	(cond [(eq_attr "alternative" "0,1")
-+		  (const_string "vector")]
-+	      (const_string "direct")))
-    (set_attr "mode" "HI")])
- 
- (define_expand "mulqi3"
-@@ -6948,6 +7024,10 @@
-   "TARGET_QIMODE_MATH"
-   "")
- 
-+;;On AMDFAM10
-+;; MUL reg8 	Direct
-+;; MUL mem8 	Direct
-+
- (define_insn "*mulqi3_1"
-   [(set (match_operand:QI 0 "register_operand" "=a")
- 	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-@@ -6962,6 +7042,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "direct")))
-+   (set_attr "amdfam10_decode" "direct")        
-    (set_attr "mode" "QI")])
- 
- (define_expand "umulqihi3"
-@@ -6988,6 +7069,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "direct")))
-+   (set_attr "amdfam10_decode" "direct")        
-    (set_attr "mode" "QI")])
- 
- (define_expand "mulqihi3"
-@@ -7012,6 +7094,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "direct")))
-+   (set_attr "amdfam10_decode" "direct")        
-    (set_attr "mode" "QI")])
- 
- (define_expand "umulditi3"
-@@ -7038,6 +7121,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")        
-    (set_attr "mode" "DI")])
- 
- ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
-@@ -7065,6 +7149,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")        
-    (set_attr "mode" "SI")])
- 
- (define_expand "mulditi3"
-@@ -7091,6 +7176,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")
-    (set_attr "mode" "DI")])
- 
- (define_expand "mulsidi3"
-@@ -7117,6 +7203,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")        
-    (set_attr "mode" "SI")])
- 
- (define_expand "umuldi3_highpart"
-@@ -7153,6 +7240,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")        
-    (set_attr "mode" "DI")])
- 
- (define_expand "umulsi3_highpart"
-@@ -7188,6 +7276,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")
-    (set_attr "mode" "SI")])
- 
- (define_insn "*umulsi3_highpart_zext"
-@@ -7210,6 +7299,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")
-    (set_attr "mode" "SI")])
- 
- (define_expand "smuldi3_highpart"
-@@ -7245,6 +7335,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")
-    (set_attr "mode" "DI")])
- 
- (define_expand "smulsi3_highpart"
-@@ -7279,6 +7370,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")
-    (set_attr "mode" "SI")])
- 
- (define_insn "*smulsi3_highpart_zext"
-@@ -7300,6 +7392,7 @@
-      (if_then_else (eq_attr "cpu" "athlon")
-         (const_string "vector")
-         (const_string "double")))
-+   (set_attr "amdfam10_decode" "double")
-    (set_attr "mode" "SI")])
- 
- ;; The patterns that match these are at the end of this file.
-@@ -10281,7 +10374,8 @@
-   [(set_attr "type" "ishift")
-    (set_attr "prefix_0f" "1")
-    (set_attr "mode" "DI")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "vector")])   
- 
- (define_expand "x86_64_shift_adj"
-   [(set (reg:CCZ FLAGS_REG)
-@@ -10496,7 +10590,8 @@
-    (set_attr "prefix_0f" "1")
-    (set_attr "mode" "SI")
-    (set_attr "pent_pair" "np")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "vector")])   
- 
- (define_expand "x86_shift_adj_1"
-   [(set (reg:CCZ FLAGS_REG)
-@@ -11256,7 +11351,8 @@
-   [(set_attr "type" "ishift")
-    (set_attr "prefix_0f" "1")
-    (set_attr "mode" "DI")
--   (set_attr "athlon_decode" "vector")])
-+   (set_attr "athlon_decode" "vector")
-+   (set_attr "amdfam10_decode" "vector")])   
- 
- (define_expand "ashrdi3"
-   [(set (match_operand:DI 0 "shiftdi_operand" "")
-@@ -14520,7 +14616,23 @@
-      [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
-       (clobber (reg:CC FLAGS_REG))])]
-   ""
--  "")
-+{
-+  if (TARGET_ABM)
-+    {
-+      emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
-+      DONE;
-+    }
-+})
-+
-+(define_insn "clzsi2_abm"
-+  [(set (match_operand:SI 0 "register_operand" "=r")
-+        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
-+   (clobber (reg:CC FLAGS_REG))]
-+  "TARGET_ABM"
-+  "lzcnt{l}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "SI")])
- 
- (define_insn "*bsr"
-   [(set (match_operand:SI 0 "register_operand" "=r")
-@@ -14529,7 +14641,44 @@
-    (clobber (reg:CC FLAGS_REG))]
-   ""
-   "bsr{l}\t{%1, %0|%0, %1}"
--  [(set_attr "prefix_0f" "1")])
-+  [(set_attr "prefix_0f" "1")
-+   (set_attr "mode" "SI")])
-+
-+(define_insn "popcountsi2"
-+  [(set (match_operand:SI 0 "register_operand" "=r")
-+	(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
-+   (clobber (reg:CC FLAGS_REG))]
-+  "TARGET_POPCNT"
-+  "popcnt{l}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "SI")])
-+
-+(define_insn "*popcountsi2_cmp"
-+  [(set (reg FLAGS_REG)
-+	(compare
-+	  (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
-+	  (const_int 0)))
-+   (set (match_operand:SI 0 "register_operand" "=r")
-+	(popcount:SI (match_dup 1)))]
-+  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-+  "popcnt{l}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "SI")])
-+
-+(define_insn "*popcountsi2_cmp_zext"
-+  [(set (reg FLAGS_REG)
-+        (compare
-+          (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
-+          (const_int 0)))
-+   (set (match_operand:DI 0 "register_operand" "=r")
-+        (zero_extend:DI(popcount:SI (match_dup 1))))]
-+  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-+  "popcnt{l}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "SI")])
- 
- (define_expand "clzdi2"
-   [(parallel
-@@ -14541,7 +14690,23 @@
-      [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
-       (clobber (reg:CC FLAGS_REG))])]
-   "TARGET_64BIT"
--  "")
-+{
-+  if (TARGET_ABM)
-+    {
-+      emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
-+      DONE;
-+    }
-+})
-+
-+(define_insn "clzdi2_abm"
-+  [(set (match_operand:DI 0 "register_operand" "=r")
-+	(clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
-+   (clobber (reg:CC FLAGS_REG))]
-+  "TARGET_64BIT && TARGET_ABM"
-+  "lzcnt{q}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "DI")])
- 
- (define_insn "*bsr_rex64"
-   [(set (match_operand:DI 0 "register_operand" "=r")
-@@ -14550,7 +14715,92 @@
-    (clobber (reg:CC FLAGS_REG))]
-   "TARGET_64BIT"
-   "bsr{q}\t{%1, %0|%0, %1}"
--  [(set_attr "prefix_0f" "1")])
-+  [(set_attr "prefix_0f" "1")
-+   (set_attr "mode" "DI")])
-+
-+(define_insn "popcountdi2"
-+  [(set (match_operand:DI 0 "register_operand" "=r")
-+	(popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
-+   (clobber (reg:CC FLAGS_REG))]
-+  "TARGET_64BIT && TARGET_POPCNT"
-+  "popcnt{q}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "DI")])
-+
-+(define_insn "*popcountdi2_cmp"
-+  [(set (reg FLAGS_REG)
-+	(compare
-+	  (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
-+	  (const_int 0)))
-+   (set (match_operand:DI 0 "register_operand" "=r")
-+	(popcount:DI (match_dup 1)))]
-+  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-+  "popcnt{q}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "DI")])
-+
-+(define_expand "clzhi2"
-+  [(parallel
-+     [(set (match_operand:HI 0 "register_operand" "")
-+	   (minus:HI (const_int 15)
-+		     (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
-+      (clobber (reg:CC FLAGS_REG))])
-+   (parallel
-+     [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
-+      (clobber (reg:CC FLAGS_REG))])]
-+  ""
-+{
-+  if (TARGET_ABM)
-+    {
-+      emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
-+      DONE;
-+    }
-+})
-+
-+(define_insn "clzhi2_abm"
-+  [(set (match_operand:HI 0 "register_operand" "=r")
-+	(clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))
-+   (clobber (reg:CC FLAGS_REG))]
-+  "TARGET_ABM"
-+  "lzcnt{w}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "HI")])
-+
-+(define_insn "*bsrhi"
-+  [(set (match_operand:HI 0 "register_operand" "=r")
-+	(minus:HI (const_int 15)
-+		  (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
-+   (clobber (reg:CC FLAGS_REG))]
-+  ""
-+  "bsr{w}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_0f" "1")
-+   (set_attr "mode" "HI")])
-+
-+(define_insn "popcounthi2"
-+  [(set (match_operand:HI 0 "register_operand" "=r")
-+	(popcount:HI (match_operand:HI 1 "nonimmediate_operand" "")))
-+   (clobber (reg:CC FLAGS_REG))]
-+  "TARGET_POPCNT"
-+  "popcnt{w}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "HI")])
-+
-+(define_insn "*popcounthi2_cmp"
-+  [(set (reg FLAGS_REG)
-+        (compare
-+          (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))
-+          (const_int 0)))
-+   (set (match_operand:HI 0 "register_operand" "=r")
-+        (popcount:HI (match_dup 1)))]
-+  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-+  "popcnt{w}\t{%1, %0|%0, %1}"
-+  [(set_attr "prefix_rep" "1")
-+   (set_attr "type" "bitmanip")
-+   (set_attr "mode" "HI")])
- 
- ;; Thread-local storage patterns for ELF.
- ;;
-@@ -15302,7 +15552,8 @@
-    sqrtss\t{%1, %0|%0, %1}"
-   [(set_attr "type" "fpspc,sse")
-    (set_attr "mode" "SF,SF")
--   (set_attr "athlon_decode" "direct,*")])
-+   (set_attr "athlon_decode" "direct,*")
-+   (set_attr "amdfam10_decode" "direct,*")])
- 
- (define_insn "*sqrtsf2_sse"
-   [(set (match_operand:SF 0 "register_operand" "=x")
-@@ -15311,7 +15562,8 @@
-   "sqrtss\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sse")
-    (set_attr "mode" "SF")
--   (set_attr "athlon_decode" "*")])
-+   (set_attr "athlon_decode" "*")
-+   (set_attr "amdfam10_decode" "*")])
- 
- (define_insn "*sqrtsf2_i387"
-   [(set (match_operand:SF 0 "register_operand" "=f")
-@@ -15320,7 +15572,8 @@
-   "fsqrt"
-   [(set_attr "type" "fpspc")
-    (set_attr "mode" "SF")
--   (set_attr "athlon_decode" "direct")])
-+   (set_attr "athlon_decode" "direct")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_expand "sqrtdf2"
-   [(set (match_operand:DF 0 "register_operand" "")
-@@ -15399,7 +15652,8 @@
-   "fsqrt"
-   [(set_attr "type" "fpspc")
-    (set_attr "mode" "XF")
--   (set_attr "athlon_decode" "direct")])
-+   (set_attr "athlon_decode" "direct")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- (define_insn "fpremxf4"
-   [(set (match_operand:XF 0 "register_operand" "=f")
-@@ -20186,7 +20440,7 @@
- 		   (mult:DI (match_operand:DI 1 "memory_operand" "")
- 			    (match_operand:DI 2 "immediate_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])]
--  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
-+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
-    && (GET_CODE (operands[2]) != CONST_INT
-        || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
-   [(set (match_dup 3) (match_dup 1))
-@@ -20200,7 +20454,7 @@
- 		   (mult:SI (match_operand:SI 1 "memory_operand" "")
- 			    (match_operand:SI 2 "immediate_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])]
--  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
-+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
-    && (GET_CODE (operands[2]) != CONST_INT
-        || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
-   [(set (match_dup 3) (match_dup 1))
-@@ -20215,7 +20469,7 @@
- 		     (mult:SI (match_operand:SI 1 "memory_operand" "")
- 			      (match_operand:SI 2 "immediate_operand" ""))))
- 	      (clobber (reg:CC FLAGS_REG))])]
--  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
-+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
-    && (GET_CODE (operands[2]) != CONST_INT
-        || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
-   [(set (match_dup 3) (match_dup 1))
-@@ -20233,7 +20487,7 @@
- 			    (match_operand:DI 2 "const_int_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])
-    (match_scratch:DI 3 "r")]
--  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
-+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
-    && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
-   [(set (match_dup 3) (match_dup 2))
-    (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
-@@ -20249,7 +20503,7 @@
- 			    (match_operand:SI 2 "const_int_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])
-    (match_scratch:SI 3 "r")]
--  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
-+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
-    && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
-   [(set (match_dup 3) (match_dup 2))
-    (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
-@@ -20265,7 +20519,7 @@
- 			    (match_operand:HI 2 "immediate_operand" "")))
- 	      (clobber (reg:CC FLAGS_REG))])
-    (match_scratch:HI 3 "r")]
--  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
-+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
-   [(set (match_dup 3) (match_dup 2))
-    (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
- 	      (clobber (reg:CC FLAGS_REG))])]
---- gcc/config/i386/athlon.md.jj	2006-10-29 20:56:45.000000000 +0100
-+++ gcc/config/i386/athlon.md	2007-02-09 21:26:06.000000000 +0100
-@@ -29,6 +29,8 @@
- 	   (const_string "vector")]
- 	(const_string "direct")))
- 
-+(define_attr "amdfam10_decode" "direct,vector,double"
-+  (const_string "direct"))
- ;;
- ;;           decode0 decode1 decode2
- ;;                 \    |   /
-@@ -131,18 +133,22 @@
- 
- ;; Jump instructions are executed in the branch unit completely transparent to us
- (define_insn_reservation "athlon_branch" 0
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "ibr"))
- 			 "athlon-direct,athlon-ieu")
- (define_insn_reservation "athlon_call" 0
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (eq_attr "type" "call,callv"))
- 			 "athlon-vector,athlon-ieu")
-+(define_insn_reservation "athlon_call_amdfam10" 0
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "call,callv"))
-+			 "athlon-double,athlon-ieu")
- 
- ;; Latency of push operation is 3 cycles, but ESP value is available
- ;; earlier
- (define_insn_reservation "athlon_push" 2
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "push"))
- 			 "athlon-direct,athlon-agu,athlon-store")
- (define_insn_reservation "athlon_pop" 4
-@@ -153,12 +159,16 @@
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (eq_attr "type" "pop"))
- 			 "athlon-double,(athlon-ieu+athlon-load)")
-+(define_insn_reservation "athlon_pop_amdfam10" 3
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "pop"))
-+			 "athlon-direct,(athlon-ieu+athlon-load)")
- (define_insn_reservation "athlon_leave" 3
- 			 (and (eq_attr "cpu" "athlon")
- 			      (eq_attr "type" "leave"))
- 			 "athlon-vector,(athlon-ieu+athlon-load)")
- (define_insn_reservation "athlon_leave_k8" 3
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (eq_attr "type" "leave"))
- 			 "athlon-double,(athlon-ieu+athlon-load)")
- 
-@@ -167,6 +177,11 @@
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (eq_attr "type" "lea"))
- 			 "athlon-direct,athlon-agu,nothing")
-+;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
-+(define_insn_reservation "athlon_lea_amdfam10" 1
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "lea"))
-+			 "athlon-direct,athlon-agu,nothing")
- 
- ;; Mul executes in special multiplier unit attached to IEU0
- (define_insn_reservation "athlon_imul" 5
-@@ -176,29 +191,35 @@
- 			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
- ;; ??? Widening multiply is vector or double.
- (define_insn_reservation "athlon_imul_k8_DI" 4
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "imul")
- 				   (and (eq_attr "mode" "DI")
- 					(eq_attr "memory" "none,unknown"))))
- 			 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
- (define_insn_reservation "athlon_imul_k8" 3
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "imul")
- 				   (eq_attr "memory" "none,unknown")))
- 			 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
-+(define_insn_reservation "athlon_imul_amdfam10_HI" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "imul")
-+				   (and (eq_attr "mode" "HI")
-+					(eq_attr "memory" "none,unknown"))))
-+			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")			 
- (define_insn_reservation "athlon_imul_mem" 8
- 			 (and (eq_attr "cpu" "athlon")
- 			      (and (eq_attr "type" "imul")
- 				   (eq_attr "memory" "load,both")))
- 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
- (define_insn_reservation "athlon_imul_mem_k8_DI" 7
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "imul")
- 				   (and (eq_attr "mode" "DI")
- 					(eq_attr "memory" "load,both"))))
- 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
- (define_insn_reservation "athlon_imul_mem_k8" 6
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "imul")
- 				   (eq_attr "memory" "load,both")))
- 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
-@@ -209,21 +230,23 @@
- ;; other instructions.
- ;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
- ;; of the other code
-+;; Using the same heuristics for amdfam10 as K8 with idiv
- 
- (define_insn_reservation "athlon_idiv" 6
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "idiv")
- 				   (eq_attr "memory" "none,unknown")))
- 			 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
- (define_insn_reservation "athlon_idiv_mem" 9
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "idiv")
- 				   (eq_attr "memory" "load,both")))
- 			 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
- ;; The parallelism of string instructions is not documented.  Model it same way
- ;; as idiv to create smaller automata.  This probably does not matter much.
-+;; Using the same heuristics for amdfam10 as K8 with idiv
- (define_insn_reservation "athlon_str" 6
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "str")
- 				   (eq_attr "memory" "load,both,store")))
- 			 "athlon-vector,athlon-load,athlon-ieu0*6")
-@@ -234,34 +257,62 @@
- 				   (and (eq_attr "unit" "integer,unknown")
- 					(eq_attr "memory" "none,unknown"))))
- 			 "athlon-direct,athlon-ieu")
-+(define_insn_reservation "athlon_idirect_amdfam10" 1
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "direct")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "none,unknown"))))
-+			 "athlon-direct,athlon-ieu")
- (define_insn_reservation "athlon_ivector" 2
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "vector")
- 				   (and (eq_attr "unit" "integer,unknown")
- 					(eq_attr "memory" "none,unknown"))))
- 			 "athlon-vector,athlon-ieu,athlon-ieu")
-+(define_insn_reservation "athlon_ivector_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "vector")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "none,unknown"))))
-+			 "athlon-vector,athlon-ieu,athlon-ieu")
-+
- (define_insn_reservation "athlon_idirect_loadmov" 3
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "imov")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-load")
-+
- (define_insn_reservation "athlon_idirect_load" 4
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "direct")
- 				   (and (eq_attr "unit" "integer,unknown")
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-load,athlon-ieu")
-+(define_insn_reservation "athlon_idirect_load_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "direct")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "load"))))
-+			 "athlon-direct,athlon-load,athlon-ieu")
- (define_insn_reservation "athlon_ivector_load" 6
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "vector")
- 				   (and (eq_attr "unit" "integer,unknown")
- 					(eq_attr "memory" "load"))))
- 			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
-+(define_insn_reservation "athlon_ivector_load_amdfam10" 6
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "vector")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "load"))))
-+			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
-+
- (define_insn_reservation "athlon_idirect_movstore" 1
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "imov")
- 				   (eq_attr "memory" "store")))
- 			 "athlon-direct,athlon-agu,athlon-store")
-+
- (define_insn_reservation "athlon_idirect_both" 4
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "direct")
-@@ -270,6 +321,15 @@
- 			 "athlon-direct,athlon-load,
- 			  athlon-ieu,athlon-store,
- 			  athlon-store")
-+(define_insn_reservation "athlon_idirect_both_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "direct")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "both"))))
-+			 "athlon-direct,athlon-load,
-+			  athlon-ieu,athlon-store,
-+			  athlon-store")			  
-+
- (define_insn_reservation "athlon_ivector_both" 6
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "vector")
-@@ -279,6 +339,16 @@
- 			  athlon-ieu,
- 			  athlon-ieu,
- 			  athlon-store")
-+(define_insn_reservation "athlon_ivector_both_amdfam10" 6
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "vector")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "both"))))
-+			 "athlon-vector,athlon-load,
-+			  athlon-ieu,
-+			  athlon-ieu,
-+			  athlon-store")
-+
- (define_insn_reservation "athlon_idirect_store" 1
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "direct")
-@@ -286,6 +356,14 @@
- 					(eq_attr "memory" "store"))))
- 			 "athlon-direct,(athlon-ieu+athlon-agu),
- 			  athlon-store")
-+(define_insn_reservation "athlon_idirect_store_amdfam10" 1
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "direct")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "store"))))
-+			 "athlon-direct,(athlon-ieu+athlon-agu),
-+			  athlon-store")
-+
- (define_insn_reservation "athlon_ivector_store" 2
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "athlon_decode" "vector")
-@@ -293,6 +371,13 @@
- 					(eq_attr "memory" "store"))))
- 			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
- 			  athlon-store")
-+(define_insn_reservation "athlon_ivector_store_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "amdfam10_decode" "vector")
-+				   (and (eq_attr "unit" "integer,unknown")
-+					(eq_attr "memory" "store"))))
-+			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
-+			  athlon-store")
- 
- ;; Athlon floatin point unit
- (define_insn_reservation "athlon_fldxf" 12
-@@ -302,7 +387,7 @@
- 					(eq_attr "mode" "XF"))))
- 			 "athlon-vector,athlon-fpload2,athlon-fvector*9")
- (define_insn_reservation "athlon_fldxf_k8" 13
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fmov")
- 				   (and (eq_attr "memory" "load")
- 					(eq_attr "mode" "XF"))))
-@@ -314,7 +399,7 @@
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fpload,athlon-fany")
- (define_insn_reservation "athlon_fld_k8" 2
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fmov")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
-@@ -326,7 +411,7 @@
- 					(eq_attr "mode" "XF"))))
- 			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
- (define_insn_reservation "athlon_fstxf_k8" 8
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fmov")
- 				   (and (eq_attr "memory" "store,both")
- 					(eq_attr "mode" "XF"))))
-@@ -337,16 +422,16 @@
- 				   (eq_attr "memory" "store,both")))
- 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
- (define_insn_reservation "athlon_fst_k8" 2
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fmov")
- 				   (eq_attr "memory" "store,both")))
- 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
- (define_insn_reservation "athlon_fist" 4
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fistp,fisttp"))
- 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
- (define_insn_reservation "athlon_fmov" 2
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fmov"))
- 			 "athlon-direct,athlon-fpsched,athlon-faddmul")
- (define_insn_reservation "athlon_fadd_load" 4
-@@ -355,12 +440,12 @@
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fpload,athlon-fadd")
- (define_insn_reservation "athlon_fadd_load_k8" 6
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fop")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_fadd" 4
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fop"))
- 			 "athlon-direct,athlon-fpsched,athlon-fadd")
- (define_insn_reservation "athlon_fmul_load" 4
-@@ -369,16 +454,16 @@
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fpload,athlon-fmul")
- (define_insn_reservation "athlon_fmul_load_k8" 6
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fmul")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
- (define_insn_reservation "athlon_fmul" 4
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fmul"))
- 			 "athlon-direct,athlon-fpsched,athlon-fmul")
- (define_insn_reservation "athlon_fsgn" 2
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fsgn"))
- 			 "athlon-direct,athlon-fpsched,athlon-fmul")
- (define_insn_reservation "athlon_fdiv_load" 24
-@@ -387,7 +472,7 @@
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fpload,athlon-fmul")
- (define_insn_reservation "athlon_fdiv_load_k8" 13
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fdiv")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
-@@ -396,16 +481,16 @@
- 			      (eq_attr "type" "fdiv"))
- 			 "athlon-direct,athlon-fpsched,athlon-fmul")
- (define_insn_reservation "athlon_fdiv_k8" 11
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (eq_attr "type" "fdiv"))
- 			 "athlon-direct,athlon-fpsched,athlon-fmul")
- (define_insn_reservation "athlon_fpspc_load" 103
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fpspc")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-vector,athlon-fpload,athlon-fvector")
- (define_insn_reservation "athlon_fpspc" 100
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fpspc"))
- 			 "athlon-vector,athlon-fpsched,athlon-fvector")
- (define_insn_reservation "athlon_fcmov_load" 7
-@@ -418,12 +503,12 @@
- 			      (eq_attr "type" "fcmov"))
- 			 "athlon-vector,athlon-fpsched,athlon-fvector")
- (define_insn_reservation "athlon_fcmov_load_k8" 17
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fcmov")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-vector,athlon-fploadk8,athlon-fvector")
- (define_insn_reservation "athlon_fcmov_k8" 15
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (eq_attr "type" "fcmov"))
- 			 "athlon-vector,athlon-fpsched,athlon-fvector")
- ;; fcomi is vector decoded by uses only one pipe.
-@@ -434,13 +519,13 @@
- 				        (eq_attr "memory" "load"))))
- 			 "athlon-vector,athlon-fpload,athlon-fadd")
- (define_insn_reservation "athlon_fcomi_load_k8" 5
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fcmp")
- 				   (and (eq_attr "athlon_decode" "vector")
- 				        (eq_attr "memory" "load"))))
- 			 "athlon-vector,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_fcomi" 3
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "athlon_decode" "vector")
- 				   (eq_attr "type" "fcmp")))
- 			 "athlon-vector,athlon-fpsched,athlon-fadd")
-@@ -450,18 +535,18 @@
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fpload,athlon-fadd")
- (define_insn_reservation "athlon_fcom_load_k8" 4
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "fcmp")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_fcom" 2
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (eq_attr "type" "fcmp"))
- 			 "athlon-direct,athlon-fpsched,athlon-fadd")
- ;; Never seen by the scheduler because we still don't do post reg-stack
- ;; scheduling.
- ;(define_insn_reservation "athlon_fxch" 2
--;			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+;			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- ;			      (eq_attr "type" "fxch"))
- ;			 "athlon-direct,athlon-fpsched,athlon-fany")
- 
-@@ -516,6 +601,23 @@
- 			      (and (eq_attr "type" "mmxmov,ssemov")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
-+;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
-+;; loads generated are direct path, latency of 2 and do not use any FP
-+;; execution units.  No separate entries for movlpx/movhpx loads, which
-+;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
-+;; as they will not be generated.
-+(define_insn_reservation "athlon_sseld_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssemov")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8")
-+;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
-+;; and can use any FP execution unit.
-+(define_insn_reservation "athlon_mmxld_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "mmxmov")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8, athlon-fany")			 
- (define_insn_reservation "athlon_mmxssest" 3
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (and (eq_attr "type" "mmxmov,ssemov")
-@@ -533,6 +635,25 @@
- 			      (and (eq_attr "type" "mmxmov,ssemov")
- 				   (eq_attr "memory" "store,both")))
- 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
-+;; On AMDFAM10 all double, single and integer packed SSEx data stores
-+;; generated are all double path, latency of 2 and use the FSTORE FP
-+;; execution unit.  No separate entries for movupx/movdqu, which are
-+;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
-+;; as they will not be generated.
-+(define_insn_reservation "athlon_ssest_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssemov")
-+				   (and (eq_attr "mode" "V4SF,V2DF,TI")
-+					(eq_attr "memory" "store,both"))))
-+			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
-+;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
-+;; data stores generated are all direct path, latency of 2 and use
-+;; the FSTORE FP execution unit
-+(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "mmxmov,ssemov")
-+				   (eq_attr "memory" "store,both")))
-+			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
- (define_insn_reservation "athlon_movaps_k8" 2
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (and (eq_attr "type" "ssemov")
-@@ -578,6 +699,11 @@
- 			      (and (eq_attr "type" "sselog,sselog1")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
-+(define_insn_reservation "athlon_sselog_load_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sselog,sselog1")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
- (define_insn_reservation "athlon_sselog" 3
- 			 (and (eq_attr "cpu" "athlon")
- 			      (eq_attr "type" "sselog,sselog1"))
-@@ -586,6 +712,11 @@
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (eq_attr "type" "sselog,sselog1"))
- 			 "athlon-double,athlon-fpsched,athlon-fmul")
-+(define_insn_reservation "athlon_sselog_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "sselog,sselog1"))
-+			 "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
-+
- ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
- (define_insn_reservation "athlon_ssecmp_load" 2
- 			 (and (eq_attr "cpu" "athlon")
-@@ -594,13 +725,13 @@
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fpload,athlon-fadd")
- (define_insn_reservation "athlon_ssecmp_load_k8" 4
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "ssecmp")
- 				   (and (eq_attr "mode" "SF,DF,DI,TI")
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_ssecmp" 2
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "ssecmp")
- 				   (eq_attr "mode" "SF,DF,DI,TI")))
- 			 "athlon-direct,athlon-fpsched,athlon-fadd")
-@@ -614,6 +745,11 @@
- 			      (and (eq_attr "type" "ssecmp")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
-+(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecmp")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_ssecmpvector" 3
- 			 (and (eq_attr "cpu" "athlon")
- 			      (eq_attr "type" "ssecmp"))
-@@ -622,6 +758,10 @@
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (eq_attr "type" "ssecmp"))
- 			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
-+(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "ssecmp"))
-+			 "athlon-direct,athlon-fpsched,athlon-fadd")
- (define_insn_reservation "athlon_ssecomi_load" 4
- 			 (and (eq_attr "cpu" "athlon")
- 			      (and (eq_attr "type" "ssecomi")
-@@ -632,10 +772,20 @@
- 			      (and (eq_attr "type" "ssecomi")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-vector,athlon-fploadk8,athlon-fadd")
-+(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecomi")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_ssecomi" 4
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (eq_attr "type" "ssecmp"))
- 			 "athlon-vector,athlon-fpsched,athlon-fadd")
-+(define_insn_reservation "athlon_ssecomi_amdfam10" 3
-+			 (and (eq_attr "cpu" "amdfam10")
-+;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
-+			      (eq_attr "type" "ssecomi"))
-+			 "athlon-direct,athlon-fpsched,athlon-fadd")
- (define_insn_reservation "athlon_sseadd_load" 4
- 			 (and (eq_attr "cpu" "athlon")
- 			      (and (eq_attr "type" "sseadd")
-@@ -643,13 +793,13 @@
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fpload,athlon-fadd")
- (define_insn_reservation "athlon_sseadd_load_k8" 6
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "sseadd")
- 				   (and (eq_attr "mode" "SF,DF,DI")
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_sseadd" 4
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "sseadd")
- 				   (eq_attr "mode" "SF,DF,DI")))
- 			 "athlon-direct,athlon-fpsched,athlon-fadd")
-@@ -663,6 +813,11 @@
- 			      (and (eq_attr "type" "sseadd")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
-+(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseadd")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
- (define_insn_reservation "athlon_sseaddvector" 5
- 			 (and (eq_attr "cpu" "athlon")
- 			      (eq_attr "type" "sseadd"))
-@@ -671,6 +826,10 @@
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (eq_attr "type" "sseadd"))
- 			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
-+(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "sseadd"))
-+			 "athlon-direct,athlon-fpsched,athlon-fadd")
- 
- ;; Conversions behaves very irregularly and the scheduling is critical here.
- ;; Take each instruction separately.  Assume that the mode is always set to the
-@@ -684,12 +843,25 @@
- 					(and (eq_attr "mode" "DF")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
-+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "DF")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
- (define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "type" "ssecvt")
- 				   (and (eq_attr "athlon_decode" "direct")
- 					(eq_attr "mode" "DF"))))
- 			 "athlon-direct,athlon-fpsched,athlon-fstore")
-+(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "vector")
-+					(eq_attr "mode" "DF"))))
-+			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
- ;; cvtps2pd.  Model same way the other double decoded FP conversions.
- (define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
- 			 (and (eq_attr "cpu" "k8,athlon,generic64")
-@@ -698,12 +870,25 @@
- 					(and (eq_attr "mode" "V2DF,V4SF,TI")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
-+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "direct")
-+					(and (eq_attr "mode" "V2DF,V4SF,TI")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
- (define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
- 			 (and (eq_attr "cpu" "k8,athlon,generic64")
- 			      (and (eq_attr "type" "ssecvt")
- 				   (and (eq_attr "athlon_decode" "double")
- 					(eq_attr "mode" "V2DF,V4SF,TI"))))
- 			 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
-+(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "direct")
-+					(eq_attr "mode" "V2DF,V4SF,TI"))))
-+			 "athlon-direct,athlon-fpsched,athlon-fstore")
- ;; cvtsi2sd mem,reg is directpath path  (cvtsi2sd reg,reg is doublepath)
- ;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6
- (define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
-@@ -713,6 +898,13 @@
- 					(and (eq_attr "mode" "SF,DF")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
-+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "SF,DF")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
- ;; cvtsi2ss mem, reg is doublepath
- (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
- 			 (and (eq_attr "cpu" "athlon")
-@@ -728,6 +920,13 @@
- 					(and (eq_attr "mode" "SF,DF")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
-+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "SF,DF")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")			 
- ;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
- (define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
- 			 (and (eq_attr "cpu" "k8,athlon,generic64")
-@@ -736,6 +935,13 @@
- 					(and (eq_attr "mode" "SF,DF")
- 					     (eq_attr "memory" "none")))))
- 			 "athlon-double,athlon-fploadk8,athlon-fstore")
-+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "vector")
-+					(and (eq_attr "mode" "SF,DF")
-+					     (eq_attr "memory" "none")))))
-+			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
- ;; cvtsi2ss reg, reg is doublepath
- (define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
-@@ -744,6 +950,13 @@
- 					(and (eq_attr "mode" "SF,DF")
- 					     (eq_attr "memory" "none")))))
- 			 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
-+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "vector")
-+					(and (eq_attr "mode" "SF,DF")
-+					     (eq_attr "memory" "none")))))
-+			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
- ;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9
- (define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
- 			 (and (eq_attr "cpu" "k8,athlon,generic64")
-@@ -752,6 +965,13 @@
- 					(and (eq_attr "mode" "SF")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
-+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "SF")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
- ;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12
- (define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
-@@ -760,6 +980,13 @@
- 					(and (eq_attr "mode" "SF")
- 					     (eq_attr "memory" "none")))))
- 			 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
-+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "vector")
-+					(and (eq_attr "mode" "SF")
-+					     (eq_attr "memory" "none")))))
-+			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
- (define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
- 			      (and (eq_attr "type" "ssecvt")
-@@ -767,6 +994,13 @@
- 					(and (eq_attr "mode" "V4SF,V2DF,TI")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
-+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "V4SF,V2DF,TI")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
- ;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10
- ;; ??? Why it is fater than cvtsd2ss?
- (define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
-@@ -776,6 +1010,13 @@
- 					(and (eq_attr "mode" "V4SF,V2DF,TI")
- 					     (eq_attr "memory" "none")))))
- 			 "athlon-vector,athlon-fpsched,athlon-fvector*2")
-+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssecvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "V4SF,V2DF,TI")
-+					     (eq_attr "memory" "none")))))
-+			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
- ;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9
- (define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
- 			 (and (eq_attr "cpu" "athlon,k8,generic64")
-@@ -784,6 +1025,13 @@
- 					(and (eq_attr "mode" "SI,DI")
- 					     (eq_attr "memory" "load")))))
- 			 "athlon-vector,athlon-fploadk8,athlon-fvector")
-+(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "SI,DI")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
- ;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9
- (define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
- 			 (and (eq_attr "cpu" "athlon")
-@@ -799,6 +1047,29 @@
- 					(and (eq_attr "mode" "SI,DI")
- 					     (eq_attr "memory" "none")))))
- 			 "athlon-double,athlon-fpsched,athlon-fstore")
-+(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "SI,DI")
-+					     (eq_attr "memory" "none")))))
-+			 "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
-+;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
-+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "TI")
-+					     (eq_attr "memory" "load")))))
-+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
-+;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
-+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "sseicvt")
-+				   (and (eq_attr "amdfam10_decode" "double")
-+					(and (eq_attr "mode" "TI")
-+					     (eq_attr "memory" "none")))))
-+			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
- 
- 
- (define_insn_reservation "athlon_ssemul_load" 4
-@@ -808,13 +1079,13 @@
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fpload,athlon-fmul")
- (define_insn_reservation "athlon_ssemul_load_k8" 6
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "ssemul")
- 				   (and (eq_attr "mode" "SF,DF")
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
- (define_insn_reservation "athlon_ssemul" 4
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "ssemul")
- 				   (eq_attr "mode" "SF,DF")))
- 			 "athlon-direct,athlon-fpsched,athlon-fmul")
-@@ -828,6 +1099,11 @@
- 			      (and (eq_attr "type" "ssemul")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
-+(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssemul")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8,athlon-fmul")
- (define_insn_reservation "athlon_ssemulvector" 5
- 			 (and (eq_attr "cpu" "athlon")
- 			      (eq_attr "type" "ssemul"))
-@@ -836,6 +1112,10 @@
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (eq_attr "type" "ssemul"))
- 			 "athlon-double,athlon-fpsched,(athlon-fmul*2)")
-+(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "ssemul"))
-+			 "athlon-direct,athlon-fpsched,athlon-fmul")			 
- ;; divsd timings.  divss is faster
- (define_insn_reservation "athlon_ssediv_load" 20
- 			 (and (eq_attr "cpu" "athlon")
-@@ -844,13 +1124,13 @@
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fpload,athlon-fmul*17")
- (define_insn_reservation "athlon_ssediv_load_k8" 22
--			 (and (eq_attr "cpu" "k8,generic64")
-+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "ssediv")
- 				   (and (eq_attr "mode" "SF,DF")
- 					(eq_attr "memory" "load"))))
- 			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
- (define_insn_reservation "athlon_ssediv" 20
--			 (and (eq_attr "cpu" "athlon,k8,generic64")
-+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
- 			      (and (eq_attr "type" "ssediv")
- 				   (eq_attr "mode" "SF,DF")))
- 			 "athlon-direct,athlon-fpsched,athlon-fmul*17")
-@@ -864,6 +1144,11 @@
- 			      (and (eq_attr "type" "ssediv")
- 				   (eq_attr "memory" "load")))
- 			 "athlon-double,athlon-fpload2k8,athlon-fmul*34")
-+(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (and (eq_attr "type" "ssediv")
-+				   (eq_attr "memory" "load")))
-+			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")			 
- (define_insn_reservation "athlon_ssedivvector" 39
- 			 (and (eq_attr "cpu" "athlon")
- 			      (eq_attr "type" "ssediv"))
-@@ -872,3 +1157,12 @@
- 			 (and (eq_attr "cpu" "k8,generic64")
- 			      (eq_attr "type" "ssediv"))
- 			 "athlon-double,athlon-fmul*34")
-+(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
-+			 (and (eq_attr "cpu" "amdfam10")
-+			      (eq_attr "type" "ssediv"))
-+			 "athlon-direct,athlon-fmul*17")
-+(define_insn_reservation "athlon_sseins_amdfam10" 5
-+                         (and (eq_attr "cpu" "amdfam10")
-+                              (and (eq_attr "type" "sseins")
-+                                   (eq_attr "mode" "TI")))
-+                         "athlon-vector,athlon-fpsched,athlon-faddmul")
---- gcc/config/i386/pmmintrin.h.jj	2006-10-05 00:29:29.000000000 +0200
-+++ gcc/config/i386/pmmintrin.h	2007-02-09 21:26:06.000000000 +0100
-@@ -30,7 +30,11 @@
- #ifndef _PMMINTRIN_H_INCLUDED
- #define _PMMINTRIN_H_INCLUDED
- 
--#ifdef __SSE3__
-+#ifndef __SSE3__
-+# error "SSE3 instruction set not enabled"
-+#else
-+
-+/* We need definitions from the SSE2 and SSE header files*/
- #include <xmmintrin.h>
- #include <emmintrin.h>
- 
---- gcc/config/i386/tmmintrin.h.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/config/i386/tmmintrin.h	2007-02-09 21:26:06.000000000 +0100
-@@ -30,7 +30,11 @@
- #ifndef _TMMINTRIN_H_INCLUDED
- #define _TMMINTRIN_H_INCLUDED
- 
--#ifdef __SSSE3__
-+#ifndef __SSSE3__
-+# error "SSSE3 instruction set not enabled"
-+#else
-+
-+/* We need definitions from the SSE3, SSE2 and SSE header files*/
- #include <pmmintrin.h>
- 
- static __inline __m128i
---- gcc/config/i386/sse.md.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/config/i386/sse.md	2007-02-09 21:26:06.000000000 +0100
-@@ -963,6 +963,7 @@
-   "cvtsi2ss\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "vector,double")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "mode" "SF")])
- 
- (define_insn "sse_cvtsi2ssq"
-@@ -976,6 +977,7 @@
-   "cvtsi2ssq\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "vector,double")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "mode" "SF")])
- 
- (define_insn "sse_cvtss2si"
-@@ -989,6 +991,7 @@
-   "cvtss2si\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")
-    (set_attr "mode" "SI")])
- 
- (define_insn "sse_cvtss2siq"
-@@ -1002,6 +1005,7 @@
-   "cvtss2siq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")
-    (set_attr "mode" "DI")])
- 
- (define_insn "sse_cvttss2si"
-@@ -1014,6 +1018,7 @@
-   "cvttss2si\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")
-    (set_attr "mode" "SI")])
- 
- (define_insn "sse_cvttss2siq"
-@@ -1026,6 +1031,7 @@
-   "cvttss2siq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")
-    (set_attr "mode" "DI")])
- 
- (define_insn "sse2_cvtdq2ps"
-@@ -1921,7 +1927,8 @@
-   "cvtsi2sd\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
--   (set_attr "athlon_decode" "double,direct")])
-+   (set_attr "athlon_decode" "double,direct")
-+   (set_attr "amdfam10_decode" "vector,double")])
- 
- (define_insn "sse2_cvtsi2sdq"
-   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-@@ -1934,7 +1941,8 @@
-   "cvtsi2sdq\t{%2, %0|%0, %2}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DF")
--   (set_attr "athlon_decode" "double,direct")])
-+   (set_attr "athlon_decode" "double,direct")
-+   (set_attr "amdfam10_decode" "vector,double")])
- 
- (define_insn "sse2_cvtsd2si"
-   [(set (match_operand:SI 0 "register_operand" "=r,r")
-@@ -1947,6 +1955,7 @@
-   "cvtsd2si\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")
-    (set_attr "mode" "SI")])
- 
- (define_insn "sse2_cvtsd2siq"
-@@ -1960,6 +1969,7 @@
-   "cvtsd2siq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")
-    (set_attr "mode" "DI")])
- 
- (define_insn "sse2_cvttsd2si"
-@@ -1972,7 +1982,8 @@
-   "cvttsd2si\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "SI")
--   (set_attr "athlon_decode" "double,vector")])
-+   (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")])
- 
- (define_insn "sse2_cvttsd2siq"
-   [(set (match_operand:DI 0 "register_operand" "=r,r")
-@@ -1984,7 +1995,8 @@
-   "cvttsd2siq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "sseicvt")
-    (set_attr "mode" "DI")
--   (set_attr "athlon_decode" "double,vector")])
-+   (set_attr "athlon_decode" "double,vector")
-+   (set_attr "amdfam10_decode" "double,double")])
- 
- (define_insn "sse2_cvtdq2pd"
-   [(set (match_operand:V2DF 0 "register_operand" "=x")
-@@ -2015,7 +2027,8 @@
-   "TARGET_SSE2"
-   "cvtpd2dq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "ssecvt")
--   (set_attr "mode" "TI")])
-+   (set_attr "mode" "TI")
-+   (set_attr "amdfam10_decode" "double")])
- 
- (define_expand "sse2_cvttpd2dq"
-   [(set (match_operand:V4SI 0 "register_operand" "")
-@@ -2033,7 +2046,8 @@
-   "TARGET_SSE2"
-   "cvttpd2dq\t{%1, %0|%0, %1}"
-   [(set_attr "type" "ssecvt")
--   (set_attr "mode" "TI")])
-+   (set_attr "mode" "TI")
-+   (set_attr "amdfam10_decode" "double")])
- 
- (define_insn "sse2_cvtsd2ss"
-   [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-@@ -2047,20 +2061,22 @@
-   "cvtsd2ss\t{%2, %0|%0, %2}"
-   [(set_attr "type" "ssecvt")
-    (set_attr "athlon_decode" "vector,double")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "mode" "SF")])
- 
- (define_insn "sse2_cvtss2sd"
--  [(set (match_operand:V2DF 0 "register_operand" "=x")
-+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
- 	(vec_merge:V2DF
- 	  (float_extend:V2DF
- 	    (vec_select:V2SF
--	      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-+	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
- 	      (parallel [(const_int 0) (const_int 1)])))
--	  (match_operand:V2DF 1 "register_operand" "0")
-+	  (match_operand:V2DF 1 "register_operand" "0,0")
- 	  (const_int 1)))]
-   "TARGET_SSE2"
-   "cvtss2sd\t{%2, %0|%0, %2}"
-   [(set_attr "type" "ssecvt")
-+   (set_attr "amdfam10_decode" "vector,double")
-    (set_attr "mode" "DF")])
- 
- (define_expand "sse2_cvtpd2ps"
-@@ -2081,7 +2097,8 @@
-   "TARGET_SSE2"
-   "cvtpd2ps\t{%1, %0|%0, %1}"
-   [(set_attr "type" "ssecvt")
--   (set_attr "mode" "V4SF")])
-+   (set_attr "mode" "V4SF")
-+   (set_attr "amdfam10_decode" "double")])
- 
- (define_insn "sse2_cvtps2pd"
-   [(set (match_operand:V2DF 0 "register_operand" "=x")
-@@ -2092,7 +2109,8 @@
-   "TARGET_SSE2"
-   "cvtps2pd\t{%1, %0|%0, %1}"
-   [(set_attr "type" "ssecvt")
--   (set_attr "mode" "V2DF")])
-+   (set_attr "mode" "V2DF")
-+   (set_attr "amdfam10_decode" "direct")])
- 
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ;;
-@@ -4550,3 +4568,92 @@
-   "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
-   [(set_attr "type" "sselog1")
-    (set_attr "mode" "DI")])
-+
-+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-+;;
-+;; AMD SSE4A instructions
-+;;
-+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-+
-+(define_insn "sse4a_vmmovntv2df"
-+  [(set (match_operand:DF 0 "memory_operand" "=m")
-+        (unspec:DF [(vec_select:DF 
-+                      (match_operand:V2DF 1 "register_operand" "x")
-+                      (parallel [(const_int 0)]))]
-+                   UNSPEC_MOVNT))]
-+  "TARGET_SSE4A"
-+  "movntsd\t{%1, %0|%0, %1}"
-+  [(set_attr "type" "ssemov")
-+   (set_attr "mode" "DF")])
-+
-+(define_insn "sse4a_movntdf"
-+  [(set (match_operand:DF 0 "memory_operand" "=m")
-+        (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
-+                   UNSPEC_MOVNT))]
-+  "TARGET_SSE4A"
-+  "movntsd\t{%1, %0|%0, %1}"
-+  [(set_attr "type" "ssemov")
-+   (set_attr "mode" "DF")])
-+
-+(define_insn "sse4a_vmmovntv4sf"
-+  [(set (match_operand:SF 0 "memory_operand" "=m")
-+	(unspec:SF [(vec_select:SF 
-+	              (match_operand:V4SF 1 "register_operand" "x")
-+		      (parallel [(const_int 0)]))]
-+		   UNSPEC_MOVNT))]
-+  "TARGET_SSE4A"
-+  "movntss\t{%1, %0|%0, %1}"
-+  [(set_attr "type" "ssemov")
-+   (set_attr "mode" "SF")])
-+
-+(define_insn "sse4a_movntsf"
-+  [(set (match_operand:SF 0 "memory_operand" "=m")
-+	(unspec:SF [(match_operand:SF 1 "register_operand" "x")]
-+		   UNSPEC_MOVNT))]
-+  "TARGET_SSE4A"
-+  "movntss\t{%1, %0|%0, %1}"
-+  [(set_attr "type" "ssemov")
-+   (set_attr "mode" "SF")])
-+
-+(define_insn "sse4a_extrqi"
-+  [(set (match_operand:V2DI 0 "register_operand" "=x")
-+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
-+                      (match_operand 2 "const_int_operand" "")
-+                      (match_operand 3 "const_int_operand" "")]
-+                     UNSPEC_EXTRQI))]
-+  "TARGET_SSE4A"
-+  "extrq\t{%3, %2, %0|%0, %2, %3}"
-+  [(set_attr "type" "sse")
-+   (set_attr "mode" "TI")])
-+
-+(define_insn "sse4a_extrq"
-+  [(set (match_operand:V2DI 0 "register_operand" "=x")
-+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
-+                      (match_operand:V16QI 2 "register_operand" "x")]
-+                     UNSPEC_EXTRQ))]
-+  "TARGET_SSE4A"
-+  "extrq\t{%2, %0|%0, %2}"
-+  [(set_attr "type" "sse")
-+   (set_attr "mode" "TI")])
-+
-+(define_insn "sse4a_insertqi"
-+  [(set (match_operand:V2DI 0 "register_operand" "=x")
-+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
-+        	      (match_operand:V2DI 2 "register_operand" "x")
-+                      (match_operand 3 "const_int_operand" "")
-+                      (match_operand 4 "const_int_operand" "")]
-+                     UNSPEC_INSERTQI))]
-+  "TARGET_SSE4A"
-+  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
-+  [(set_attr "type" "sseins")
-+   (set_attr "mode" "TI")])
-+
-+(define_insn "sse4a_insertq"
-+  [(set (match_operand:V2DI 0 "register_operand" "=x")
-+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
-+        	      (match_operand:V2DI 2 "register_operand" "x")]
-+        	     UNSPEC_INSERTQ))]
-+  "TARGET_SSE4A"
-+  "insertq\t{%2, %0|%0, %2}"
-+  [(set_attr "type" "sseins")
-+   (set_attr "mode" "TI")])
---- gcc/config/i386/i386.opt.jj	2007-02-09 16:18:25.000000000 +0100
-+++ gcc/config/i386/i386.opt	2007-02-09 21:26:06.000000000 +0100
-@@ -205,6 +205,22 @@ mmni
- Target Undocumented Mask(SSSE3) MaskExists
- Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
- 
-+msse4a
-+Target Report Mask(SSE4A)
-+Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
-+
-+mpopcnt
-+Target Report Mask(POPCNT)
-+Support code generation of popcount instruction for popcount built-ins 
-+namely __builtin_popcount, __builtin_popcountl and __builtin_popcountll
-+
-+mabm
-+Target Report Mask(ABM)
-+Support code generation of Advanced Bit Manipulation (ABM) instructions,
-+which include popcnt and lzcnt instructions, for popcount and clz built-ins
-+namely __builtin_popcount, __builtin_popcountl, __builtin_popcountll and
-+__builtin_clz, __builtin_clzl, __builtin_clzll
-+
- msseregparm
- Target RejectNegative Mask(SSEREGPARM)
- Use SSE register passing conventions for SF and DF mode
---- gcc/config/i386/ammintrin.h.jj	2007-02-09 21:26:06.000000000 +0100
-+++ gcc/config/i386/ammintrin.h	2007-02-09 21:26:06.000000000 +0100
-@@ -0,0 +1,73 @@
-+/* Copyright (C) 2007 Free Software Foundation, Inc.
-+
-+   This file is part of GCC.
-+
-+   GCC is free software; you can redistribute it and/or modify
-+   it under the terms of the GNU General Public License as published by
-+   the Free Software Foundation; either version 2, or (at your option)
-+   any later version.
-+
-+   GCC is distributed in the hope that it will be useful,
-+   but WITHOUT ANY WARRANTY; without even the implied warranty of
-+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+   GNU General Public License for more details.
-+
-+   You should have received a copy of the GNU General Public License
-+   along with GCC; see the file COPYING.  If not, write to
-+   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-+   Boston, MA 02110-1301, USA.  */
-+
-+/* As a special exception, if you include this header file into source
-+   files compiled by GCC, this header file does not by itself cause
-+   the resulting executable to be covered by the GNU General Public
-+   License.  This exception does not however invalidate any other
-+   reasons why the executable file might be covered by the GNU General
-+   Public License.  */
-+
-+/* Implemented from the specification included in the AMD Programmers
-+   Manual Update, version 2.x */
-+
-+#ifndef _AMMINTRIN_H_INCLUDED
-+#define _AMMINTRIN_H_INCLUDED
-+
-+#ifndef __SSE4A__
-+# error "SSE4A instruction set not enabled"
-+#else
-+
-+/* We need definitions from the SSE3, SSE2 and SSE header files*/
-+#include <pmmintrin.h>
-+
-+static __inline void __attribute__((__always_inline__))
-+_mm_stream_sd (double * __P, __m128d __Y)
-+{
-+  __builtin_ia32_movntsd (__P, (__v2df) __Y);
-+}
-+
-+static __inline void __attribute__((__always_inline__))
-+_mm_stream_ss (float * __P, __m128 __Y)
-+{
-+  __builtin_ia32_movntss (__P, (__v4sf) __Y);
-+}
-+
-+static __inline __m128i __attribute__((__always_inline__))
-+_mm_extract_si64 (__m128i __X, __m128i __Y)
-+{
-+  return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
-+}
-+
-+#define _mm_extracti_si64(X, I, L) \
-+((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
-+
-+static __inline __m128i __attribute__((__always_inline__))
-+_mm_insert_si64 (__m128i __X,__m128i __Y)
-+{
-+  return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
-+}
-+
-+#define _mm_inserti_si64(X, Y, I, L) \
-+((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
-+
-+
-+#endif /* __SSE4A__ */
-+
-+#endif /* _AMMINTRIN_H_INCLUDED */
---- gcc/config/i386/emmintrin.h.jj	2006-10-05 00:29:29.000000000 +0200
-+++ gcc/config/i386/emmintrin.h	2007-02-09 21:26:06.000000000 +0100
-@@ -30,7 +30,11 @@
- #ifndef _EMMINTRIN_H_INCLUDED
- #define _EMMINTRIN_H_INCLUDED
- 
--#ifdef __SSE2__
-+#ifndef __SSE2__
-+# error "SSE2 instruction set not enabled"
-+#else
-+
-+/* We need definitions from the SSE header files*/
- #include <xmmintrin.h>
- 
- /* SSE2 */
---- gcc/config/i386/i386.c.jj	2007-02-09 16:24:00.000000000 +0100
-+++ gcc/config/i386/i386.c	2007-02-10 19:47:05.000000000 +0100
-@@ -534,6 +534,71 @@ struct processor_costs k8_cost = {
-   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
- };
- 
-+struct processor_costs amdfam10_cost = {
-+  COSTS_N_INSNS (1),                    /* cost of an add instruction */
-+  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
-+  COSTS_N_INSNS (1),                    /* variable shift costs */
-+  COSTS_N_INSNS (1),                    /* constant shift costs */
-+  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
-+   COSTS_N_INSNS (4),                   /*                               HI */
-+   COSTS_N_INSNS (3),                   /*                               SI */
-+   COSTS_N_INSNS (4),                   /*                               DI */
-+   COSTS_N_INSNS (5)},                  /*                               other */
-+  0,                                    /* cost of multiply per each bit set */
-+  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
-+   COSTS_N_INSNS (35),                  /*                          HI */
-+   COSTS_N_INSNS (51),                  /*                          SI */
-+   COSTS_N_INSNS (83),                  /*                          DI */
-+   COSTS_N_INSNS (83)},                 /*                          other */
-+  COSTS_N_INSNS (1),			/* cost of movsx */
-+  COSTS_N_INSNS (1),			/* cost of movzx */
-+  8,					/* "large" insn */
-+  9,					/* MOVE_RATIO */
-+  4,					/* cost for loading QImode using movzbl */
-+  {3, 4, 3},				/* cost of loading integer registers
-+					   in QImode, HImode and SImode.
-+					   Relative to reg-reg move (2).  */
-+  {3, 4, 3},				/* cost of storing integer registers */
-+  4,					/* cost of reg,reg fld/fst */
-+  {4, 4, 12},				/* cost of loading fp registers
-+		   			   in SFmode, DFmode and XFmode */
-+  {6, 6, 8},				/* cost of storing fp registers
-+ 		   			   in SFmode, DFmode and XFmode */
-+  2,					/* cost of moving MMX register */
-+  {3, 3},				/* cost of loading MMX registers
-+					   in SImode and DImode */
-+  {4, 4},				/* cost of storing MMX registers
-+					   in SImode and DImode */
-+  2,					/* cost of moving SSE register */
-+  {4, 4, 3},				/* cost of loading SSE registers
-+					   in SImode, DImode and TImode */
-+  {4, 4, 5},				/* cost of storing SSE registers
-+					   in SImode, DImode and TImode */
-+  3,					/* MMX or SSE register to integer */
-+  					/* On K8
-+  					    MOVD reg64, xmmreg 	Double	FSTORE 4
-+					    MOVD reg32, xmmreg 	Double	FSTORE 4
-+					   On AMDFAM10
-+					    MOVD reg64, xmmreg 	Double	FADD 3
-+                                                                1/1  1/1
-+					    MOVD reg32, xmmreg 	Double	FADD 3
-+                                                                1/1  1/1 */
-+  64,					/* size of prefetch block */
-+  /* New AMD processors never drop prefetches; if they cannot be performed
-+     immediately, they are queued.  We set number of simultaneous prefetches
-+     to a large constant to reflect this (it probably is not a good idea not
-+     to limit number of prefetches at all, as their execution also takes some
-+     time).  */
-+  100,					/* number of parallel prefetches */
-+  5,					/* Branch cost */
-+  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
-+  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
-+  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
-+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
-+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
-+  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
-+};
-+
- static const
- struct processor_costs pentium4_cost = {
-   COSTS_N_INSNS (1),			/* cost of an add instruction */
-@@ -816,11 +881,13 @@ const struct processor_costs *ix86_cost 
- #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
- #define m_K8  (1<<PROCESSOR_K8)
- #define m_ATHLON_K8  (m_K8 | m_ATHLON)
-+#define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
- #define m_NOCONA  (1<<PROCESSOR_NOCONA)
- #define m_CORE2  (1<<PROCESSOR_CORE2)
- #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
- #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
- #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
-+#define m_ATHLON_K8_AMDFAM10  (m_K8 | m_ATHLON | m_AMDFAM10)
- 
- /* Generic instruction choice should be common subset of supported CPUs
-    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
-@@ -828,23 +895,31 @@ const struct processor_costs *ix86_cost 
- /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
-    Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
-    generic because it is not working well with PPro base chips.  */
--const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
--const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-+const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
-+                          | m_GENERIC64;
-+const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
-+                            | m_NOCONA | m_CORE2 | m_GENERIC;
- const int x86_zero_extend_with_and = m_486 | m_PENT;
--const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
-+/* Enable to zero extend integer registers to avoid partial dependencies */
-+const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
-+                     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
- const int x86_double_with_add = ~m_386;
- const int x86_use_bit_test = m_386;
--const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
--const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA; 
-+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
-+                              | m_K6 | m_CORE2 | m_GENERIC;
-+const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
-+                      | m_NOCONA;
- const int x86_fisttp = m_NOCONA;
--const int x86_3dnow_a = m_ATHLON_K8;
--const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-+const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
-+const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
-+                            | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
- /* Branch hints were put in P4 based on simulation result. But
-    after P4 was made, no performance benefit was observed with
-    branch hints. It also increases the code size. As the result,
-    icc never generates branch hints.  */
- const int x86_branch_hints = 0;
--const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
-+const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
-+                         /*m_GENERIC | m_ATHLON_K8 ? */
- /* We probably ought to watch for partial register stalls on Generic32
-    compilation setting as well.  However in current implementation the
-    partial register stalls are not eliminated very well - they can
-@@ -856,13 +931,16 @@ const int x86_use_sahf = m_PPRO | m_K6_G
- const int x86_partial_reg_stall = m_PPRO;
- const int x86_partial_flag_reg_stall =  m_CORE2 | m_GENERIC;
- const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
--const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
-+const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
-+                                  | m_CORE2 | m_GENERIC);
- const int x86_use_mov0 = m_K6;
- const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
- const int x86_read_modify_write = ~m_PENT;
- const int x86_read_modify = ~(m_PENT | m_PPRO);
- const int x86_split_long_moves = m_PPRO;
--const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
-+const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
-+                               | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
-+                               /* m_PENT4 ? */
- const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
- const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
- const int x86_qimode_math = ~(0);
-@@ -872,18 +950,37 @@ const int x86_promote_qi_regs = 0;
-    if our scheme for avoiding partial stalls was more effective.  */
- const int x86_himode_math = ~(m_PPRO);
- const int x86_promote_hi_regs = m_PPRO;
--const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
--const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
-+/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
-+const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
-+                          | m_CORE2 | m_GENERIC;
-+const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
-+                          | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-+const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
-+                          | m_CORE2 | m_GENERIC;
-+const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
-+                          | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-+/* Enable if integer moves are preferred for DFmode copies */
-+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
-+                                       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
-+const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
-+                                       | m_CORE2 | m_GENERIC;
-+const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
-+                                      | m_CORE2 | m_GENERIC;
-+/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
-+   for outgoing arguments will be computed and placed into the variable
-+   `current_function_outgoing_args_size'. No space will be pushed onto the stack
-+   for each call; instead, the function prologue should increase the stack frame
-+   size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
-+   not proper. */
-+const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
-+                                         | m_NOCONA | m_PPRO | m_CORE2
-+                                         | m_GENERIC;
- const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
- const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
- const int x86_shift1 = ~m_486;
--const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
-+                                           | m_ATHLON_K8_AMDFAM10 | m_PENT4
-+                                           | m_NOCONA | m_CORE2 | m_GENERIC;
- /* In Generic model we have an confict here in between PPro/Pentium4 based chips
-    that thread 128bit SSE registers as single units versus K8 based chips that
-    divide SSE registers to two 64bit halves.
-@@ -893,15 +990,66 @@ const int x86_arch_always_fancy_math_387
-    this option on P4 brings over 20% SPECfp regression, while enabling it on
-    K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
-    of moves.  */
--const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
-+const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
-+                                           | m_GENERIC | m_AMDFAM10;
- /* Set for machines where the type and dependencies are resolved on SSE
-    register parts instead of whole registers, so we may maintain just
-    lower part of scalar values in proper format leaving the upper part
-    undefined.  */
- const int x86_sse_split_regs = m_ATHLON_K8;
--const int x86_sse_typeless_stores = m_ATHLON_K8;
-+/* Code generation for scalar reg-reg moves of single and double precision data:
-+     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
-+       movaps reg, reg
-+     else
-+       movss reg, reg
-+     if (x86_sse_partial_reg_dependency == true)
-+       movapd reg, reg
-+     else
-+       movsd reg, reg
-+
-+   Code generation for scalar loads of double precision data:
-+     if (x86_sse_split_regs == true)
-+       movlpd mem, reg      (gas syntax)
-+     else
-+       movsd mem, reg
-+
-+   Code generation for unaligned packed loads of single precision data
-+   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
-+     if (x86_sse_unaligned_move_optimal)
-+       movups mem, reg
-+
-+     if (x86_sse_partial_reg_dependency == true)
-+       {
-+         xorps  reg, reg
-+         movlps mem, reg
-+         movhps mem+8, reg
-+       }
-+     else
-+       {
-+         movlps mem, reg
-+         movhps mem+8, reg
-+       }
-+
-+   Code generation for unaligned packed loads of double precision data
-+   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
-+     if (x86_sse_unaligned_move_optimal)
-+       movupd mem, reg
-+
-+     if (x86_sse_split_regs == true)
-+       {
-+         movlpd mem, reg
-+         movhpd mem+8, reg
-+       }
-+     else
-+       {
-+         movsd  mem, reg
-+         movhpd mem+8, reg
-+       }
-+ */
-+const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
-+const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
- const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
--const int x86_use_ffreep = m_ATHLON_K8;
-+const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
- const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
- const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
- 
-@@ -909,19 +1057,22 @@ const int x86_use_incdec = ~(m_PENT4 | m
-    integer data in xmm registers.  Which results in pretty abysmal code.  */
- const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
- 
--const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
-+const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4
-+                                    | m_NOCONA | m_PPRO | m_GENERIC32;
- /* Some CPU cores are not able to predict more than 4 branch instructions in
-    the 16 byte window.  */
--const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
--const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
--const int x86_use_bt = m_ATHLON_K8;
-+const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
-+                                | m_NOCONA | m_CORE2 | m_GENERIC;
-+const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
-+                         | m_CORE2 | m_GENERIC;
-+const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
- /* Compare and exchange was added for 80486.  */
- const int x86_cmpxchg = ~m_386;
- /* Compare and exchange 8 bytes was added for pentium.  */
- const int x86_cmpxchg8b = ~(m_386 | m_486);
- /* Exchange and add was added for 80486.  */
- const int x86_xadd = ~m_386;
--const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
-+const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
- 
- /* In case the average insn count for single function invocation is
-    lower than this constant, emit fast (but longer) prologue and
-@@ -1485,16 +1636,24 @@ ix86_handle_option (size_t code, const c
-     case OPT_msse:
-       if (!value)
- 	{
--	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
--	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
-+	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
-+	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
- 	}
-       return true;
- 
-     case OPT_msse2:
-       if (!value)
- 	{
--	  target_flags &= ~MASK_SSE3;
--	  target_flags_explicit |= MASK_SSE3;
-+	  target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
-+	  target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
-+	}
-+      return true;
-+
-+    case OPT_msse3:
-+      if (!value)
-+	{
-+	  target_flags &= ~MASK_SSE4A;
-+	  target_flags_explicit |= MASK_SSE4A;
- 	}
-       return true;
- 
-@@ -1546,7 +1705,8 @@ override_options (void)
-       {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
-       {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
-       {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
--      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
-+      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
-+      {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
-     };
- 
-   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
-@@ -1565,7 +1725,10 @@ override_options (void)
- 	  PTA_3DNOW_A = 64,
- 	  PTA_64BIT = 128,
- 	  PTA_SSSE3 = 256,
--	  PTA_CX16 = 512
-+	  PTA_CX16 = 512,
-+	  PTA_POPCNT = 1024,
-+	  PTA_ABM = 2048,
-+ 	  PTA_SSE4A = 4096
- 	} flags;
-     }
-   const processor_alias_table[] =
-@@ -1621,6 +1784,10 @@ override_options (void)
- 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
-       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
- 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
-+      {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
-+                                       | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
-+                                       | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
-+                                       | PTA_ABM | PTA_SSE4A | PTA_CX16},
-       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
-       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
-     };
-@@ -1772,6 +1939,15 @@ override_options (void)
- 	  x86_prefetch_sse = true;
- 	if (processor_alias_table[i].flags & PTA_CX16)
- 	  x86_cmpxchg16b = true;
-+	if (processor_alias_table[i].flags & PTA_POPCNT
-+	    && !(target_flags_explicit & MASK_POPCNT))
-+	  target_flags |= MASK_POPCNT;
-+	if (processor_alias_table[i].flags & PTA_ABM
-+	    && !(target_flags_explicit & MASK_ABM))
-+	  target_flags |= MASK_ABM;
-+	if (processor_alias_table[i].flags & PTA_SSE4A
-+	    && !(target_flags_explicit & MASK_SSE4A))
-+	  target_flags |= MASK_SSE4A;
- 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
- 	  error ("CPU you selected does not support x86-64 "
- 		 "instruction set");
-@@ -1963,6 +2139,10 @@ override_options (void)
-   if (TARGET_SSSE3)
-     target_flags |= MASK_SSE3;
- 
-+  /* Turn on SSE3 builtins for -msse4a.  */
-+  if (TARGET_SSE4A)
-+    target_flags |= MASK_SSE3;
-+
-   /* Turn on SSE2 builtins for -msse3.  */
-   if (TARGET_SSE3)
-     target_flags |= MASK_SSE2;
-@@ -1982,6 +2162,10 @@ override_options (void)
-   if (TARGET_3DNOW)
-     target_flags |= MASK_MMX;
- 
-+  /* Turn on POPCNT builtins for -mabm.  */
-+  if (TARGET_ABM)
-+    target_flags |= MASK_POPCNT;
-+
-   if (TARGET_64BIT)
-     {
-       if (TARGET_ALIGN_DOUBLE)
-@@ -8900,8 +9084,16 @@ ix86_expand_vector_move_misalign (enum m
- 	}
- 
-       if (TARGET_SSE2 && mode == V2DFmode)
--	{
--	  rtx zero;
-+        {
-+          rtx zero;
-+
-+          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
-+            {
-+              op0 = gen_lowpart (V2DFmode, op0);
-+              op1 = gen_lowpart (V2DFmode, op1);
-+              emit_insn (gen_sse2_movupd (op0, op1));
-+              return;
-+            }
- 
- 	  /* When SSE registers are split into halves, we can avoid
- 	     writing to the top half twice.  */
-@@ -8929,7 +9121,15 @@ ix86_expand_vector_move_misalign (enum m
- 	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
- 	}
-       else
--	{
-+        {
-+          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
-+            {
-+              op0 = gen_lowpart (V4SFmode, op0);
-+              op1 = gen_lowpart (V4SFmode, op1);
-+              emit_insn (gen_sse_movups (op0, op1));
-+              return;
-+            }
-+
- 	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
- 	    emit_move_insn (op0, CONST0_RTX (mode));
- 	  else
-@@ -13461,6 +13661,7 @@ ix86_issue_rate (void)
-     case PROCESSOR_PENTIUM4:
-     case PROCESSOR_ATHLON:
-     case PROCESSOR_K8:
-+    case PROCESSOR_AMDFAM10:
-     case PROCESSOR_NOCONA:
-     case PROCESSOR_GENERIC32:
-     case PROCESSOR_GENERIC64:
-@@ -13659,6 +13860,7 @@ ix86_adjust_cost (rtx insn, rtx link, rt
- 
-     case PROCESSOR_ATHLON:
-     case PROCESSOR_K8:
-+    case PROCESSOR_AMDFAM10:
-     case PROCESSOR_GENERIC32:
-     case PROCESSOR_GENERIC64:
-       memory = get_attr_memory (insn);
-@@ -14370,6 +14572,14 @@ enum ix86_builtins
-   IX86_BUILTIN_PABSW128,
-   IX86_BUILTIN_PABSD128,
- 
-+  /* AMDFAM10 - SSE4A New Instructions.  */
-+  IX86_BUILTIN_MOVNTSD,
-+  IX86_BUILTIN_MOVNTSS,
-+  IX86_BUILTIN_EXTRQI,
-+  IX86_BUILTIN_EXTRQ,
-+  IX86_BUILTIN_INSERTQI,
-+  IX86_BUILTIN_INSERTQ,
-+
-   IX86_BUILTIN_VEC_INIT_V2SI,
-   IX86_BUILTIN_VEC_INIT_V4HI,
-   IX86_BUILTIN_VEC_INIT_V8QI,
-@@ -15102,6 +15312,18 @@ ix86_init_mmx_sse_builtins (void)
-     = build_function_type_list (void_type_node,
- 			        pchar_type_node, V16QI_type_node, NULL_TREE);
- 
-+  tree v2di_ftype_v2di_unsigned_unsigned
-+    = build_function_type_list (V2DI_type_node, V2DI_type_node,
-+                                unsigned_type_node, unsigned_type_node,
-+                                NULL_TREE);
-+  tree v2di_ftype_v2di_v2di_unsigned_unsigned
-+    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
-+                                unsigned_type_node, unsigned_type_node,
-+                                NULL_TREE);
-+  tree v2di_ftype_v2di_v16qi
-+    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
-+                                NULL_TREE);
-+
-   tree float80_type;
-   tree float128_type;
-   tree ftype;
-@@ -15435,6 +15657,20 @@ ix86_init_mmx_sse_builtins (void)
-   def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
- 	       IX86_BUILTIN_PALIGNR);
- 
-+  /* AMDFAM10 SSE4A New built-ins  */
-+  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", 
-+               void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
-+  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", 
-+               void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
-+  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", 
-+               v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
-+  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
-+               v2di_ftype_v2di_v16qi,  IX86_BUILTIN_EXTRQ);
-+  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
-+               v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
-+  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
-+               v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
-+
-   /* Access to the vec_init patterns.  */
-   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
- 				    integer_type_node, NULL_TREE);
-@@ -15923,9 +16159,9 @@ ix86_expand_builtin (tree exp, rtx targe
-   enum insn_code icode;
-   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
-   tree arglist = TREE_OPERAND (exp, 1);
--  tree arg0, arg1, arg2;
--  rtx op0, op1, op2, pat;
--  enum machine_mode tmode, mode0, mode1, mode2, mode3;
-+  tree arg0, arg1, arg2, arg3;
-+  rtx op0, op1, op2, op3, pat;
-+  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
-   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
- 
-   switch (fcode)
-@@ -16340,6 +16576,114 @@ ix86_expand_builtin (tree exp, rtx targe
-       emit_insn (pat);
-       return target;
- 
-+    case IX86_BUILTIN_MOVNTSD:
-+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
-+
-+    case IX86_BUILTIN_MOVNTSS:
-+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
-+
-+    case IX86_BUILTIN_INSERTQ:
-+    case IX86_BUILTIN_EXTRQ:
-+      icode = (fcode == IX86_BUILTIN_EXTRQ
-+               ? CODE_FOR_sse4a_extrq
-+               : CODE_FOR_sse4a_insertq);
-+      arg0 = TREE_VALUE (arglist);
-+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-+      tmode = insn_data[icode].operand[0].mode;
-+      mode1 = insn_data[icode].operand[1].mode;
-+      mode2 = insn_data[icode].operand[2].mode;
-+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
-+        op0 = copy_to_mode_reg (mode1, op0);
-+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
-+        op1 = copy_to_mode_reg (mode2, op1);
-+      if (optimize || target == 0
-+          || GET_MODE (target) != tmode
-+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
-+        target = gen_reg_rtx (tmode);
-+      pat = GEN_FCN (icode) (target, op0, op1);
-+      if (! pat)
-+        return NULL_RTX;
-+      emit_insn (pat);
-+      return target;
-+
-+    case IX86_BUILTIN_EXTRQI:
-+      icode = CODE_FOR_sse4a_extrqi;
-+      arg0 = TREE_VALUE (arglist);
-+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
-+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
-+      tmode = insn_data[icode].operand[0].mode;
-+      mode1 = insn_data[icode].operand[1].mode;
-+      mode2 = insn_data[icode].operand[2].mode;
-+      mode3 = insn_data[icode].operand[3].mode;
-+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
-+        op0 = copy_to_mode_reg (mode1, op0);
-+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
-+        {
-+          error ("index mask must be an immediate");
-+          return gen_reg_rtx (tmode);
-+        }
-+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
-+        {
-+          error ("length mask must be an immediate");
-+          return gen_reg_rtx (tmode);
-+        }
-+      if (optimize || target == 0
-+          || GET_MODE (target) != tmode
-+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
-+        target = gen_reg_rtx (tmode);
-+      pat = GEN_FCN (icode) (target, op0, op1, op2);
-+      if (! pat)
-+        return NULL_RTX;
-+      emit_insn (pat);
-+      return target;
-+
-+    case IX86_BUILTIN_INSERTQI:
-+      icode = CODE_FOR_sse4a_insertqi;
-+      arg0 = TREE_VALUE (arglist);
-+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
-+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
-+      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
-+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
-+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
-+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
-+      op3 = expand_expr (arg3, NULL_RTX, VOIDmode, 0);
-+      tmode = insn_data[icode].operand[0].mode;
-+      mode1 = insn_data[icode].operand[1].mode;
-+      mode2 = insn_data[icode].operand[2].mode;
-+      mode3 = insn_data[icode].operand[3].mode;
-+      mode4 = insn_data[icode].operand[4].mode;
-+
-+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
-+        op0 = copy_to_mode_reg (mode1, op0);
-+
-+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
-+        op1 = copy_to_mode_reg (mode2, op1);
-+
-+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
-+        {
-+          error ("index mask must be an immediate");
-+          return gen_reg_rtx (tmode);
-+        }
-+      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
-+        {
-+          error ("length mask must be an immediate");
-+          return gen_reg_rtx (tmode);
-+        }
-+      if (optimize || target == 0
-+          || GET_MODE (target) != tmode
-+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
-+        target = gen_reg_rtx (tmode);
-+      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
-+      if (! pat)
-+        return NULL_RTX;
-+      emit_insn (pat);
-+      return target;
-+
-     case IX86_BUILTIN_VEC_INIT_V2SI:
-     case IX86_BUILTIN_VEC_INIT_V4HI:
-     case IX86_BUILTIN_VEC_INIT_V8QI:
---- gcc/config/i386/xmmintrin.h.jj	2006-10-05 00:29:29.000000000 +0200
-+++ gcc/config/i386/xmmintrin.h	2007-02-09 21:26:06.000000000 +0100
-@@ -1241,7 +1241,9 @@ do {									\
- } while (0)
- 
- /* For backward source compatibility.  */
--#include <emmintrin.h>
-+#ifdef __SSE2__
-+# include <emmintrin.h>
-+#endif
- 
- #endif /* __SSE__ */
- #endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/gcc41-build-id.patch b/gcc41-build-id.patch
new file mode 100644
index 0000000..4e162e6
--- /dev/null
+++ b/gcc41-build-id.patch
@@ -0,0 +1,74 @@
+2007-07-22  Roland McGrath  <roland@redhat.com>
+
+	* config/rs6000/sysv4.h (LINK_EH_SPEC): Add --build-id for
+	non-relocatable link.
+	* config/linux.h (LINK_EH_SPEC): Likewise.
+	* config/sparc/linux.h (LINK_EH_SPEC): Likewise.
+	* config/sparc/linux64.h (LINK_EH_SPEC): Likewise.
+	* config/alpha/elf.h (LINK_EH_SPEC): Likewise.
+	* config/ia64/linux.h (LINK_EH_SPEC): Likewise.
+
+--- gcc/config/rs6000/sysv4.h.~1~
++++ gcc/config/rs6000/sysv4.h
+@@ -1044,7 +1044,7 @@ extern int fixuplabelno;
+   %{!dynamic-linker:-dynamic-linker /lib/ld.so.1}}}"
+ 
+ #if defined(HAVE_LD_EH_FRAME_HDR)
+-# define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
++# define LINK_EH_SPEC "%{!static:--eh-frame-hdr} %{!r:--build-id} "
+ #endif
+ 
+ #define CPP_OS_LINUX_SPEC "-D__unix__ -D__gnu_linux__ -D__linux__ \
+--- gcc/config/linux.h.~1~
++++ gcc/config/linux.h
+@@ -85,7 +85,7 @@ Boston, MA 02110-1301, USA.  */
+     } while (0)
+ 
+ #if defined(HAVE_LD_EH_FRAME_HDR)
+-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
++#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} %{!r:--build-id} "
+ #endif
+ 
+ /* Define this so we can compile MS code for use with WINE.  */
+--- gcc/config/sparc/linux64.h.~1~
++++ gcc/config/sparc/linux64.h
+@@ -316,7 +316,7 @@ do {									\
+ #define DITF_CONVERSION_LIBFUNCS 1
+ 
+ #if defined(HAVE_LD_EH_FRAME_HDR)
+-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
++#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} %{!r:--build-id} "
+ #endif
+ 
+ #ifdef HAVE_AS_TLS
+--- gcc/config/sparc/linux.h.~1~
++++ gcc/config/sparc/linux.h
+@@ -188,7 +188,7 @@ do {									\
+ #define DITF_CONVERSION_LIBFUNCS 1
+ 
+ #if defined(HAVE_LD_EH_FRAME_HDR)
+-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
++#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} %{!r:--build-id} "
+ #endif
+ 
+ #ifdef HAVE_AS_TLS
+--- gcc/config/alpha/elf.h.~1~
++++ gcc/config/alpha/elf.h
+@@ -453,5 +453,5 @@ extern int alpha_this_gpdisp_sequence_nu
+    I imagine that other systems will catch up.  In the meantime, it
+    doesn't harm to make sure that the data exists to be used later.  */
+ #if defined(HAVE_LD_EH_FRAME_HDR)
+-#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} "
++#define LINK_EH_SPEC "%{!static:--eh-frame-hdr} %{!r:--build-id} "
+ #endif
+--- gcc/config/ia64/linux.h.~1~
++++ gcc/config/ia64/linux.h
+@@ -56,7 +56,7 @@ do {						\
+    Signalize that because we have fde-glibc, we don't need all C shared libs
+    linked against -lgcc_s.  */
+ #undef LINK_EH_SPEC
+-#define LINK_EH_SPEC ""
++#define LINK_EH_SPEC "%{!r:--build-id} "
+ 
+ #define MD_UNWIND_SUPPORT "config/ia64/linux-unwind.h"
+ 
diff --git a/gcc41-dtor-relro.patch b/gcc41-dtor-relro.patch
deleted file mode 100644
index 118128c..0000000
--- a/gcc41-dtor-relro.patch
+++ /dev/null
@@ -1,89 +0,0 @@
-2007-05-03  Ulrich Drepper  <drepper@redhat.com>
-	    Jakub Jelinek  <jakub@redhat.com>
-
-	* crtstuff.c (HIDDEN_DTOR_LIST_END): New macro.
-	(__do_global_dtors_aux): Use more paranoid loop to run
-	destructors if HIDDEN_DTOR_LIST_END.
-	(__DTOR_END__): Export as a hidden symbol when HIDDEN_DTOR_LIST_END.
-
---- gcc/crtstuff.c.jj	2007-01-24 22:24:21.000000000 +0100
-+++ gcc/crtstuff.c	2007-05-03 10:40:29.000000000 +0200
-@@ -106,6 +107,11 @@ call_ ## FUNC (void)					\
- # define EH_FRAME_SECTION_CONST
- #endif
- 
-+#if !defined(DTOR_LIST_END) && defined(OBJECT_FORMAT_ELF) \
-+    && defined(HAVE_GAS_HIDDEN) && !defined(FINI_ARRAY_SECTION_ASM_OP)
-+# define HIDDEN_DTOR_LIST_END
-+#endif
-+
- /* We do not want to add the weak attribute to the declarations of these
-    routines in unwind-dw2-fde.h because that will cause the definition of
-    these symbols to be weak as well.
-@@ -265,10 +271,6 @@ extern void __cxa_finalize (void *) TARG
- static void __attribute__((used))
- __do_global_dtors_aux (void)
- {
--#ifndef FINI_ARRAY_SECTION_ASM_OP
--  static func_ptr *p = __DTOR_LIST__ + 1;
--  func_ptr f;
--#endif /* !defined(FINI_ARRAY_SECTION_ASM_OP)  */
-   static _Bool completed;
- 
-   if (__builtin_expect (completed, 0))
-@@ -282,12 +284,32 @@ __do_global_dtors_aux (void)
- #ifdef FINI_ARRAY_SECTION_ASM_OP
-   /* If we are using .fini_array then destructors will be run via that
-      mechanism.  */
-+#elif defined(HIDDEN_DTOR_LIST_END)
-+  {
-+    /* Safer version that makes sure only .dtors function pointers are
-+       called even if the static variable is maliciously changed.  */
-+    extern func_ptr __DTOR_END__[] __attribute__((visibility ("hidden")));
-+    static size_t dtor_idx;
-+    const size_t max_idx = __DTOR_END__ - __DTOR_LIST__ - 1;
-+    func_ptr f;
-+
-+    while (dtor_idx < max_idx)
-+      {
-+	f = __DTOR_LIST__[++dtor_idx];
-+	f ();
-+      }
-+  }
- #else /* !defined (FINI_ARRAY_SECTION_ASM_OP) */
--  while ((f = *p))
--    {
--      p++;
--      f ();
--    }
-+  {
-+    static func_ptr *p = __DTOR_LIST__ + 1;
-+    func_ptr f;
-+
-+    while ((f = *p))
-+      {
-+	p++;
-+	f ();
-+      }
-+  }
- #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */
- 
- #ifdef USE_EH_FRAME_REGISTRY
-@@ -471,6 +493,17 @@ STATIC func_ptr __CTOR_END__[1]
- 
- #ifdef DTOR_LIST_END
- DTOR_LIST_END;
-+#elif defined(HIDDEN_DTOR_LIST_END)
-+#ifdef DTORS_SECTION_ASM_OP
-+asm (DTORS_SECTION_ASM_OP);
-+#endif
-+func_ptr __DTOR_END__[1]
-+  __attribute__ ((unused,
-+#ifndef DTORS_SECTION_ASM_OP
-+		  section(".dtors"),
-+#endif
-+		  aligned(sizeof(func_ptr)), visibility ("hidden")))
-+  = { (func_ptr) 0 };
- #elif defined(DTORS_SECTION_ASM_OP)
- asm (DTORS_SECTION_ASM_OP);
- STATIC func_ptr __DTOR_END__[1]
diff --git a/gcc41-libgomp-ncpus.patch b/gcc41-libgomp-ncpus.patch
deleted file mode 100644
index 3cb6238..0000000
--- a/gcc41-libgomp-ncpus.patch
+++ /dev/null
@@ -1,186 +0,0 @@
-2007-05-02  Jakub Jelinek  <jakub@redhat.com>
-
-	* config/linux/proc.c: New file.
-
---- libgomp/config/linux/proc.c.jj	2007-05-02 13:50:37.000000000 +0200
-+++ libgomp/config/linux/proc.c	2007-05-02 16:00:47.000000000 +0200
-@@ -0,0 +1,179 @@
-+/* Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
-+   Contributed by Jakub Jelinek <jakub@redhat.com>.
-+
-+   This file is part of the GNU OpenMP Library (libgomp).
-+
-+   Libgomp is free software; you can redistribute it and/or modify it
-+   under the terms of the GNU Lesser General Public License as published by
-+   the Free Software Foundation; either version 2.1 of the License, or
-+   (at your option) any later version.
-+
-+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
-+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-+   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
-+   more details.
-+
-+   You should have received a copy of the GNU Lesser General Public License 
-+   along with libgomp; see the file COPYING.LIB.  If not, write to the
-+   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-+   MA 02110-1301, USA.  */
-+
-+/* As a special exception, if you link this library with other files, some
-+   of which are compiled with GCC, to produce an executable, this library
-+   does not by itself cause the resulting executable to be covered by the
-+   GNU General Public License.  This exception does not however invalidate
-+   any other reasons why the executable file might be covered by the GNU
-+   General Public License.  */
-+
-+/* This file contains system specific routines related to counting
-+   online processors and dynamic load balancing.  */
-+
-+#ifndef _GNU_SOURCE
-+#define _GNU_SOURCE 1
-+#endif
-+#include "libgomp.h"
-+#include <sched.h>
-+#include <stdlib.h>
-+#include <unistd.h>
-+#ifdef HAVE_GETLOADAVG
-+# ifdef HAVE_SYS_LOADAVG_H
-+#  include <sys/loadavg.h>
-+# endif
-+#endif
-+
-+#ifdef HAVE_PTHREAD_AFFINITY_NP
-+static unsigned long
-+cpuset_popcount (cpu_set_t *cpusetp)
-+{
-+#ifdef CPU_COUNT
-+  /* glibc 2.6 and above provide a macro for this.  */
-+  return CPU_COUNT (cpusetp);
-+#else
-+  size_t i;
-+  unsigned long ret = 0;
-+  extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)];
-+
-+  (void) check;
-+  for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++)
-+    {
-+      unsigned long int mask = cpusetp->__bits[i];
-+      if (mask == 0)
-+	continue;
-+      ret += __builtin_popcountl (mask);
-+    }
-+  return ret;
-+#endif
-+}
-+#endif
-+
-+/* At startup, determine the default number of threads.  It would seem
-+   this should be related to the number of cpus online.  */
-+
-+void
-+gomp_init_num_threads (void)
-+{
-+#ifdef HAVE_PTHREAD_AFFINITY_NP
-+  cpu_set_t cpuset;
-+
-+  if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0)
-+    {
-+      /* Count only the CPUs this process can use.  */
-+      gomp_nthreads_var = cpuset_popcount (&cpuset);
-+      if (gomp_nthreads_var == 0)
-+	gomp_nthreads_var = 1;
-+      return;
-+    }
-+#endif
-+#ifdef _SC_NPROCESSORS_ONLN
-+  gomp_nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
-+#endif
-+}
-+
-+static int
-+get_num_procs (void)
-+{
-+#ifdef HAVE_PTHREAD_AFFINITY_NP
-+  cpu_set_t cpuset;
-+
-+  if (gomp_cpu_affinity == NULL)
-+    {
-+      /* Count only the CPUs this process can use.  */
-+      if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset),
-+				  &cpuset) == 0)
-+	{
-+	  int ret = cpuset_popcount (&cpuset);
-+	  return ret != 0 ? ret : 1;
-+	}
-+    }
-+  else
-+    {
-+      size_t idx;
-+      static int affinity_cpus;
-+
-+      /* We can't use pthread_getaffinity_np in this case
-+	 (we have changed it ourselves, it binds to just one CPU).
-+	 Count instead the number of different CPUs we are
-+	 using.  */
-+      CPU_ZERO (&cpuset);
-+      if (affinity_cpus == 0)
-+	{
-+	  int cpus = 0;
-+	  for (idx = 0; idx < gomp_cpu_affinity_len; idx++)
-+	    if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpuset))
-+	      {
-+		cpus++;
-+		CPU_SET (gomp_cpu_affinity[idx], &cpuset);
-+	      }
-+	  affinity_cpus = cpus;
-+	}
-+      return affinity_cpus;
-+    }
-+#endif
-+#ifdef _SC_NPROCESSORS_ONLN
-+  return sysconf (_SC_NPROCESSORS_ONLN);
-+#else
-+  return gomp_nthreads_var;
-+#endif
-+}
-+
-+/* When OMP_DYNAMIC is set, at thread launch determine the number of
-+   threads we should spawn for this team.  */
-+/* ??? I have no idea what best practice for this is.  Surely some
-+   function of the number of processors that are *still* online and
-+   the load average.  Here I use the number of processors online
-+   minus the 15 minute load average.  */
-+
-+unsigned
-+gomp_dynamic_max_threads (void)
-+{
-+  unsigned n_onln, loadavg;
-+
-+  n_onln = get_num_procs ();
-+  if (n_onln > gomp_nthreads_var)
-+    n_onln = gomp_nthreads_var;
-+
-+  loadavg = 0;
-+#ifdef HAVE_GETLOADAVG
-+  {
-+    double dloadavg[3];
-+    if (getloadavg (dloadavg, 3) == 3)
-+      {
-+	/* Add 0.1 to get a kind of biased rounding.  */
-+	loadavg = dloadavg[2] + 0.1;
-+      }
-+  }
-+#endif
-+
-+  if (loadavg >= n_onln)
-+    return 1;
-+  else
-+    return n_onln - loadavg;
-+}
-+
-+int
-+omp_get_num_procs (void)
-+{
-+  return get_num_procs ();
-+}
-+
-+ialias (omp_get_num_procs)
diff --git a/gcc41-multi32-hack.patch b/gcc41-multi32-hack.patch
deleted file mode 100644
index e697f96..0000000
--- a/gcc41-multi32-hack.patch
+++ /dev/null
@@ -1,104 +0,0 @@
---- libcpp/configure.ac.jj	2005-10-28 23:13:40.000000000 +0200
-+++ libcpp/configure.ac	2005-11-25 14:34:31.000000000 +0100
-@@ -112,6 +112,8 @@ fi
- 
- m4_changequote(,)
- case $target in
-+	powerpc-*-linux*)
-+		need_64bit_hwint=no ;;
- 	alpha*-*-* | \
- 	arm*-*-*eabi* | \
- 	arm*-*-symbianelf* | \
---- libcpp/configure.jj	2005-10-28 23:13:40.000000000 +0200
-+++ libcpp/configure	2005-11-25 14:34:40.000000000 +0100
-@@ -8217,6 +8217,8 @@ fi
- 
- 
- case $target in
-+	powerpc-*-linux*)
-+		need_64bit_hwint=no ;;
- 	alpha*-*-* | \
- 	arm*-*-*eabi* | \
- 	arm*-*-symbianelf* | \
---- gcc/config.gcc.jj	2005-11-19 09:27:16.000000000 +0100
-+++ gcc/config.gcc	2005-11-25 14:29:30.000000000 +0100
-@@ -294,7 +294,10 @@ mips*-*-*)
- powerpc*-*-*)
- 	cpu_type=rs6000
- 	extra_headers="ppc-asm.h altivec.h spe.h"
--	need_64bit_hwint=yes
-+	case ${target} in
-+	powerpc-*-linux*) ;;
-+	*) need_64bit_hwint=yes ;;
-+	esac
- 	case x$with_cpu in
- 	    xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[345]|xrs64a)
- 		cpu_is_64bit=yes
---- gcc/gcc.c.jj	2005-11-19 21:16:39.000000000 +0100
-+++ gcc/gcc.c	2005-11-28 13:00:58.000000000 +0100
-@@ -3154,6 +3154,8 @@ process_command (int argc, const char **
-   const char *spec_lang = 0;
-   int last_language_n_infiles;
-   int lang_n_infiles = 0;
-+  int hack32 = 1;
-+  int used_B = 0;
- #ifdef MODIFY_TARGET_NAME
-   int is_modify_target_name;
-   int j;
-@@ -3680,6 +3682,7 @@ warranty; not even for MERCHANTABILITY o
- 			    PREFIX_PRIORITY_B_OPT, 0, 0);
- 		add_prefix (&include_prefixes, value, NULL,
- 			    PREFIX_PRIORITY_B_OPT, 0, 0);
-+		used_B = 1;
- 		n_switches++;
- 	      }
- 	      break;
-@@ -3742,6 +3745,21 @@ warranty; not even for MERCHANTABILITY o
- #endif
- 	      goto normal_switch;
- 
-+	    /* HACK START */
-+	    case 'm':
-+	      if ((p[1] == '6' && p[2] == '4')
-+		  || (p[1] == 'a' && strncmp (p + 2, "ltivec", 6) == 0)
-+		  || (p[1] == 'c' && strncmp (p + 2, "pu=", 3) == 0
-+		      && (strncmp (p + 5, "7400", 4) == 0
-+			  || strncmp (p + 5, "7450", 4) == 0
-+			  || strncmp (p + 5, "G4", 2) == 0
-+			  || strncmp (p + 5, "970", 3) == 0
-+			  || strncmp (p + 5, "G5", 2) == 0))
-+		  || (p[1] == 'p' && strncmp (p + 2, "owerpc64", 8) == 0))
-+		hack32 = 0;
-+	    /* FALLTHROUGH */
-+	    /* HACK END */
-+
- 	    default:
- 	    normal_switch:
- 
-@@ -3812,6 +3830,26 @@ warranty; not even for MERCHANTABILITY o
-   /* Use 2 as fourth arg meaning try just the machine as a suffix,
-      as well as trying the machine and the version.  */
- #ifndef OS2
-+  /* HACK START */
-+  if (hack32 && !used_B && !strncmp (spec_machine, "sparc64-", 8))
-+    {
-+      const char *sparc32_exec_prefix =
-+	concat (standard_libexec_prefix, "sparc-", spec_machine + 8,
-+		dir_separator_str, spec_version, dir_separator_str, NULL);
-+      add_prefix (&exec_prefixes, sparc32_exec_prefix, "GCC",
-+		  PREFIX_PRIORITY_LAST, 0, 0);
-+    }
-+  /* HACK END */
-+  /* HACK START */
-+  if (hack32 && !used_B && !strncmp (spec_machine, "ppc64-", 6))
-+    {
-+      const char *ppc32_exec_prefix =
-+	concat (standard_libexec_prefix, "ppc-", spec_machine + 6,
-+		dir_separator_str, spec_version, dir_separator_str, NULL);
-+      add_prefix (&exec_prefixes, ppc32_exec_prefix, "GCC",
-+		  PREFIX_PRIORITY_LAST, 0, 0);
-+    }
-+  /* HACK END */
-   add_prefix (&exec_prefixes, standard_libexec_prefix, "GCC",
- 	      PREFIX_PRIORITY_LAST, 1, 0);
-   add_prefix (&exec_prefixes, standard_libexec_prefix, "BINUTILS",
diff --git a/gcc41-objc-rh185398.patch b/gcc41-objc-rh185398.patch
deleted file mode 100644
index fff33a2..0000000
--- a/gcc41-objc-rh185398.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-2006-04-06  Jakub Jelinek  <jakub@redhat.com>
-
-	* objc/compile/20060406-1.m: New test.
-
---- gcc/testsuite/objc/compile/20060406-1.m.jj	2006-04-06 17:25:59.000000000 +0200
-+++ gcc/testsuite/objc/compile/20060406-1.m	2006-04-06 17:20:48.000000000 +0200
-@@ -0,0 +1,21 @@
-+typedef struct
-+{
-+  void *p;
-+} *S;
-+
-+@protocol O
-+- (unsigned)j;
-+@end
-+
-+@interface I
-++ (unsigned char)T:(S<O>[2])p v:(S<O>)h;
-+@end
-+
-+@implementation I
-++ (unsigned char)T:(S<O>[2])p v:(S<O>)h
-+{
-+  p[0] = (S) 0;
-+  p[1] = (S) 0;
-+  return 0;
-+}
-+@end
diff --git a/gcc41-ppc-tramp.patch b/gcc41-ppc-tramp.patch
new file mode 100644
index 0000000..145b628
--- /dev/null
+++ b/gcc41-ppc-tramp.patch
@@ -0,0 +1,34 @@
+2007-08-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* config/rs6000/tramp.asm: Include config.h.
+	Check __PIC__ or __pic__ macro instead of SHARED.
+
+--- gcc/config/rs6000/tramp.asm.jj	2006-10-05 00:28:33.000000000 +0200
++++ gcc/config/rs6000/tramp.asm	2007-08-20 23:20:52.000000000 +0200
+@@ -1,6 +1,6 @@
+ /*  Special support for trampolines
+  *
+- *   Copyright (C) 1996, 1997, 2000 Free Software Foundation, Inc.
++ *   Copyright (C) 1996, 1997, 2000, 2007 Free Software Foundation, Inc.
+  *   Written By Michael Meissner
+  * 
+  * This file is free software; you can redistribute it and/or modify it
+@@ -37,7 +37,8 @@
+ 
+ 	.file	"tramp.asm"
+ 	.section ".text"
+-	#include "ppc-asm.h"
++#include "ppc-asm.h"
++#include "config.h"
+ 
+ #ifndef __powerpc64__
+ 	.type	trampoline_initial,@object
+@@ -105,7 +106,7 @@ FUNC_START(__trampoline_setup)
+ 	blr
+ 
+ .Labort:
+-#if defined SHARED && defined HAVE_AS_REL16
++#if (defined __PIC__ || defined __pic__) && defined HAVE_AS_REL16
+ 	bcl	20,31,1f
+ 1:	mflr	r30
+ 	addis	r30,r30,_GLOBAL_OFFSET_TABLE_-1b@ha
diff --git a/gcc41-pr22244.patch b/gcc41-pr22244.patch
new file mode 100644
index 0000000..b7e037e
--- /dev/null
+++ b/gcc41-pr22244.patch
@@ -0,0 +1,63 @@
+2007-08-14  Jakub Jelinek  <jakub@redhat.com>
+
+	PR fortran/22244
+	* Make-lang.in (fortran/trans-types.o): Depend on $(FLAGS_H).
+	* trans-types.c: Include flags.h.
+	(gfc_get_nodesc_array_type): Add TYPE_DECL TYPE_NAME with
+	correct bounds and dimensions for packed arrays.
+
+--- gcc/fortran/Make-lang.in	(revision 127395)
++++ gcc/fortran/Make-lang.in	(working copy)
+@@ -292,7 +292,7 @@ fortran/trans-decl.o: $(GFORTRAN_TRANS_D
+   cgraph.h $(TARGET_H) function.h $(FLAGS_H) $(RTL_H) tree-gimple.h \
+   tree-dump.h
+ fortran/trans-types.o: $(GFORTRAN_TRANS_DEPS) gt-fortran-trans-types.h \
+-  real.h toplev.h $(TARGET_H)
++  real.h toplev.h $(TARGET_H) $(FLAGS_H)
+ fortran/trans-const.o: $(GFORTRAN_TRANS_DEPS)
+ fortran/trans-expr.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
+ fortran/trans-stmt.o: $(GFORTRAN_TRANS_DEPS) fortran/dependency.h
+--- gcc/fortran/trans-types.c	(revision 127395)
++++ gcc/fortran/trans-types.c	(working copy)
+@@ -35,6 +35,7 @@ Software Foundation, 51 Franklin Street,
+ #include "trans-types.h"
+ #include "trans-const.h"
+ #include "real.h"
++#include "flags.h"
+ 
+ 
+ #if (GFC_MAX_DIMENSIONS < 10)
+@@ -1005,7 +1006,7 @@ gfc_get_nodesc_array_type (tree etype, g
+     {
+       /* Fill in the stride and bound components of the type.  */
+       if (known_stride)
+-	tmp =  gfc_conv_mpz_to_tree (stride, gfc_index_integer_kind);
++	tmp = gfc_conv_mpz_to_tree (stride, gfc_index_integer_kind);
+       else
+         tmp = NULL_TREE;
+       GFC_TYPE_ARRAY_STRIDE (type, n) = tmp;
+@@ -1103,6 +1104,24 @@ gfc_get_nodesc_array_type (tree etype, g
+   mpz_clear (stride);
+   mpz_clear (delta);
+ 
++  /* In debug info, represent packed arrays with their proper bounds
++     (and as multi-dimensional arrays if they have rank > 1) instead
++     of as flat arrays.  */
++  if (known_stride && write_symbols != NO_DEBUG)
++    {
++      tree gtype = etype, rtype, type_decl;
++
++      for (n = as->rank - 1; n >= 0; n--)
++	{
++	  rtype = build_range_type (gfc_array_index_type,
++				    GFC_TYPE_ARRAY_LBOUND (type, n),
++				    GFC_TYPE_ARRAY_UBOUND (type, n));
++	  gtype = build_array_type (gtype, rtype);
++	}
++      TYPE_NAME (type) = type_decl = build_decl (TYPE_DECL, NULL, gtype);
++      DECL_ORIGINAL_TYPE (type_decl) = gtype;
++    }
++
+   if (packed < 3 || !known_stride)
+     {
+       /* For dummy arrays and automatic (heap allocated) arrays we
diff --git a/gcc41-pr24036-revert.patch b/gcc41-pr24036-revert.patch
deleted file mode 100644
index c40e940..0000000
--- a/gcc41-pr24036-revert.patch
+++ /dev/null
@@ -1,302 +0,0 @@
-2006-12-21  Jakub Jelinek  <jakub@redhat.com>
-
-	Revert:
-	2006-12-16  Joseph Myers  <joseph@codesourcery.com>
-		    David Edelsohn  <edelsohn@gnu.org>
-	PR target/24036
-	* doc/tm.texi (HARD_REGNO_NREGS_HAS_PADDING,
-	HARD_REGNO_NREGS_WITH_PADDING): Document new target macros.
-	* defaults.h (HARD_REGNO_NREGS_HAS_PADDING,
-	HARD_REGNO_NREGS_WITH_PADDING): Define.
-	* config/i386/i386.h (HARD_REGNO_NREGS_HAS_PADDING,
-	HARD_REGNO_NREGS_WITH_PADDING): Define.
-	* rtlanal.c (subreg_regno_offset, subreg_offset_representable_p):
-	Use new macros to detect modes with holes; do not look at integer
-	units.
-	(subreg_offset_representable_p): Check for and disallow cases
-	where the modes use different numbers of bits from registers.
-	* config/rs6000/rs6000.c (rs6000_emit_move): Handle TFmode
-	constant for soft-float.
-	(rs6000_hard_regno_nregs): Use UNITS_PER_FP_WORD for e500 GPRs
-	containing doubles.
-	(rs6000_split_multireg_move): Use DFmode reg_mode for TFmode moves
-	in E500 double case.
-	* config/rs6000/rs6000.md (movtf): Allow soft-float.
-	(movtf_softfloat): New.
-
---- gcc/doc/tm.texi	(revision 119967)
-+++ gcc/doc/tm.texi	(revision 119966)
-@@ -1937,33 +1937,6 @@ definition of this macro is
- @end smallexample
- @end defmac
- 
--@defmac HARD_REGNO_NREGS_HAS_PADDING (@var{regno}, @var{mode})
--A C expression that is nonzero if a value of mode @var{mode}, stored
--in memory, ends with padding that causes it to take up more space than
--in registers starting at register number @var{regno} (as determined by
--multiplying GCC's notion of the size of the register when containing
--this mode by the number of registers returned by
--@code{HARD_REGNO_NREGS}).  By default this is zero.
--
--For example, if a floating-point value is stored in three 32-bit
--registers but takes up 128 bits in memory, then this would be
--nonzero.
--
--This macros only needs to be defined if there are cases where
--@code{subreg_regno_offset} and @code{subreg_offset_representable_p}
--would otherwise wrongly determine that a @code{subreg} can be
--represented by an offset to the register number, when in fact such a
--@code{subreg} would contain some of the padding not stored in
--registers and so not be representable.
--@end defmac
--
--@defmac HARD_REGNO_NREGS_WITH_PADDING (@var{regno}, @var{mode})
--For values of @var{regno} and @var{mode} for which
--@code{HARD_REGNO_NREGS_HAS_PADDING} returns nonzero, a C expression
--returning the greater number of registers required to hold the value
--including any padding.  In the example above, the value would be four.
--@end defmac
--
- @defmac REGMODE_NATURAL_SIZE (@var{mode})
- Define this macro if the natural size of registers that hold values
- of mode @var{mode} is not the word size.  It is a C expression that
---- gcc/defaults.h	(revision 119967)
-+++ gcc/defaults.h	(revision 119966)
-@@ -867,9 +867,4 @@ Software Foundation, 51 Franklin Street,
- #define INCOMING_FRAME_SP_OFFSET 0
- #endif
- 
--#ifndef HARD_REGNO_NREGS_HAS_PADDING
--#define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE) 0
--#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) -1
--#endif
--
- #endif  /* ! GCC_DEFAULTS_H */
---- gcc/rtlanal.c	(revision 119967)
-+++ gcc/rtlanal.c	(revision 119966)
-@@ -3138,15 +3138,34 @@ unsigned int
- subreg_regno_offset (unsigned int xregno, enum machine_mode xmode,
- 		     unsigned int offset, enum machine_mode ymode)
- {
--  int nregs_xmode, nregs_ymode;
-+  int nregs_xmode, nregs_ymode, nregs_xmode_unit_int;
-   int mode_multiple, nregs_multiple;
-   int y_offset;
-+  enum machine_mode xmode_unit, xmode_unit_int;
- 
-   gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
- 
-+  if (GET_MODE_INNER (xmode) == VOIDmode)
-+    xmode_unit = xmode;
-+  else
-+    xmode_unit = GET_MODE_INNER (xmode);
-+  
-+  if (FLOAT_MODE_P (xmode_unit))
-+    {
-+      xmode_unit_int = int_mode_for_mode (xmode_unit);
-+      if (xmode_unit_int == BLKmode)
-+	/* It's probably bad to be here; a port should have an integer mode
-+	   that's the same size as anything of which it takes a SUBREG.  */
-+	xmode_unit_int = xmode_unit;
-+    }
-+  else
-+    xmode_unit_int = xmode_unit;
-+
-+  nregs_xmode_unit_int = hard_regno_nregs[xregno][xmode_unit_int];
-+
-   /* Adjust nregs_xmode to allow for 'holes'.  */
--  if (HARD_REGNO_NREGS_HAS_PADDING (xregno, xmode))
--    nregs_xmode = HARD_REGNO_NREGS_WITH_PADDING (xregno, xmode);
-+  if (nregs_xmode_unit_int != hard_regno_nregs[xregno][xmode_unit])
-+    nregs_xmode = nregs_xmode_unit_int * GET_MODE_NUNITS (xmode);
-   else
-     nregs_xmode = hard_regno_nregs[xregno][xmode];
-     
-@@ -3184,31 +3203,38 @@ bool
- subreg_offset_representable_p (unsigned int xregno, enum machine_mode xmode,
- 			       unsigned int offset, enum machine_mode ymode)
- {
--  int nregs_xmode, nregs_ymode;
-+  int nregs_xmode, nregs_ymode, nregs_xmode_unit, nregs_xmode_unit_int;
-   int mode_multiple, nregs_multiple;
-   int y_offset;
--  int regsize_xmode, regsize_ymode;
-+  enum machine_mode xmode_unit, xmode_unit_int;
- 
-   gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
- 
-+  if (GET_MODE_INNER (xmode) == VOIDmode)
-+    xmode_unit = xmode;
-+  else
-+    xmode_unit = GET_MODE_INNER (xmode);
-+  
-+  if (FLOAT_MODE_P (xmode_unit))
-+    {
-+      xmode_unit_int = int_mode_for_mode (xmode_unit);
-+      if (xmode_unit_int == BLKmode)
-+	/* It's probably bad to be here; a port should have an integer mode
-+	   that's the same size as anything of which it takes a SUBREG.  */
-+	xmode_unit_int = xmode_unit;
-+    }
-+  else
-+    xmode_unit_int = xmode_unit;
-+
-+  nregs_xmode_unit = hard_regno_nregs[xregno][xmode_unit];
-+  nregs_xmode_unit_int = hard_regno_nregs[xregno][xmode_unit_int];
-+
-   /* If there are holes in a non-scalar mode in registers, we expect
-      that it is made up of its units concatenated together.  */
--  if (HARD_REGNO_NREGS_HAS_PADDING (xregno, xmode))
-+  if (nregs_xmode_unit != nregs_xmode_unit_int)
-     {
--      enum machine_mode xmode_unit;
--
--      nregs_xmode = HARD_REGNO_NREGS_WITH_PADDING (xregno, xmode);
--      if (GET_MODE_INNER (xmode) == VOIDmode)
--	xmode_unit = xmode;
--      else
--	xmode_unit = GET_MODE_INNER (xmode);
--      gcc_assert (HARD_REGNO_NREGS_HAS_PADDING (xregno, xmode_unit));
--      gcc_assert (nregs_xmode
--		  == (GET_MODE_NUNITS (xmode)
--		      * HARD_REGNO_NREGS_WITH_PADDING (xregno, xmode_unit)));
--      gcc_assert (hard_regno_nregs[xregno][xmode]
--		  == (hard_regno_nregs[xregno][xmode_unit]
--		      * GET_MODE_NUNITS (xmode)));
-+      gcc_assert (nregs_xmode_unit * GET_MODE_NUNITS (xmode)
-+		  == hard_regno_nregs[xregno][xmode]);
- 
-       /* You can only ask for a SUBREG of a value with holes in the middle
- 	 if you don't cross the holes.  (Such a SUBREG should be done by
-@@ -3218,12 +3244,15 @@ subreg_offset_representable_p (unsigned 
- 	 3 for each part, but in memory it's two 128-bit parts.  
- 	 Padding is assumed to be at the end (not necessarily the 'high part')
- 	 of each unit.  */
--      if ((offset / GET_MODE_SIZE (xmode_unit) + 1 
--	   < GET_MODE_NUNITS (xmode))
--	  && (offset / GET_MODE_SIZE (xmode_unit)
-+      if (nregs_xmode_unit != nregs_xmode_unit_int
-+	  && (offset / GET_MODE_SIZE (xmode_unit_int) + 1 
-+	      < GET_MODE_NUNITS (xmode))
-+	  && (offset / GET_MODE_SIZE (xmode_unit_int) 
- 	      != ((offset + GET_MODE_SIZE (ymode) - 1)
--		  / GET_MODE_SIZE (xmode_unit))))
-+		  / GET_MODE_SIZE (xmode_unit_int))))
- 	return false;
-+
-+      nregs_xmode = nregs_xmode_unit_int * GET_MODE_NUNITS (xmode);
-     }
-   else
-     nregs_xmode = hard_regno_nregs[xregno][xmode];
-@@ -3237,15 +3266,6 @@ subreg_offset_representable_p (unsigned 
- 	  ? WORDS_BIG_ENDIAN : BYTES_BIG_ENDIAN))
-     return true;
- 
--  /* If registers store different numbers of bits in the different
--     modes, we cannot generally form this subreg.  */
--  regsize_xmode = GET_MODE_SIZE (xmode) / nregs_xmode;
--  regsize_ymode = GET_MODE_SIZE (ymode) / nregs_ymode;
--  if (regsize_xmode > regsize_ymode && nregs_ymode > 1)
--    return false;
--  if (regsize_ymode > regsize_xmode && nregs_xmode > 1)
--    return false;
--
-   /* Lowpart subregs are otherwise valid.  */
-   if (offset == subreg_lowpart_offset (ymode, xmode))
-     return true;
---- gcc/config/i386/i386.h	(revision 119967)
-+++ gcc/config/i386/i386.h	(revision 119966)
-@@ -827,15 +827,6 @@ do {									\
-       ? (TARGET_64BIT ? 4 : 6)						\
-       : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
- 
--#define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE)			\
--  ((TARGET_128BIT_LONG_DOUBLE && !TARGET_64BIT)				\
--   ? (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO)	\
--      ? 0								\
--      : ((MODE) == XFmode || (MODE) == XCmode))				\
--   : 0)
--
--#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
--
- #define VALID_SSE2_REG_MODE(MODE) \
-     ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode    \
-      || (MODE) == V2DImode || (MODE) == DFmode)
---- gcc/config/rs6000/rs6000.c	(revision 119967)
-+++ gcc/config/rs6000/rs6000.c	(revision 119966)
-@@ -3573,6 +3573,9 @@ rs6000_hard_regno_nregs (int regno, enum
-   if (FP_REGNO_P (regno))
-     return (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
- 
-+  if (TARGET_E500_DOUBLE && mode == DFmode)
-+    return 1;
-+
-   if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
-     return (GET_MODE_SIZE (mode) + UNITS_PER_SPE_WORD - 1) / UNITS_PER_SPE_WORD;
- 
-@@ -3580,14 +3583,6 @@ rs6000_hard_regno_nregs (int regno, enum
-     return
-       (GET_MODE_SIZE (mode) + UNITS_PER_ALTIVEC_WORD - 1) / UNITS_PER_ALTIVEC_WORD;
- 
--  /* The value returned for SCmode in the E500 double case is 2 for
--     ABI compatibility; storing an SCmode value in a single register
--     would require function_arg and rs6000_spe_function_arg to handle
--     SCmode so as to pass the value correctly in a pair of
--     registers.  */
--  if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode)
--    return (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
--
-   return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- }
- 
-@@ -3956,7 +3951,8 @@ rs6000_emit_move (rtx dest, rtx source, 
- 
-   /* 128-bit constant floating-point values on Darwin should really be
-      loaded as two parts.  */
--  if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
-+  if (!TARGET_IEEEQUAD
-+      && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
-       && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
-     {
-       /* DImode is used, not DFmode, because simplify_gen_subreg doesn't
-@@ -12402,8 +12398,6 @@ rs6000_split_multireg_move (rtx dst, rtx
-     reg_mode = DFmode;
-   else if (ALTIVEC_REGNO_P (reg))
-     reg_mode = V16QImode;
--  else if (TARGET_E500_DOUBLE && mode == TFmode)
--    reg_mode = DFmode;
-   else
-     reg_mode = word_mode;
-   reg_mode_size = GET_MODE_SIZE (reg_mode);
---- gcc/config/rs6000/rs6000.md	(revision 119967)
-+++ gcc/config/rs6000/rs6000.md	(revision 119966)
-@@ -7789,7 +7789,8 @@ (define_insn "*movdf_softfloat64"
- (define_expand "movtf"
-   [(set (match_operand:TF 0 "general_operand" "")
- 	(match_operand:TF 1 "any_operand" ""))]
--  "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
-+  "!TARGET_IEEEQUAD
-+   && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
-   "{ rs6000_emit_move (operands[0], operands[1], TFmode); DONE; }")
- 
- ; It's important to list the o->f and f->o moves before f->f because
-@@ -7808,19 +7809,6 @@ (define_insn_and_split "*movtf_internal"
- { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-   [(set_attr "length" "8,8,8,20,20,16")])
- 
--(define_insn_and_split "*movtf_softfloat"
--  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,Y,r")
--	(match_operand:TF 1 "input_operand"         "YGHF,r,r"))]
--  "!TARGET_IEEEQUAD
--   && (TARGET_SOFT_FLOAT || !TARGET_FPRS) && TARGET_LONG_DOUBLE_128
--   && (gpc_reg_operand (operands[0], TFmode)
--       || gpc_reg_operand (operands[1], TFmode))"
--  "#"
--  "&& reload_completed"
--  [(pc)]
--{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
--  [(set_attr "length" "20,20,16")])
--
- (define_expand "extenddftf2"
-   [(parallel [(set (match_operand:TF 0 "nonimmediate_operand" "")
- 		   (float_extend:TF (match_operand:DF 1 "input_operand" "")))
diff --git a/gcc41-pr27567.patch b/gcc41-pr27567.patch
deleted file mode 100644
index b5f12ad..0000000
--- a/gcc41-pr27567.patch
+++ /dev/null
@@ -1,439 +0,0 @@
-2006-09-01  Jakub Jelinek  <jakub@redhat.com>
-
-	PR middle-end/27567
-	* builtins.c (fold_builtin_memcpy, fold_builtin_memmove): Remove.
-	(fold_builtin_mempcpy): Rename to...
-	(fold_builtin_memory_op): ... this.  Optimize one element copy
-	into an assignment.
-	(fold_builtin_memset, fold_builtin_bzero, fold_builtin_bcopy): New
-	functions.
-	(expand_builtin_strcpy, expand_builtin_strncpy): Handle COMPOUND_EXPR.
-	(expand_builtin_memcpy, expand_builtin_mempcpy,
-	expand_builtin_memmove): Likewise.  Use fold_builtin_memory_op.
-	(fold_builtin_1): Handle BUILT_IN_MEMSET, BUILT_IN_BZERO and
-	BUILT_IN_BCOPY.  Use fold_builtin_memory_op for
-	BUILT_IN_MEM{CPY,PCPY,MOVE}.
-
---- gcc/builtins.c.jj	2006-08-28 13:00:23.000000000 +0200
-+++ gcc/builtins.c	2006-09-01 11:55:51.000000000 +0200
-@@ -160,9 +160,7 @@ static tree fold_builtin_ceil (tree, tre
- static tree fold_builtin_round (tree, tree);
- static tree fold_builtin_int_roundingfn (tree, tree);
- static tree fold_builtin_bitop (tree, tree);
--static tree fold_builtin_memcpy (tree, tree);
--static tree fold_builtin_mempcpy (tree, tree, int);
--static tree fold_builtin_memmove (tree, tree);
-+static tree fold_builtin_memory_op (tree, tree, bool, int);
- static tree fold_builtin_strchr (tree, tree);
- static tree fold_builtin_memcmp (tree);
- static tree fold_builtin_strcmp (tree);
-@@ -2890,10 +2888,19 @@ expand_builtin_memcpy (tree exp, rtx tar
-       unsigned int dest_align
- 	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
-       rtx dest_mem, src_mem, dest_addr, len_rtx;
--      tree result = fold_builtin_memcpy (fndecl, arglist);
-+      tree result = fold_builtin_memory_op (arglist, TREE_TYPE (TREE_TYPE (fndecl)),
-+					    false, /*endp=*/0);
- 
-       if (result)
--	return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	{
-+	  while (TREE_CODE (result) == COMPOUND_EXPR)
-+	    {
-+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
-+			   EXPAND_NORMAL);
-+	      result = TREE_OPERAND (result, 1);
-+	    }
-+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	}
- 
-       /* If DEST is not a pointer type, call the normal function.  */
-       if (dest_align == 0)
-@@ -2979,11 +2986,19 @@ expand_builtin_mempcpy (tree arglist, tr
-       unsigned int dest_align
- 	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
-       rtx dest_mem, src_mem, len_rtx;
--      tree result = fold_builtin_mempcpy (arglist, type, endp);
-+      tree result = fold_builtin_memory_op (arglist, type, false, endp);
- 
-       if (result)
--	return expand_expr (result, target, mode, EXPAND_NORMAL);
--      
-+	{
-+	  while (TREE_CODE (result) == COMPOUND_EXPR)
-+	    {
-+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
-+			   EXPAND_NORMAL);
-+	      result = TREE_OPERAND (result, 1);
-+	    }
-+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	}
-+
-       /* If either SRC or DEST is not a pointer type, don't do this
-          operation in-line.  */
-       if (dest_align == 0 || src_align == 0)
-@@ -3053,10 +3068,18 @@ expand_builtin_memmove (tree arglist, tr
-       unsigned int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
-       unsigned int dest_align
- 	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
--      tree result = fold_builtin_memmove (arglist, type);
-+      tree result = fold_builtin_memory_op (arglist, type, false, /*endp=*/3);
- 
-       if (result)
--	return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	{
-+	  while (TREE_CODE (result) == COMPOUND_EXPR)
-+	    {
-+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
-+			   EXPAND_NORMAL);
-+	      result = TREE_OPERAND (result, 1);
-+	    }
-+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	}
- 
-       /* If DEST is not a pointer type, call the normal function.  */
-       if (dest_align == 0)
-@@ -3204,7 +3227,15 @@ expand_builtin_strcpy (tree fndecl, tree
-     {
-       tree result = fold_builtin_strcpy (fndecl, arglist, 0);
-       if (result)
--	return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	{
-+	  while (TREE_CODE (result) == COMPOUND_EXPR)
-+	    {
-+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
-+			   EXPAND_NORMAL);
-+	      result = TREE_OPERAND (result, 1);
-+	    }
-+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	}
- 
-       return expand_movstr (TREE_VALUE (arglist),
- 			    TREE_VALUE (TREE_CHAIN (arglist)),
-@@ -3330,7 +3361,15 @@ expand_builtin_strncpy (tree exp, rtx ta
-       tree result = fold_builtin_strncpy (fndecl, arglist, slen);
-       
-       if (result)
--	return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	{
-+	  while (TREE_CODE (result) == COMPOUND_EXPR)
-+	    {
-+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
-+			   EXPAND_NORMAL);
-+	      result = TREE_OPERAND (result, 1);
-+	    }
-+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
-+	}
- 
-       /* We must be passed a constant len and src parameter.  */
-       if (!host_integerp (len, 1) || !slen || !host_integerp (slen, 1))
-@@ -7982,78 +8021,121 @@ fold_builtin_exponent (tree fndecl, tree
-   return 0;
- }
- 
--/* Fold function call to builtin memcpy.  Return
-+/* Fold function call to builtin memset.  Return
-    NULL_TREE if no simplification can be made.  */
- 
- static tree
--fold_builtin_memcpy (tree fndecl, tree arglist)
-+fold_builtin_memset (tree arglist, tree type, bool ignore)
- {
--  tree dest, src, len;
-+  tree dest, c, len, var, ret;
-+  unsigned HOST_WIDE_INT length, cval;
- 
-   if (!validate_arglist (arglist,
--			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
-+			 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
-     return 0;
- 
-   dest = TREE_VALUE (arglist);
--  src = TREE_VALUE (TREE_CHAIN (arglist));
-+  c = TREE_VALUE (TREE_CHAIN (arglist));
-   len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- 
-+  if (! host_integerp (len, 1))
-+    return 0;
-+
-   /* If the LEN parameter is zero, return DEST.  */
-   if (integer_zerop (len))
--    return omit_one_operand (TREE_TYPE (TREE_TYPE (fndecl)), dest, src);
-+    return omit_one_operand (type, dest, c);
- 
--  /* If SRC and DEST are the same (and not volatile), return DEST.  */
--  if (operand_equal_p (src, dest, 0))
--    return omit_one_operand (TREE_TYPE (TREE_TYPE (fndecl)), dest, len);
-+  if (! host_integerp (c, 1) || TREE_SIDE_EFFECTS (dest))
-+    return 0;
- 
--  return 0;
-+  var = dest;
-+  STRIP_NOPS (var);
-+  if (TREE_CODE (var) != ADDR_EXPR)
-+    return 0;
-+
-+  var = TREE_OPERAND (var, 0);
-+  if (TREE_THIS_VOLATILE (var))
-+    return 0;
-+
-+  if (!INTEGRAL_TYPE_P (TREE_TYPE (var))
-+      && !POINTER_TYPE_P (TREE_TYPE (var)))
-+    return 0;
-+
-+  length = tree_low_cst (len, 1);
-+  if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (var))) != length
-+      || get_pointer_alignment (dest, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
-+	 < (int) length)
-+    return 0;
-+
-+  if (length > HOST_BITS_PER_WIDE_INT / BITS_PER_UNIT)
-+    return 0;
-+
-+  if (integer_zerop (c))
-+    cval = 0;
-+  else
-+    {
-+      if (CHAR_BIT != 8 || BITS_PER_UNIT != 8 || HOST_BITS_PER_WIDE_INT > 64)
-+	return 0;
-+
-+      cval = tree_low_cst (c, 1);
-+      cval &= 0xff;
-+      cval |= cval << 8;
-+      cval |= cval << 16;
-+      cval |= (cval << 31) << 1;
-+    }
-+
-+  ret = build_int_cst_type (TREE_TYPE (var), cval);
-+  ret = build2 (MODIFY_EXPR, TREE_TYPE (var), var, ret);
-+  if (ignore)
-+    return ret;
-+
-+  return omit_one_operand (type, dest, ret);
- }
- 
--/* Fold function call to builtin mempcpy.  Return
-+/* Fold function call to builtin memset.  Return
-    NULL_TREE if no simplification can be made.  */
- 
- static tree
--fold_builtin_mempcpy (tree arglist, tree type, int endp)
-+fold_builtin_bzero (tree arglist, bool ignore)
- {
--  if (validate_arglist (arglist,
--			POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
--    {
--      tree dest = TREE_VALUE (arglist);
--      tree src = TREE_VALUE (TREE_CHAIN (arglist));
--      tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
-+  tree dest, size, newarglist;
- 
--      /* If the LEN parameter is zero, return DEST.  */
--      if (integer_zerop (len))
--	return omit_one_operand (type, dest, src);
-+  if (!validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
-+    return 0;
- 
--      /* If SRC and DEST are the same (and not volatile), return DEST+LEN.  */
--      if (operand_equal_p (src, dest, 0))
--        {
--	  if (endp == 0)
--	    return omit_one_operand (type, dest, len);
-+  if (!ignore)
-+    return 0;
- 
--	  if (endp == 2)
--	    len = fold_build2 (MINUS_EXPR, TREE_TYPE (len), len,
--			       ssize_int (1));
--      
--	  len = fold_convert (TREE_TYPE (dest), len);
--	  len = fold_build2 (PLUS_EXPR, TREE_TYPE (dest), dest, len);
--	  return fold_convert (type, len);
--	}
--    }
--  return 0;
-+  dest = TREE_VALUE (arglist);
-+  size = TREE_VALUE (TREE_CHAIN (arglist));
-+
-+  /* New argument list transforming bzero(ptr x, int y) to
-+     memset(ptr x, int 0, size_t y).   This is done this way
-+     so that if it isn't expanded inline, we fallback to
-+     calling bzero instead of memset.  */
-+
-+  newarglist = build_tree_list (NULL_TREE, fold_convert (sizetype, size));
-+  newarglist = tree_cons (NULL_TREE, integer_zero_node, newarglist);
-+  newarglist = tree_cons (NULL_TREE, dest, newarglist);
-+  return fold_builtin_memset (newarglist, void_type_node, ignore);
- }
- 
--/* Fold function call to builtin memmove.  Return
--   NULL_TREE if no simplification can be made.  */
-+/* Fold function call to builtin mem{{,p}cpy,move}.  Return
-+   NULL_TREE if no simplification can be made.
-+   If ENDP is 0, return DEST (like memcpy).
-+   If ENDP is 1, return DEST+LEN (like mempcpy).
-+   If ENDP is 2, return DEST+LEN-1 (like stpcpy).
-+   If ENDP is 3, return DEST, additionally *SRC and *DEST may overlap
-+   (memmove).   */
- 
- static tree
--fold_builtin_memmove (tree arglist, tree type)
-+fold_builtin_memory_op (tree arglist, tree type, bool ignore, int endp)
- {
--  tree dest, src, len;
-+  tree dest, src, len, destvar, srcvar, expr;
-+  unsigned HOST_WIDE_INT length;
- 
--  if (!validate_arglist (arglist,
--			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
-+  if (! validate_arglist (arglist,
-+			  POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
-     return 0;
- 
-   dest = TREE_VALUE (arglist);
-@@ -8064,11 +8146,115 @@ fold_builtin_memmove (tree arglist, tree
-   if (integer_zerop (len))
-     return omit_one_operand (type, dest, src);
- 
--  /* If SRC and DEST are the same (and not volatile), return DEST.  */
-+  /* If SRC and DEST are the same (and not volatile), return
-+     DEST{,+LEN,+LEN-1}.  */
-   if (operand_equal_p (src, dest, 0))
--    return omit_one_operand (type, dest, len);
-+    expr = len;
-+  else
-+    {
-+      if (! host_integerp (len, 1))
-+	return 0;
- 
--  return 0;
-+      if (TREE_SIDE_EFFECTS (dest) || TREE_SIDE_EFFECTS (src))
-+	return 0;
-+
-+      destvar = dest;
-+      STRIP_NOPS (destvar);
-+      if (TREE_CODE (destvar) != ADDR_EXPR)
-+	return 0;
-+
-+      destvar = TREE_OPERAND (destvar, 0);
-+      if (TREE_THIS_VOLATILE (destvar))
-+	return 0;
-+
-+      if (!INTEGRAL_TYPE_P (TREE_TYPE (destvar))
-+	  && !POINTER_TYPE_P (TREE_TYPE (destvar))
-+	  && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (destvar)))
-+	return 0;
-+
-+      srcvar = src;
-+      STRIP_NOPS (srcvar);
-+      if (TREE_CODE (srcvar) != ADDR_EXPR)
-+	return 0;
-+
-+      srcvar = TREE_OPERAND (srcvar, 0);
-+      if (TREE_THIS_VOLATILE (srcvar))
-+	return 0;
-+
-+      if (!INTEGRAL_TYPE_P (TREE_TYPE (srcvar))
-+	  && !POINTER_TYPE_P (TREE_TYPE (srcvar))
-+	  && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (srcvar)))
-+	return 0;
-+
-+      length = tree_low_cst (len, 1);
-+      if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (destvar))) != length
-+	  || get_pointer_alignment (dest, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
-+	     < (int) length
-+	  || GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (srcvar))) != length
-+	  || get_pointer_alignment (src, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
-+	     < (int) length)
-+	return 0;
-+
-+      if ((INTEGRAL_TYPE_P (TREE_TYPE (srcvar))
-+	   || POINTER_TYPE_P (TREE_TYPE (srcvar)))
-+	  && (INTEGRAL_TYPE_P (TREE_TYPE (destvar))
-+	      || POINTER_TYPE_P (TREE_TYPE (destvar))))
-+	expr = fold_convert (TREE_TYPE (destvar), srcvar);
-+      else
-+	expr = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (destvar), srcvar);
-+      expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, expr);
-+    }
-+
-+  if (ignore)
-+    return expr;
-+
-+  if (endp == 0 || endp == 3)
-+    return omit_one_operand (type, dest, expr);
-+
-+  if (expr == len)
-+    expr = 0;
-+
-+  if (endp == 2)
-+    len = fold_build2 (MINUS_EXPR, TREE_TYPE (len), len,
-+		       ssize_int (1));
-+
-+  len = fold_convert (TREE_TYPE (dest), len);
-+  dest = fold_build2 (PLUS_EXPR, TREE_TYPE (dest), dest, len);
-+  dest = fold_convert (type, dest);
-+  if (expr)
-+    dest = omit_one_operand (type, dest, expr);
-+  return dest;
-+}
-+
-+/* Fold function call to builtin bcopy.  Return NULL_TREE if no
-+   simplification can be made.  */
-+
-+static tree
-+fold_builtin_bcopy (tree arglist, bool ignore)
-+{
-+  tree src, dest, size, newarglist;
-+
-+  if (!validate_arglist (arglist,
-+			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
-+    return 0;
-+
-+  if (! ignore)
-+    return 0;
-+
-+  src = TREE_VALUE (arglist);
-+  dest = TREE_VALUE (TREE_CHAIN (arglist));
-+  size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
-+
-+  /* New argument list transforming bcopy(ptr x, ptr y, int z) to
-+     memmove(ptr y, ptr x, size_t z).   This is done this way
-+     so that if it isn't expanded inline, we fallback to
-+     calling bcopy instead of memmove.  */
-+
-+  newarglist = build_tree_list (NULL_TREE, fold_convert (sizetype, size));
-+  newarglist = tree_cons (NULL_TREE, src, newarglist);
-+  newarglist = tree_cons (NULL_TREE, dest, newarglist);
-+
-+  return fold_builtin_memory_op (newarglist, void_type_node, true, /*endp=*/3);
- }
- 
- /* Fold function call to builtin strcpy.  If LEN is not NULL, it represents
-@@ -9039,14 +9225,23 @@ fold_builtin_1 (tree fndecl, tree arglis
-     case BUILT_IN_PARITYLL:
-       return fold_builtin_bitop (fndecl, arglist);
- 
-+    case BUILT_IN_MEMSET:
-+      return fold_builtin_memset (arglist, type, ignore);
-+
-     case BUILT_IN_MEMCPY:
--      return fold_builtin_memcpy (fndecl, arglist);
-+      return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/0);
- 
-     case BUILT_IN_MEMPCPY:
--      return fold_builtin_mempcpy (arglist, type, /*endp=*/1);
-+      return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/1);
- 
-     case BUILT_IN_MEMMOVE:
--      return fold_builtin_memmove (arglist, type);
-+      return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/3);
-+
-+    case BUILT_IN_BZERO:
-+      return fold_builtin_bzero (arglist, ignore);
-+
-+    case BUILT_IN_BCOPY:
-+      return fold_builtin_bcopy (arglist, ignore);
- 
-     case BUILT_IN_SIGNBIT:
-     case BUILT_IN_SIGNBITF:
diff --git a/gcc41-pr28482.patch b/gcc41-pr28482.patch
deleted file mode 100644
index 2cb3638..0000000
--- a/gcc41-pr28482.patch
+++ /dev/null
@@ -1,19 +0,0 @@
-2007-05-02  Jakub Jelinek  <jakub@redhat.com>
-
-	PR libgomp/28482
-	* configure.tgt: Don't link with -Wl,-z,nodlopen even on Linux.
-
---- libgomp/configure.tgt.jj	2007-04-20 12:55:40.000000000 +0200
-+++ libgomp/configure.tgt	2007-05-02 17:32:15.000000000 +0200
-@@ -11,11 +11,8 @@
- #  XLDFLAGS		Add extra link flags to use.
- 
- # Optimize TLS usage by avoiding the overhead of dynamic allocation.
--# This does require that the library be present during process 
--# startup, so mark the library as not to be dlopened.
- if test $have_tls = yes && test "$with_gnu_ld" = "yes"; then
- 	XCFLAGS="${XCFLAGS} -ftls-model=initial-exec"
--	XLDFLAGS="${XLDFLAGS} -Wl,-z,nodlopen"
- fi
- 
- # Since we require POSIX threads, assume a POSIX system by default.
diff --git a/gcc41-pr28690.patch b/gcc41-pr28690.patch
new file mode 100644
index 0000000..b1c2cc1
--- /dev/null
+++ b/gcc41-pr28690.patch
@@ -0,0 +1,155 @@
+2007-06-26  Jakub Jelinek  <jakub@redhat.com>
+
+	* defaults.h (TARGET_INDEX_OPERAND_FIRST): Define.
+	* config/rs6000/rs6000.h (TARGET_INDEX_OPERAND_FIRST): Define.
+	* optabs.c (emit_cmp_and_jump_insns): Don't call swap_condition
+	twice.
+	* rtlanal.c (commutative_operand_precedence): Only prefer
+	REG_POINTER and MEM_POINTER operands over REG and MEM operands
+	if TARGET_INDEX_OPERAND_FIRST.
+	(swap_commutative_operands_p): Only sort on REGNO if
+	TARGET_INDEX_OPERAND_FIRST.
+	* tree-ssa-address.c (gen_addr_rtx): Only use simplify_gen_binary
+	instead of gen_rtx_PLUS if TARGET_INDEX_OPERAND_FIRST.
+
+2007-06-20  Peter Bergner  <bergner@vnet.ibm.com>
+
+	PR middle-end/28690
+	* optabs.c (emit_cmp_and_jump_insns): Allow EQ compares.
+	* rtlanal.c (commutative_operand_precedence): Prefer both REG_POINTER
+	and MEM_POINTER operands over REG and MEM operands.
+	(swap_commutative_operands_p): In case of a tie, sort on REGNO.
+	* tree-ssa-address.c (gen_addr_rtx): Use simplify_gen_binary
+	instead of gen_rtx_PLUS.
+
+--- gcc/defaults.h.jj	2007-02-20 22:39:12.000000000 +0100
++++ gcc/defaults.h	2007-06-26 00:32:16.000000000 +0200
+@@ -785,6 +785,10 @@ Software Foundation, 51 Franklin Street,
+ #define TARGET_C99_FUNCTIONS 0
+ #endif
+ 
++#ifndef TARGET_INDEX_OPERAND_FIRST
++#define TARGET_INDEX_OPERAND_FIRST 0
++#endif
++
+ /* Indicate that CLZ and CTZ are undefined at zero.  */
+ #ifndef CLZ_DEFINED_VALUE_AT_ZERO
+ #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE)  0
+--- gcc/config/rs6000/rs6000.h.jj	2007-02-20 22:39:00.000000000 +0100
++++ gcc/config/rs6000/rs6000.h	2007-06-26 00:33:32.000000000 +0200
+@@ -57,6 +57,8 @@
+ #define PPC405_ERRATUM77 0
+ #endif
+ 
++#define TARGET_INDEX_OPERAND_FIRST (rs6000_cpu == PROCESSOR_POWER6)
++
+ /* Common ASM definitions used by ASM_SPEC among the various targets
+    for handling -mcpu=xxx switches.  */
+ #define ASM_CPU_SPEC \
+--- gcc/optabs.c.jj	2007-02-20 22:39:12.000000000 +0100
++++ gcc/optabs.c	2007-06-26 00:30:27.000000000 +0200
+@@ -3673,12 +3673,16 @@ emit_cmp_and_jump_insns (rtx x, rtx y, e
+   /* Swap operands and condition to ensure canonical RTL.  */
+   if (swap_commutative_operands_p (x, y))
+     {
+-      /* If we're not emitting a branch, this means some caller
+-         is out of sync.  */
+-      gcc_assert (label);
++      enum rtx_code swapped_comparison = swap_condition (comparison);
++
++      /* If we're not emitting a branch, callers are required to pass
++	 operands in an order conforming to canonical RTL.  We relax this
++	 for commutative comparisons so callers using EQ don't need to do
++	 swapping by hand.  */
++      gcc_assert (label || swapped_comparison == comparison);
+ 
+       op0 = y, op1 = x;
+-      comparison = swap_condition (comparison);
++      comparison = swapped_comparison;
+     }
+ 
+ #ifdef HAVE_cc0
+--- gcc/rtlanal.c.jj	2007-02-20 22:39:12.000000000 +0100
++++ gcc/rtlanal.c	2007-06-26 00:28:56.000000000 +0200
+@@ -2890,9 +2890,9 @@ commutative_operand_precedence (rtx op)
+   
+   /* Constants always come the second operand.  Prefer "nice" constants.  */
+   if (code == CONST_INT)
+-    return -7;
++    return -10;
+   if (code == CONST_DOUBLE)
+-    return -6;
++    return -9;
+   op = avoid_constant_pool_reference (op);
+   code = GET_CODE (op);
+ 
+@@ -2900,26 +2900,31 @@ commutative_operand_precedence (rtx op)
+     {
+     case RTX_CONST_OBJ:
+       if (code == CONST_INT)
+-        return -5;
++	return -8;
+       if (code == CONST_DOUBLE)
+-        return -4;
+-      return -3;
++	return -7;
++      return -6;
+ 
+     case RTX_EXTRA:
+       /* SUBREGs of objects should come second.  */
+       if (code == SUBREG && OBJECT_P (SUBREG_REG (op)))
+-        return -2;
++	return -5;
+ 
+       if (!CONSTANT_P (op))
+         return 0;
+       else
+ 	/* As for RTX_CONST_OBJ.  */
+-	return -3;
++	return -6;
+ 
+     case RTX_OBJ:
+       /* Complex expressions should be the first, so decrease priority
+          of objects.  */
+-      return -1;
++      if (!TARGET_INDEX_OPERAND_FIRST)
++	return -1;
++      if (REG_P (op))
++	return (REG_POINTER (op)) ? -1 : -3;
++      else
++	return (MEM_P (op) && MEM_POINTER (op)) ? -2 : -4;
+ 
+     case RTX_COMM_ARITH:
+       /* Prefer operands that are themselves commutative to be first.
+@@ -2949,8 +2954,16 @@ commutative_operand_precedence (rtx op)
+ int
+ swap_commutative_operands_p (rtx x, rtx y)
+ {
+-  return (commutative_operand_precedence (x)
+-	  < commutative_operand_precedence (y));
++  int result = (commutative_operand_precedence (x)
++		- commutative_operand_precedence (y));
++  if (!TARGET_INDEX_OPERAND_FIRST || result)
++    return result < 0;
++
++  /* Group together equal REGs to do more simplification.  */
++  if (REG_P (x) && REG_P (y))
++    return REGNO (x) > REGNO (y);
++
++  return 0;
+ }
+ 
+ /* Return 1 if X is an autoincrement side effect and the register is
+--- gcc/tree-ssa-address.c.jj	2007-02-20 22:39:12.000000000 +0100
++++ gcc/tree-ssa-address.c	2007-06-26 00:29:49.000000000 +0200
+@@ -124,7 +124,9 @@ gen_addr_rtx (rtx symbol, rtx base, rtx 
+   if (base)
+     {
+       if (*addr)
+-	*addr = gen_rtx_PLUS (Pmode, *addr, base);
++	*addr = (TARGET_INDEX_OPERAND_FIRST
++		 ? simplify_gen_binary (PLUS, Pmode, base, *addr)
++		 : gen_rtx_PLUS (Pmode, *addr, base));
+       else
+ 	*addr = base;
+     }
diff --git a/gcc41-pr28709.patch b/gcc41-pr28709.patch
deleted file mode 100644
index 33c9eef..0000000
--- a/gcc41-pr28709.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-2006-08-16  Jakub Jelinek  <jakub@redhat.com>
-
-	PR preprocessor/28709
-	* macro.c (paste_tokens): Do error reporting here, use BUF with the
-	spelled LHS token as opposed to spelling it again.
-	(paste_all_tokens): Don't report errors here, just break on failure.
-
-	* gcc.dg/cpp/paste14.c: New test.
-
---- libcpp/macro.c.jj	2006-06-09 23:07:54.000000000 +0200
-+++ libcpp/macro.c	2006-08-16 17:30:17.000000000 +0200
-@@ -430,15 +430,14 @@ stringify_arg (cpp_reader *pfile, macro_
- static bool
- paste_tokens (cpp_reader *pfile, const cpp_token **plhs, const cpp_token *rhs)
- {
--  unsigned char *buf, *end;
-+  unsigned char *buf, *end, *lhsend;
-   const cpp_token *lhs;
-   unsigned int len;
--  bool valid;
- 
-   lhs = *plhs;
-   len = cpp_token_len (lhs) + cpp_token_len (rhs) + 1;
-   buf = (unsigned char *) alloca (len);
--  end = cpp_spell_token (pfile, lhs, buf, false);
-+  end = lhsend = cpp_spell_token (pfile, lhs, buf, false);
- 
-   /* Avoid comment headers, since they are still processed in stage 3.
-      It is simpler to insert a space here, rather than modifying the
-@@ -455,10 +454,22 @@ paste_tokens (cpp_reader *pfile, const c
-   /* Set pfile->cur_token as required by _cpp_lex_direct.  */
-   pfile->cur_token = _cpp_temp_token (pfile);
-   *plhs = _cpp_lex_direct (pfile);
--  valid = pfile->buffer->cur == pfile->buffer->rlimit;
--  _cpp_pop_buffer (pfile);
-+  if (pfile->buffer->cur != pfile->buffer->rlimit)
-+    {
-+      _cpp_pop_buffer (pfile);
-+      _cpp_backup_tokens (pfile, 1);
-+      *lhsend = '\0';
- 
--  return valid;
-+      /* Mandatory error for all apart from assembler.  */
-+      if (CPP_OPTION (pfile, lang) != CLK_ASM)
-+	cpp_error (pfile, CPP_DL_ERROR,
-+	 "pasting \"%s\" and \"%s\" does not give a valid preprocessing token",
-+		   buf, cpp_token_as_text (pfile, rhs));
-+      return false;
-+    }
-+
-+  _cpp_pop_buffer (pfile);
-+  return true;
- }
- 
- /* Handles an arbitrarily long sequence of ## operators, with initial
-@@ -490,17 +501,7 @@ paste_all_tokens (cpp_reader *pfile, con
- 	abort ();
- 
-       if (!paste_tokens (pfile, &lhs, rhs))
--	{
--	  _cpp_backup_tokens (pfile, 1);
--
--	  /* Mandatory error for all apart from assembler.  */
--	  if (CPP_OPTION (pfile, lang) != CLK_ASM)
--	    cpp_error (pfile, CPP_DL_ERROR,
--	 "pasting \"%s\" and \"%s\" does not give a valid preprocessing token",
--		       cpp_token_as_text (pfile, lhs),
--		       cpp_token_as_text (pfile, rhs));
--	  break;
--	}
-+	break;
-     }
-   while (rhs->flags & PASTE_LEFT);
- 
---- gcc/testsuite/gcc.dg/cpp/paste14.c.jj	2006-08-16 16:51:45.000000000 +0200
-+++ gcc/testsuite/gcc.dg/cpp/paste14.c	2006-08-16 16:53:30.000000000 +0200
-@@ -0,0 +1,7 @@
-+/* PR preprocessor/28709 */
-+/* { dg-do preprocess } */
-+
-+#define foo - ## >>
-+foo		/* { dg-error "pasting \"-\" and \">>\"" } */
-+#define bar = ## ==
-+bar		/* { dg-error "pasting \"=\" and \"==\"" } */
diff --git a/gcc41-pr29059.patch b/gcc41-pr29059.patch
deleted file mode 100644
index b9dbd8a..0000000
--- a/gcc41-pr29059.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-2006-09-16  Andrew Pinski  <pinskia@physics.uc.edu>
-
-	PR tree-opt/29059
-	* tree-ssa-propagate.c (set_rhs): Reject MODIFY_EXPR right
-	away for the expr argument.
-
-	* gcc.c-torture/compile/strcpy-1.c: New test.
-	* gcc.c-torture/compile/strcpy-2.c: New test.
-	* gcc.c-torture/compile/memcpy-1.c: New test.
-	* gcc.c-torture/compile/memcpy-2.c: New test.
-
---- gcc/tree-ssa-propagate.c	(revision 116996)
-+++ gcc/tree-ssa-propagate.c	(revision 116997)
-@@ -588,7 +588,8 @@ set_rhs (tree *stmt_p, tree expr)
- 	  && !is_gimple_val (TREE_OPERAND (TREE_OPERAND (expr, 0), 1)))
- 	return false;
-     }
--  else if (code == COMPOUND_EXPR)
-+  else if (code == COMPOUND_EXPR
-+	   || code == MODIFY_EXPR)
-     return false;
- 
-   switch (TREE_CODE (stmt))
---- gcc/testsuite/gcc.c-torture/compile/strcpy-1.c	(revision 0)
-+++ gcc/testsuite/gcc.c-torture/compile/strcpy-1.c	(revision 116997)
-@@ -0,0 +1,15 @@
-+
-+
-+typedef struct
-+{
-+  char str[20];
-+}STACK;
-+STACK stack[15];
-+int level;
-+rezero ()
-+{
-+  level = 0;
-+  __builtin_strcpy (stack[level].str, "");
-+}
-+
-+
---- gcc/testsuite/gcc.c-torture/compile/strcpy-2.c	(revision 0)
-+++ gcc/testsuite/gcc.c-torture/compile/strcpy-2.c	(revision 116997)
-@@ -0,0 +1,7 @@
-+char wrkstr_un[270];
-+extern void
-+LoadUserAlph (char *s)
-+{
-+  s = &wrkstr_un[0];
-+  __builtin_strcpy (s, "");
-+};
---- gcc/testsuite/gcc.c-torture/compile/memcpy-1.c	(revision 0)
-+++ gcc/testsuite/gcc.c-torture/compile/memcpy-1.c	(revision 116997)
-@@ -0,0 +1,9 @@
-+static const char OggFLAC__MAPPING_VERSION_MAJOR = 1;
-+void f(void)
-+{
-+  char synthetic_first_packet_body[10];
-+  char *b = &synthetic_first_packet_body[4];
-+  __builtin_memcpy (b, &OggFLAC__MAPPING_VERSION_MAJOR, (1u));
-+}
-+
-+
---- gcc/testsuite/gcc.c-torture/compile/memcpy-2.c	(revision 0)
-+++ gcc/testsuite/gcc.c-torture/compile/memcpy-2.c	(revision 116997)
-@@ -0,0 +1,10 @@
-+static const char OggFLAC__MAPPING_VERSION_MAJOR = 1;
-+void f(void)
-+{
-+  char synthetic_first_packet_body[10];
-+  char *b = synthetic_first_packet_body;
-+  b+=4u;
-+  __builtin_memcpy (b, &OggFLAC__MAPPING_VERSION_MAJOR, (1u));
-+}
-+
-+
diff --git a/gcc41-pr29272.patch b/gcc41-pr29272.patch
deleted file mode 100644
index 50fdf99..0000000
--- a/gcc41-pr29272.patch
+++ /dev/null
@@ -1,110 +0,0 @@
-2006-09-28  Jakub Jelinek  <jakub@redhat.com>
-
-	PR middle-end/29272
-	* builtins.c (fold_builtin_memset, fold_builtin_memory_op): Restrict
-	single entry optimization to variables and components thereof.
-
-	* gcc.c-torture/execute/20060930-1.c: New test.
-
---- gcc/builtins.c.jj	2006-09-22 10:29:55.000000000 +0200
-+++ gcc/builtins.c	2006-09-28 20:31:30.000000000 +0200
-@@ -7905,7 +7905,7 @@ fold_builtin_exponent (tree fndecl, tree
- static tree
- fold_builtin_memset (tree arglist, tree type, bool ignore)
- {
--  tree dest, c, len, var, ret;
-+  tree dest, c, len, var, ret, inner;
-   unsigned HOST_WIDE_INT length, cval;
- 
-   if (!validate_arglist (arglist,
-@@ -7939,6 +7939,15 @@ fold_builtin_memset (tree arglist, tree 
-       && !POINTER_TYPE_P (TREE_TYPE (var)))
-     return 0;
- 
-+  /* If var is a VAR_DECL or a component thereof,
-+     we can use its alias set, otherwise we'd need to make
-+     sure we go through alias set 0.  */
-+  inner = var;
-+  while (handled_component_p (inner))
-+    inner = TREE_OPERAND (inner, 0);
-+  if (! SSA_VAR_P (inner))
-+    return 0;
-+
-   length = tree_low_cst (len, 1);
-   if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (var))) != length
-       || get_pointer_alignment (dest, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
-@@ -8009,7 +8018,7 @@ fold_builtin_bzero (tree arglist, bool i
- static tree
- fold_builtin_memory_op (tree arglist, tree type, bool ignore, int endp)
- {
--  tree dest, src, len, destvar, srcvar, expr;
-+  tree dest, src, len, destvar, srcvar, expr, inner;
-   unsigned HOST_WIDE_INT length;
- 
-   if (! validate_arglist (arglist,
-@@ -8050,6 +8059,15 @@ fold_builtin_memory_op (tree arglist, tr
- 	  && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (destvar)))
- 	return 0;
- 
-+      /* If destvar is a VAR_DECL or a component thereof,
-+	 we can use its alias set, otherwise we'd need to make
-+	 sure we go through alias set 0.  */
-+      inner = destvar;
-+      while (handled_component_p (inner))
-+	inner = TREE_OPERAND (inner, 0);
-+      if (! SSA_VAR_P (inner))
-+	return 0;
-+
-       srcvar = src;
-       STRIP_NOPS (srcvar);
-       if (TREE_CODE (srcvar) != ADDR_EXPR)
-@@ -8064,6 +8082,15 @@ fold_builtin_memory_op (tree arglist, tr
- 	  && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (srcvar)))
- 	return 0;
- 
-+      /* If srcvar is a VAR_DECL or a component thereof,
-+	 we can use its alias set, otherwise we'd need to make
-+	 sure we go through alias set 0.  */
-+      inner = srcvar;
-+      while (handled_component_p (inner))
-+	inner = TREE_OPERAND (inner, 0);
-+      if (! SSA_VAR_P (inner))
-+	return 0;
-+
-       length = tree_low_cst (len, 1);
-       if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (destvar))) != length
- 	  || get_pointer_alignment (dest, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
---- gcc/testsuite/gcc.c-torture/execute/20060930-1.c.jj	2006-09-30 21:10:17.000000000 +0200
-+++ gcc/testsuite/gcc.c-torture/execute/20060930-1.c	2006-09-30 21:09:33.000000000 +0200
-@@ -0,0 +1,31 @@
-+/* PR middle-end/29272 */
-+
-+extern void abort (void);
-+
-+struct S { struct S *s; } s;
-+struct T { struct T *t; } t;
-+
-+static inline void
-+foo (void *s)
-+{
-+  struct T *p = s;
-+  __builtin_memcpy (&p->t, &t.t, sizeof (t.t));
-+}
-+
-+void *
-+__attribute__((noinline))
-+bar (void *p, struct S *q)
-+{
-+  q->s = &s;
-+  foo (p);
-+  return q->s;
-+}
-+
-+int
-+main (void)
-+{
-+  t.t = &t;
-+  if (bar (&s, &s) != (void *) &t)
-+    abort ();
-+  return 0;
-+}
diff --git a/gcc41-pr29299.patch b/gcc41-pr29299.patch
deleted file mode 100644
index ff0acf5..0000000
--- a/gcc41-pr29299.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-2006-10-18  Jan Hubicka  <jh@suse.cz>
-
-	PR middle-end/29299
-	* cfgexpand.c (expand_used_vars_for_block): Vars marked used by user
-	are used.
-
-2006-10-15  Jan Hubicka  <jh@suse.cz>
-            Richard Guenther  <rguenther@suse.de>
-
-	PR middle-end/29299
-	* cgraphunit.c (cgraph_finalize_compilation_unit): Call
-	process_function_and_variable_attributes on all variables, including
-	those discovered during cgraph construction phase.
-
-	* gcc.dg/pr29299.c: New testcase.
-
---- gcc/cgraphunit.c	(revision 117745)
-+++ gcc/cgraphunit.c	(revision 117863)
-@@ -1055,6 +1055,7 @@ cgraph_finalize_compilation_unit (void)
-   /* Keep track of already processed nodes when called multiple times for
-      intermodule optimization.  */
-   static struct cgraph_node *first_analyzed;
-+  struct cgraph_node *first_processed = first_analyzed;
-   static struct cgraph_varpool_node *first_analyzed_var;
- 
-   if (errorcount || sorrycount)
-@@ -1077,7 +1078,10 @@ cgraph_finalize_compilation_unit (void)
-     }
- 
-   timevar_push (TV_CGRAPH);
--  process_function_and_variable_attributes (first_analyzed, first_analyzed_var);
-+  process_function_and_variable_attributes (first_processed,
-+					    first_analyzed_var);
-+  first_processed = cgraph_nodes;
-+  first_analyzed_var = cgraph_varpool_nodes;
-   cgraph_varpool_analyze_pending_decls ();
-   if (cgraph_dump_file)
-     {
-@@ -1119,11 +1123,16 @@ cgraph_finalize_compilation_unit (void)
- 	if (!edge->callee->reachable)
- 	  cgraph_mark_reachable_node (edge->callee);
- 
-+      /* We finalize local static variables during constructing callgraph
-+         edges.  Process their attributes too.  */
-+      process_function_and_variable_attributes (first_processed,
-+						first_analyzed_var);
-+      first_processed = cgraph_nodes;
-+      first_analyzed_var = cgraph_varpool_nodes;
-       cgraph_varpool_analyze_pending_decls ();
-     }
- 
-   /* Collect entry points to the unit.  */
--
-   if (cgraph_dump_file)
-     {
-       fprintf (cgraph_dump_file, "Unit entry points:");
-@@ -1163,7 +1172,6 @@ cgraph_finalize_compilation_unit (void)
-       dump_cgraph (cgraph_dump_file);
-     }
-   first_analyzed = cgraph_nodes;
--  first_analyzed_var = cgraph_varpool_nodes;
-   ggc_collect ();
-   timevar_pop (TV_CGRAPH);
- }
---- gcc/cfgexpand.c	(revision 117745)
-+++ gcc/cfgexpand.c	(revision 117863)
-@@ -764,7 +764,12 @@ expand_used_vars_for_block (tree block, 
- 
-   /* Expand all variables at this level.  */
-   for (t = BLOCK_VARS (block); t ; t = TREE_CHAIN (t))
--    if (TREE_USED (t))
-+    if (TREE_USED (t)
-+	/* Force local static variables to be output when marked by
-+	   used attribute.  For unit-at-a-time, cgraph code already takes
-+	   care of this.  */
-+	|| (!flag_unit_at_a_time && TREE_STATIC (t)
-+	    && DECL_PRESERVE_P (t)))
-       expand_one_var (t, toplevel);
- 
-   this_sv_num = stack_vars_num;
---- gcc/testsuite/gcc.dg/pr29299.c	(revision 0)
-+++ gcc/testsuite/gcc.dg/pr29299.c	(revision 117863)
-@@ -0,0 +1,10 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O" } */
-+
-+static int bof __attribute__((used));
-+int foo()
-+{
-+	static int barbarbarbar __attribute__((used));
-+};
-+
-+/* { dg-final { scan-assembler "barbarbarbar" } } */
diff --git a/gcc41-pr31187.patch b/gcc41-pr31187.patch
deleted file mode 100644
index 33d688c..0000000
--- a/gcc41-pr31187.patch
+++ /dev/null
@@ -1,113 +0,0 @@
-2007-03-30  Jason Merrill  <jason@redhat.com>
-
-	PR c++/31187
-	* typeck.c (cp_type_readonly): New fn.
-	* cp-tree.h: Declare it.
-	* decl.c (start_decl): Set implicit DECL_THIS_STATIC here.
-	(cp_finish_decl): Not here.
-
-2007-04-02  Jason Merrill  <jason@redhat.com>
-
-	PR c++/31187
-	* g++.dg/ext/visibility/anon3.C: New test.
-
---- gcc/cp/typeck.c	(revision 123431)
-+++ gcc/cp/typeck.c	(revision 123432)
-@@ -6897,6 +6897,16 @@ cp_type_quals (tree type)
-   return TYPE_QUALS (type);
- }
- 
-+/* Returns nonzero if the TYPE is const from a C++ perspective: look inside
-+   arrays.  */
-+
-+bool
-+cp_type_readonly (tree type)
-+{
-+  type = strip_array_types (type);
-+  return TYPE_READONLY (type);
-+}
-+
- /* Returns nonzero if the TYPE contains a mutable member.  */
- 
- bool
---- gcc/cp/decl.c	(revision 123431)
-+++ gcc/cp/decl.c	(revision 123432)
-@@ -3817,6 +3817,7 @@ start_decl (const cp_declarator *declara
-   tree decl;
-   tree type, tem;
-   tree context;
-+  bool was_public;
- 
-   *pushed_scope_p = NULL_TREE;
- 
-@@ -3969,6 +3970,8 @@ start_decl (const cp_declarator *declara
- 		 decl);
-     }
- 
-+  was_public = TREE_PUBLIC (decl);
-+
-   /* Enter this declaration into the symbol table.  */
-   tem = maybe_push_decl (decl);
- 
-@@ -3988,6 +3991,17 @@ start_decl (const cp_declarator *declara
- 		       && (flag_conserve_space || ! TREE_PUBLIC (tem)));
- #endif
- 
-+  if (TREE_CODE (tem) == VAR_DECL
-+      && DECL_NAMESPACE_SCOPE_P (tem) && !TREE_PUBLIC (tem) && !was_public
-+      && !DECL_THIS_STATIC (tem) && !DECL_ARTIFICIAL (tem))
-+    {
-+      /* This is a const variable with implicit 'static'.  Set
-+	 DECL_THIS_STATIC so we can tell it from variables that are
-+	 !TREE_PUBLIC because of the anonymous namespace.  */
-+      gcc_assert (cp_type_readonly (TREE_TYPE (tem)));
-+      DECL_THIS_STATIC (tem) = 1;
-+    }
-+
-   if (! processing_template_decl)
-     start_decl_1 (tem);
- 
-@@ -5320,14 +5334,6 @@ cp_finish_decl (tree decl, tree init, bo
- 	{
- 	  layout_var_decl (decl);
- 	  maybe_commonize_var (decl);
--	  if (DECL_NAMESPACE_SCOPE_P (decl) && !TREE_PUBLIC (decl)
--	      && !DECL_THIS_STATIC (decl) && !DECL_ARTIFICIAL (decl))
--	    {
--	      /* This is a const variable with implicit 'static'.  Set
--		 DECL_THIS_STATIC so we can tell it from variables that are
--		 !TREE_PUBLIC because of the anonymous namespace.  */
--	      DECL_THIS_STATIC (decl) = 1;
--	    }
- 	}
- 
-       make_rtl_for_nonlocal_decl (decl, init, asmspec);
---- gcc/cp/cp-tree.h	(revision 123431)
-+++ gcc/cp/cp-tree.h	(revision 123432)
-@@ -4742,6 +4742,7 @@ extern bool comp_ptr_ttypes_const		(tree
- extern int ptr_reasonably_similar		(tree, tree);
- extern tree build_ptrmemfunc			(tree, tree, int, bool);
- extern int cp_type_quals			(tree);
-+extern bool cp_type_readonly			(tree);
- extern bool cp_has_mutable_p			(tree);
- extern bool at_least_as_qualified_p		(tree, tree);
- extern void cp_apply_type_quals_to_decl		(int, tree);
---- gcc/testsuite/g++.dg/ext/visibility/anon3.C	(revision 0)
-+++ gcc/testsuite/g++.dg/ext/visibility/anon3.C	(revision 123432)
-@@ -0,0 +1,16 @@
-+// PR c++/31187
-+// Bug: the repeated declaration was confusing the compiler into
-+// thinking that foo1 had language internal linkage.
-+
-+class foo { };
-+
-+namespace
-+{
-+  extern foo foo1;
-+  foo foo1;
-+}
-+
-+template< foo * >
-+class bar { };
-+
-+bar< &foo1 > bar1;
diff --git a/gcc41-pr31748.patch b/gcc41-pr31748.patch
deleted file mode 100644
index 1329d44..0000000
--- a/gcc41-pr31748.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-2007-05-01  Jakub Jelinek  <jakub@redhat.com>
-
-	PR c++/31748
-	* semantics.c (finish_omp_clauses): Use %qD instead of %qE for
-	DECL_P in not a variable and appears more than once error messages.
-
-	* g++.dg/gomp/pr31748.C: New test.
-
---- gcc/cp/semantics.c.jj	2007-04-26 09:30:58.000000000 +0200
-+++ gcc/cp/semantics.c	2007-05-01 14:22:58.000000000 +0200
-@@ -3376,14 +3376,17 @@ finish_omp_clauses (tree clauses)
- 	    {
- 	      if (processing_template_decl)
- 		break;
--	      error ("%qE is not a variable in clause %qs", t, name);
-+	      if (DECL_P (t))
-+		error ("%qD is not a variable in clause %qs", t, name);
-+	      else
-+		error ("%qE is not a variable in clause %qs", t, name);
- 	      remove = true;
- 	    }
- 	  else if (bitmap_bit_p (&generic_head, DECL_UID (t))
- 		   || bitmap_bit_p (&firstprivate_head, DECL_UID (t))
- 		   || bitmap_bit_p (&lastprivate_head, DECL_UID (t)))
- 	    {
--	      error ("%qE appears more than once in data clauses", t);
-+	      error ("%qD appears more than once in data clauses", t);
- 	      remove = true;
- 	    }
- 	  else
---- gcc/testsuite/g++.dg/gomp/pr31748.C.jj	2007-05-01 14:26:13.000000000 +0200
-+++ gcc/testsuite/g++.dg/gomp/pr31748.C	2007-05-01 14:26:07.000000000 +0200
-@@ -0,0 +1,10 @@
-+// PR c++/31748
-+
-+struct A;
-+
-+void
-+foo ()
-+{
-+#pragma omp parallel private(A)	// { dg-error "struct A.*is not a variable" }
-+  ;
-+}
diff --git a/gcc41-pr32139.patch b/gcc41-pr32139.patch
new file mode 100644
index 0000000..84e8ffb
--- /dev/null
+++ b/gcc41-pr32139.patch
@@ -0,0 +1,58 @@
+2007-06-01  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/32139
+	* c-typeck.c (common_pointer_type): Set TYPE_READONLY
+	and TYPE_VOLATILE on the merged pointed to FUNCTION_TYPE
+	only if both pointed_to_1 and pointed_to_2 are TYPE_READONLY
+	resp. TYPE_VOLATILE.
+
+	* gcc.c-torture/compile/20070531-1.c: New test.
+
+--- gcc/c-typeck.c.jj	2007-04-25 10:13:52.000000000 +0200
++++ gcc/c-typeck.c	2007-06-01 10:51:53.000000000 +0200
+@@ -499,6 +499,7 @@ common_pointer_type (tree t1, tree t2)
+   tree pointed_to_1, mv1;
+   tree pointed_to_2, mv2;
+   tree target;
++  int type_quals;
+ 
+   /* Save time if the two types are the same.  */
+ 
+@@ -526,10 +527,19 @@ common_pointer_type (tree t1, tree t2)
+   if (TREE_CODE (mv2) != ARRAY_TYPE)
+     mv2 = TYPE_MAIN_VARIANT (pointed_to_2);
+   target = composite_type (mv1, mv2);
+-  t1 = build_pointer_type (c_build_qualified_type
+-			   (target,
+-			    TYPE_QUALS (pointed_to_1) |
+-			    TYPE_QUALS (pointed_to_2)));
++  type_quals = TYPE_QUALS (pointed_to_1) | TYPE_QUALS (pointed_to_2);
++  if (TREE_CODE (pointed_to_1) == FUNCTION_TYPE)
++    {
++      /* TYPE_READONLY and TYPE_VOLATILE on FUNCTION_TYPE should be
++	 logically ANDed, not ORed, as if one function is
++	 __attribute__((const)) and the other is not, the common type
++	 must be conservatively not __attribute__((const))
++	 and similarly for __attribute__((noreturn)).  */
++      type_quals &= ~(TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE);
++      type_quals |= (TYPE_QUALS (pointed_to_1) & TYPE_QUALS (pointed_to_2))
++		    & (TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE);
++    }
++  t1 = build_pointer_type (c_build_qualified_type (target, type_quals));
+   return build_type_attribute_variant (t1, attributes);
+ }
+ 
+--- gcc/testsuite/gcc.c-torture/compile/20070531-1.c.jj	2007-05-31 13:47:22.000000000 +0200
++++ gcc/testsuite/gcc.c-torture/compile/20070531-1.c	2007-06-01 10:57:15.000000000 +0200
+@@ -0,0 +1,11 @@
++/* PR tree-optimization/32139 */
++int foo (void);
++int bar (void) __attribute__ ((const));
++
++int
++test (int x)
++{
++  int a = (x == 10000 ? foo : bar) ();
++  int b = (x == 10000 ? foo : bar) ();
++  return a + b;
++}
diff --git a/gcc41-pr32678.patch b/gcc41-pr32678.patch
new file mode 100644
index 0000000..61692bb
--- /dev/null
+++ b/gcc41-pr32678.patch
@@ -0,0 +1,63 @@
+2007-07-21  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR libgfortran/32678
+	* io/transfer.c (formatted_transfer_scalar): Fix off by one error in
+	calculation of pos and skips. Don't allow pending_spaces to go
+	negative.
+
+	PR fortran/32678
+	* gfortran.dg/fmt_t_5.f90: New test.
+
+--- libgfortran/io/transfer.c	(revision 126821)
++++ libgfortran/io/transfer.c	(revision 126823)
+@@ -893,9 +893,9 @@ formatted_transfer_scalar (st_parameter_
+ 	case FMT_TR:
+ 	  consume_data_flag = 0 ;
+ 
+-	  pos = bytes_used + f->u.n + dtp->u.p.skips;
+-	  dtp->u.p.skips = f->u.n + dtp->u.p.skips;
+-	  dtp->u.p.pending_spaces = pos - dtp->u.p.max_pos;
++	  dtp->u.p.skips += f->u.n;
++	  pos = bytes_used + dtp->u.p.skips - 1;
++	  dtp->u.p.pending_spaces = pos - dtp->u.p.max_pos + 1;
+ 
+ 	  /* Writes occur just before the switch on f->format, above, so
+ 	     that trailing blanks are suppressed, unless we are doing a
+@@ -922,8 +922,6 @@ formatted_transfer_scalar (st_parameter_
+ 	      if (bytes_used == 0)
+ 		{
+ 		  dtp->u.p.pending_spaces -= f->u.n;
+-		  dtp->u.p.pending_spaces = dtp->u.p.pending_spaces < 0 ? 0
+-					    : dtp->u.p.pending_spaces;
+ 		  dtp->u.p.skips -= f->u.n;
+ 		  dtp->u.p.skips = dtp->u.p.skips < 0 ? 0 : dtp->u.p.skips;
+ 		}
+@@ -945,6 +943,8 @@ formatted_transfer_scalar (st_parameter_
+ 	  dtp->u.p.skips = dtp->u.p.skips + pos - bytes_used;
+ 	  dtp->u.p.pending_spaces = dtp->u.p.pending_spaces
+ 				    + pos - dtp->u.p.max_pos;
++	  dtp->u.p.pending_spaces = dtp->u.p.pending_spaces < 0
++				    ? 0 : dtp->u.p.pending_spaces;
+ 
+ 	  if (dtp->u.p.skips == 0)
+ 	    break;
+--- gcc/testsuite/gfortran.dg/fmt_t_5.f90	(revision 126821)
++++ gcc/testsuite/gfortran.dg/fmt_t_5.f90	(revision 126823)
+@@ -0,0 +1,17 @@
++! { dg-do run }
++! PR32678 GFortran works incorrectly when writing with FORMAT Tx
++! Before patch, NULLs were inserted in output.
++! Test case from reporter enhanced to detect this problem.
++      character(25) :: output
++      character(1)  :: c
++      output = ""
++      open (unit=10, file="pr32678testfile", status="replace")
++      write (10,10) '12','a','b'
++      close (10, status="keep")
++      open (unit=10, file="pr32678testfile")
++      read(10,20) output(1:21)
++      if (output(1:21).ne."ab                  x") call abort
++      close (10, status="delete")
++ 10   format (a2,t1,a1,t2,a1,t20,' x')
++ 20   format (a21)
++      end
diff --git a/gcc41-pr32912.patch b/gcc41-pr32912.patch
new file mode 100644
index 0000000..67ce830
--- /dev/null
+++ b/gcc41-pr32912.patch
@@ -0,0 +1,138 @@
+2007-08-20  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/32912
+	* fold-const.c (fold_binary): Only optimize X | ~X and X ^ ~X for
+	integral types.
+
+	* gcc.dg/pr32912-1.c: New test.
+	* gcc.dg/pr32912-2.c: New test.
+
+--- gcc/fold-const.c.jj	2007-08-13 15:11:18.000000000 +0200
++++ gcc/fold-const.c	2007-08-20 15:49:05.000000000 +0200
+@@ -8079,6 +8079,7 @@ fold_binary (enum tree_code code, tree t
+ 
+       /* ~X | X is -1.  */
+       if (TREE_CODE (arg0) == BIT_NOT_EXPR
++	  && INTEGRAL_TYPE_P (TREE_TYPE (arg1))
+ 	  && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
+ 	{
+ 	  t1 = build_int_cst (type, -1);
+@@ -8088,6 +8089,7 @@ fold_binary (enum tree_code code, tree t
+ 
+       /* X | ~X is -1.  */
+       if (TREE_CODE (arg1) == BIT_NOT_EXPR
++	  && INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+ 	  && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
+ 	{
+ 	  t1 = build_int_cst (type, -1);
+@@ -8175,6 +8177,7 @@ fold_binary (enum tree_code code, tree t
+ 
+       /* ~X ^ X is -1.  */
+       if (TREE_CODE (arg0) == BIT_NOT_EXPR
++	  && INTEGRAL_TYPE_P (TREE_TYPE (arg1))
+ 	  && operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0))
+ 	{
+ 	  t1 = build_int_cst (type, -1);
+@@ -8184,6 +8187,7 @@ fold_binary (enum tree_code code, tree t
+ 
+       /* X ^ ~X is -1.  */
+       if (TREE_CODE (arg1) == BIT_NOT_EXPR
++	  && INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+ 	  && operand_equal_p (arg0, TREE_OPERAND (arg1, 0), 0))
+ 	{
+ 	  t1 = build_int_cst (type, -1);
+--- gcc/testsuite/gcc.dg/pr32912-1.c.jj	2007-08-20 14:43:05.000000000 +0200
++++ gcc/testsuite/gcc.dg/pr32912-1.c	2007-08-20 14:43:23.000000000 +0200
+@@ -0,0 +1,44 @@
++/* PR middle-end/32912 */
++/* { dg-do run } */
++/* { dg-options "-O2 -w" } */
++
++extern void abort (void);
++
++typedef int __m128i __attribute__ ((__vector_size__ (16)));
++
++__m128i a, b, c, d, e, f;
++
++void
++foo (__m128i x)
++{
++  a = x ^ ~x;
++  b = ~x ^ x;
++  c = x | ~x;
++  d = ~x | x;
++  e = x & ~x;
++  f = ~x & x;
++}
++
++int
++main (void)
++{
++  union { __m128i v; int i[sizeof (__m128i) / sizeof (int)]; } u;
++  int i;
++
++  for (i = 0; i < sizeof (u.i) / sizeof (u.i[0]); i++)
++    u.i[i] = i * 49 - 36;
++  foo (u.v);
++#define check(x, val) \
++  u.v = (x); \
++  for (i = 0; i < sizeof (u.i) / sizeof (u.i[0]); i++) \
++    if (u.i[i] != (val)) \
++      abort ()
++
++  check (a, ~0);
++  check (b, ~0);
++  check (c, ~0);
++  check (d, ~0);
++  check (e, 0);
++  check (f, 0);
++  return 0;
++}
+--- gcc/testsuite/gcc.dg/pr32912-2.c.jj	2007-08-20 15:58:47.000000000 +0200
++++ gcc/testsuite/gcc.dg/pr32912-2.c	2007-08-20 15:55:32.000000000 +0200
+@@ -0,0 +1,45 @@
++/* { dg-do run } */
++/* { dg-options "-O2 -w" } */
++
++extern void abort (void);
++
++typedef int __m128i __attribute__ ((__vector_size__ (16)));
++
++__m128i a, b, c, d, e, f;
++
++__m128i
++foo (void)
++{
++  __m128i x = { 0x11111111, 0x22222222, 0x44444444 };
++  return x;
++}
++
++__m128i
++bar (void)
++{
++  __m128i x = { 0x11111111, 0x22222222, 0x44444444 };
++  return ~x;
++}
++
++int
++main (void)
++{
++  union { __m128i v; int i[sizeof (__m128i) / sizeof (int)]; } u, v;
++  int i;
++
++  u.v = foo ();
++  v.v = bar ();
++  for (i = 0; i < sizeof (u.i) / sizeof (u.i[0]); i++)
++    {
++      if (u.i[i] != ~v.i[i])
++	abort ();
++      if (i < 3)
++	{
++	  if (u.i[i] != (0x11111111 << i))
++	    abort ();
++	}
++      else if (u.i[i])
++	abort ();
++    }
++  return 0;
++}
diff --git a/gcc41-rh234515.patch b/gcc41-rh234515.patch
deleted file mode 100644
index e2a241d..0000000
--- a/gcc41-rh234515.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-2007-01-24   Steve LoBasso <slobasso@yahoo.com>
-	     Paolo Carlini  <pcarlini@suse.de>
-
-	* include/bits/deque.tcc (deque<>::erase(iterator, iterator)):
-	Fix condition.
-	* testsuite/23_containers/deque/modifiers/erase/3.cc: New.
-
---- libstdc++-v3/include/bits/deque.tcc	(revision 121146)
-+++ libstdc++-v3/include/bits/deque.tcc	(revision 121147)
-@@ -142,7 +142,7 @@ namespace _GLIBCXX_STD
- 	  const difference_type __n = __last - __first;
- 	  const difference_type __elems_before = (__first
- 						  - this->_M_impl._M_start);
--	  if (static_cast<size_type>(__elems_before) < (size() - __n) / 2)
-+	  if (static_cast<size_type>(__elems_before) <= (size() - __n) / 2)
- 	    {
- 	      std::copy_backward(this->_M_impl._M_start, __first, __last);
- 	      iterator __new_start = this->_M_impl._M_start + __n;
---- libstdc++-v3/testsuite/23_containers/deque/modifiers/erase/3.cc	(revision 0)
-+++ libstdc++-v3/testsuite/23_containers/deque/modifiers/erase/3.cc	(revision 121147)
-@@ -0,0 +1,52 @@
-+// Copyright (C) 2007 Free Software Foundation, Inc.
-+//
-+// This file is part of the GNU ISO C++ Library.  This library is free
-+// software; you can redistribute it and/or modify it under the
-+// terms of the GNU General Public License as published by the
-+// Free Software Foundation; either version 2, or (at your option)
-+// any later version.
-+
-+// This library is distributed in the hope that it will be useful,
-+// but WITHOUT ANY WARRANTY; without even the implied warranty of
-+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+// GNU General Public License for more details.
-+
-+// You should have received a copy of the GNU General Public License along
-+// with this library; see the file COPYING.  If not, write to the Free
-+// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
-+// USA.
-+
-+// 23.2.1.3 deque modifiers
-+
-+#include <deque>
-+#include <testsuite_hooks.h>
-+
-+void erase(size_t num_elm, size_t elm_strt, size_t elm_end)
-+{
-+  bool test __attribute__((unused)) = true;
-+  using __gnu_test::copy_tracker;
-+  using __gnu_test::assignment_operator;
-+
-+  std::deque<copy_tracker> x(num_elm);
-+  copy_tracker::reset();
-+  
-+  x.erase(x.begin() + elm_strt, x.begin() + elm_end);
-+  
-+  const size_t min_num_cpy = std::min(elm_strt, num_elm - elm_end);
-+  VERIFY( assignment_operator::count() == min_num_cpy );
-+}
-+
-+// http://gcc.gnu.org/ml/libstdc++/2007-01/msg00098.html
-+void test01()
-+{
-+  for (size_t num_elm = 0; num_elm <= 10; ++num_elm)
-+    for (size_t elm_strt = 0; elm_strt <= num_elm; ++elm_strt)
-+      for (size_t elm_end = elm_strt; elm_end <= num_elm; ++elm_end)
-+	erase(num_elm, elm_strt, elm_end);
-+}
-+
-+int main()
-+{
-+  test01();
-+  return 0;
-+}
diff --git a/gcc41-rh247256.patch b/gcc41-rh247256.patch
new file mode 100644
index 0000000..bc3c778
--- /dev/null
+++ b/gcc41-rh247256.patch
@@ -0,0 +1,42 @@
+2007-07-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* simplify-rtx.c (simplify_plus_minus_op_data_cmp): If both operands
+	are REGs and TARGET_INDEX_OPERAND_FIRST, sort lower REGNOs first.
+
+	* gcc.dg/20070710-1.c: New test.
+
+--- gcc/simplify-rtx.c.jj	2006-08-11 17:32:05.000000000 +0200
++++ gcc/simplify-rtx.c	2007-07-09 22:53:26.000000000 +0200
+@@ -2608,6 +2608,12 @@ simplify_plus_minus_op_data_cmp (const v
+ 	    - commutative_operand_precedence (d1->op));
+   if (result)
+     return result;
++
++  /* Group together equal REGs to do more simplification.  */
++  if (TARGET_INDEX_OPERAND_FIRST && REG_P (d1->op) && REG_P (d2->op)
++      && REGNO (d1->op) != REGNO (d2->op))
++    return REGNO (d1->op) - REGNO (d2->op);
++
+   return d1->ix - d2->ix;
+ }
+ 
+--- gcc/testsuite/gcc.dg/20070710-1.c.jj	2007-07-10 09:32:43.000000000 +0200
++++ gcc/testsuite/gcc.dg/20070710-1.c	2007-07-10 09:31:39.000000000 +0200
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -ffast-math" } */
++
++extern float sqrtf (float);
++
++float
++foo (const float *m)
++{ /* Compile-only testcase body (dg-do compile with -O2 -ffast-math).  */
++  float x = m[0] + m[1] + m[2] + 1.0f;
++  float s;
++  /* Both branches combine 1.0f with m[0..2]; the second negates m[1] and m[2].  */
++  if (x > 0.001)
++    s = 0.5f / sqrtf (x);
++  else
++    s = 2.0f * sqrtf (1.0f + m[0] - m[1] - m[2]);
++  return s;
++}
diff --git a/gcc41-rh253102.patch b/gcc41-rh253102.patch
new file mode 100644
index 0000000..d4cad4f
--- /dev/null
+++ b/gcc41-rh253102.patch
@@ -0,0 +1,89 @@
+2007-08-17  Jakub Jelinek  <jakub@redhat.com>
+
+	* decl.c (variable_decl): Don't share charlen structs if
+	length == NULL.
+	* trans-decl.c (create_function_arglist): Assert
+	f->sym->ts.cl->backend_decl is NULL instead of unsharing
+	charlen struct here.
+
+	* gfortran.dg/assumed_charlen_sharing.f90: New test.
+
+--- gcc/fortran/decl.c.jj	2007-02-20 22:38:20.000000000 +0100
++++ gcc/fortran/decl.c	2007-08-21 20:50:33.000000000 +0200
+@@ -1086,10 +1086,11 @@ variable_decl (int elem)
+ 	  break;
+ 
+ 	/* Non-constant lengths need to be copied after the first
+-	   element.  */
++	   element.  Also copy assumed lengths.  */
+ 	case MATCH_NO:
+-	  if (elem > 1 && current_ts.cl->length
+-		&& current_ts.cl->length->expr_type != EXPR_CONSTANT)
++	  if (elem > 1
++	      && (current_ts.cl->length == NULL
++		  || current_ts.cl->length->expr_type != EXPR_CONSTANT))
+ 	    {
+ 	      cl = gfc_get_charlen ();
+ 	      cl->next = gfc_current_ns->cl_list;
+--- gcc/fortran/trans-decl.c.jj	2007-03-12 08:28:13.000000000 +0100
++++ gcc/fortran/trans-decl.c	2007-08-21 20:50:33.000000000 +0200
+@@ -1417,25 +1417,8 @@ create_function_arglist (gfc_symbol * sy
+ 	  if (!f->sym->ts.cl->length)
+ 	    {
+ 	      TREE_USED (length) = 1;
+-	      if (!f->sym->ts.cl->backend_decl)
+-		f->sym->ts.cl->backend_decl = length;
+-	      else
+-		{
+-		  /* there is already another variable using this
+-		     gfc_charlen node, build a new one for this variable
+-		     and chain it into the list of gfc_charlens.
+-		     This happens for e.g. in the case
+-		     CHARACTER(*)::c1,c2
+-		     since CHARACTER declarations on the same line share
+-		     the same gfc_charlen node.  */
+-		  gfc_charlen *cl;
+-	      
+-		  cl = gfc_get_charlen ();
+-		  cl->backend_decl = length;
+-		  cl->next = f->sym->ts.cl->next;
+-		  f->sym->ts.cl->next = cl;
+-		  f->sym->ts.cl = cl;
+-		}
++	      gcc_assert (!f->sym->ts.cl->backend_decl);
++	      f->sym->ts.cl->backend_decl = length;
+ 	    }
+ 
+ 	  hidden_typelist = TREE_CHAIN (hidden_typelist);
+--- gcc/testsuite/gfortran.dg/assumed_charlen_sharing.f90.jj	2007-08-21 08:29:57.000000000 +0200
++++ gcc/testsuite/gfortran.dg/assumed_charlen_sharing.f90	2007-08-21 08:29:57.000000000 +0200
+@@ -0,0 +1,29 @@
++! This testcase was miscompiled because ts.cl
++! in subroutine bar was initially shared between both
++! dummy arguments.  Although it was later unshared,
++! all expressions which had copied ts.cl from bar2
++! before that point incorrectly used bar1's length
++! instead of bar2's.
++! { dg-do run }
++
++subroutine foo (foo1, foo2)
++  implicit none
++  integer, intent(in) :: foo2
++  character(*), intent(in) :: foo1(foo2)
++end subroutine foo
++
++subroutine bar (bar1, bar2)
++  implicit none
++  character(*), intent(in) :: bar1, bar2
++
++  call foo ((/ bar2 /), 1)
++end subroutine bar
++
++program test
++  character(80) :: str1
++  character(5) :: str2
++
++  str1 = 'String'
++  str2 = 'Strng'
++  call bar (str2, str1)
++end program test
diff --git a/gcc41-sparc-niagara.patch b/gcc41-sparc-niagara.patch
new file mode 100644
index 0000000..c9e61ba
--- /dev/null
+++ b/gcc41-sparc-niagara.patch
@@ -0,0 +1,519 @@
+2006-03-02  David S. Miller  <davem@sunset.davemloft.net>
+
+	Sun Niagara specific optimizations.
+	* config.gcc: Recognize niagara as target.
+	* config/sparc/sparc.h (SPARC_RELAXED_ORDERING): Mention Niagara.
+	(TARGET_CPU_niagara): Define.
+	(CPP_CPU64_DEFAULT_SPEC): Define __sparc_v9__ for Niagara.
+	(ASM_CPU64_DEFAULT_SPEC): Pass -Av9b for Niagara.
+	(CPP_CPU_SPEC): Handle -mcpu=niagara.
+	(ASM_CPU_SPEC): Likewise.
+	(PROCESSOR_NIAGARA): New enum entry.
+	(REGISTER_MOVE_COST): Handle Niagara.
+	(BRANCH_COST, PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES): Likewise.
+	* config/sparc/sparc.c (niagara_costs): New processor_costs entry.
+	(sparc_override_options): Recognize "niagara", set appropriate
+	default MASK_* values for it, and align functions to 32-bytes
+	by default just like ULTRASPARC/ULTRASPARC3.
+	(sparc_initialize_trampoline): Handle niagara like ultrasparc.
+	(sparc64_initialize_trampoline): Likewise.
+	(sparc_use_sched_lookahead): Use zero for niagara.
+	(sparc_issue_rate): Use one for niagara.
+	* config/sparc/niagara.md: New file.
+	* config/sparc/sparc.md: Include it.
+	* config/sparc/sol2-bi.h (CPP_CPU64_DEFAULT_SPEC,
+	ASM_CPU32_DEFAULT_SPEC, ASM_CPU64_DEFAULT_SPEC): Set appropriately
+	when default cpu is niagara.
+	(CPP_CPU_SPEC): Handle -mcpu=niagara.
+	(ASM_CPU_SPEC): Likewise.
+	* config/sparc/sol2.h (ASM_CPU_DEFAULT_SPEC): Set appropriately
+	when default cpu is niagara.
+	(ASM_CPU_SPEC): Handle -mcpu=niagara.
+	* config/sparc/linux64.h: Handle a default of TARGET_CPU_niagara
+	just like v9/ultrasparc/ultrasparc3.
+	* doc/invoke.texi: Add documentation for "niagara" and improve
+	existing documentation for ultrasparc variants.
+
+--- gcc/doc/invoke.texi	(revision 111647)
++++ gcc/doc/invoke.texi	(revision 111648)
+@@ -12268,8 +12268,8 @@ Set the instruction set, register set, a
+ for machine type @var{cpu_type}.  Supported values for @var{cpu_type} are
+ @samp{v7}, @samp{cypress}, @samp{v8}, @samp{supersparc}, @samp{sparclite},
+ @samp{f930}, @samp{f934}, @samp{hypersparc}, @samp{sparclite86x},
+-@samp{sparclet}, @samp{tsc701}, @samp{v9}, @samp{ultrasparc}, and
+-@samp{ultrasparc3}.
++@samp{sparclet}, @samp{tsc701}, @samp{v9}, @samp{ultrasparc},
++@samp{ultrasparc3}, and @samp{niagara}.
+ 
+ Default instruction scheduling parameters are used for values that select
+ an architecture and not an implementation.  These are @samp{v7}, @samp{v8},
+@@ -12283,7 +12283,7 @@ implementations.
+     v8:             supersparc, hypersparc
+     sparclite:      f930, f934, sparclite86x
+     sparclet:       tsc701
+-    v9:             ultrasparc, ultrasparc3
++    v9:             ultrasparc, ultrasparc3, niagara
+ @end smallexample
+ 
+ By default (unless configured otherwise), GCC generates code for the V7
+@@ -12317,9 +12317,11 @@ With @option{-mcpu=v9}, GCC generates co
+ architecture.  This adds 64-bit integer and floating-point move instructions,
+ 3 additional floating-point condition code registers and conditional move
+ instructions.  With @option{-mcpu=ultrasparc}, the compiler additionally
+-optimizes it for the Sun UltraSPARC I/II chips.  With
++optimizes it for the Sun UltraSPARC I/II/IIi chips.  With
+ @option{-mcpu=ultrasparc3}, the compiler additionally optimizes it for the
+-Sun UltraSPARC III chip.
++Sun UltraSPARC III/III+/IIIi/IIIi+/IV/IV+ chips.  With
++@option{-mcpu=niagara}, the compiler additionally optimizes it for
++Sun UltraSPARC T1 chips.
+ 
+ @item -mtune=@var{cpu_type}
+ @opindex mtune
+@@ -12331,8 +12333,8 @@ The same values for @option{-mcpu=@var{c
+ @option{-mtune=@var{cpu_type}}, but the only useful values are those
+ that select a particular cpu implementation.  Those are @samp{cypress},
+ @samp{supersparc}, @samp{hypersparc}, @samp{f930}, @samp{f934},
+-@samp{sparclite86x}, @samp{tsc701}, @samp{ultrasparc}, and
+-@samp{ultrasparc3}.
++@samp{sparclite86x}, @samp{tsc701}, @samp{ultrasparc},
++@samp{ultrasparc3}, and @samp{niagara}.
+ 
+ @item -mv8plus
+ @itemx -mno-v8plus
+--- gcc/config.gcc	(revision 111647)
++++ gcc/config.gcc	(revision 111648)
+@@ -2830,7 +2830,7 @@ case "${target}" in
+ 			"" | sparc | sparcv9 | sparc64 | sparc86x \
+ 			| v7 | cypress | v8 | supersparc | sparclite | f930 \
+ 			| f934 | hypersparc | sparclite86x | sparclet | tsc701 \
+-			| v9 | ultrasparc | ultrasparc3)
++			| v9 | ultrasparc | ultrasparc3 | niagara)
+ 				# OK
+ 				;;
+ 			*)
+--- gcc/config/sparc/niagara.md	(revision 0)
++++ gcc/config/sparc/niagara.md	(revision 111648)
+@@ -0,0 +1,119 @@
++;; Scheduling description for Niagara.
++;;   Copyright (C) 2006 Free Software Foundation, Inc.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify
++;; it under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 2, or (at your option)
++;; any later version.
++;;
++;; GCC is distributed in the hope that it will be useful,
++;; but WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++;; GNU General Public License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING.  If not, write to
++;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
++;; Boston, MA 02110-1301, USA.
++
++;; Niagara is a single-issue processor.
++
++(define_automaton "niagara_0")
++
++(define_cpu_unit "niag_pipe" "niagara_0")
++
++(define_insn_reservation "niag_5cycle" 5
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "multi,flushw,iflush,trap"))
++  "niag_pipe*5")
++
++(define_insn_reservation "niag_4cycle" 4
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "savew"))
++  "niag_pipe*4")
++
++/* Most basic operations are single-cycle. */
++(define_insn_reservation "niag_ialu" 1
++ (and (eq_attr "cpu" "niagara")
++   (eq_attr "type" "ialu,shift,compare,cmove"))
++ "niag_pipe")
++
++(define_insn_reservation "niag_imul" 11
++ (and (eq_attr "cpu" "niagara")
++   (eq_attr "type" "imul"))
++ "niag_pipe*11")
++
++(define_insn_reservation "niag_idiv" 72
++ (and (eq_attr "cpu" "niagara")
++   (eq_attr "type" "idiv"))
++ "niag_pipe*72")
++
++(define_insn_reservation "niag_branch" 3
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch,branch"))
++  "niag_pipe*3")
++
++(define_insn_reservation "niag_3cycle_load" 3
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "load"))
++  "niag_pipe*3")
++
++(define_insn_reservation "niag_9cycle_load" 9
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpload"))
++  "niag_pipe*9")
++
++(define_insn_reservation "niag_1cycle_store" 1
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "store"))
++  "niag_pipe")
++
++(define_insn_reservation "niag_8cycle_store" 8
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpstore"))
++  "niag_pipe*8")
++
++/* Things incorrectly modelled here:
++ *  FPADD{s,d}: 26 cycles
++ *  FPSUB{s,d}: 26 cycles
++ *  FABSD: 26 cycles
++ *  F{s,d}TO{s,d}: 26 cycles
++ *  F{s,d}TO{i,x}: 26 cycles
++ *  FSMULD: 29 cycles
++ */
++(define_insn_reservation "niag_fmov" 8
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpmove,fpcmove,fpcrmove"))
++  "niag_pipe*8")
++
++(define_insn_reservation "niag_fpcmp" 26
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpcmp"))
++  "niag_pipe*26")
++
++(define_insn_reservation "niag_fmult" 29
++ (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpmul"))
++  "niag_pipe*29")
++
++(define_insn_reservation "niag_fdivs" 54
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpdivs"))
++  "niag_pipe*54")
++
++(define_insn_reservation "niag_fdivd" 83
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fpdivd"))
++  "niag_pipe*83")
++
++/* Things incorrectly modelled here:
++ *  FPADD{16,32}: 10 cycles
++ *  FPSUB{16,32}: 10 cycles
++ *  FALIGNDATA: 10 cycles
++ */
++(define_insn_reservation "niag_vis" 8
++  (and (eq_attr "cpu" "niagara")
++    (eq_attr "type" "fga,fgm_pack,fgm_mul,fgm_cmp,fgm_pdist"))
++  "niag_pipe*8")
+--- gcc/config/sparc/sparc.md	(revision 111647)
++++ gcc/config/sparc/sparc.md	(revision 111648)
+@@ -94,7 +94,8 @@ (define_attr "cpu"
+    sparclet,tsc701,
+    v9,
+    ultrasparc,
+-   ultrasparc3"
++   ultrasparc3,
++   niagara"
+   (const (symbol_ref "sparc_cpu_attr")))
+ 
+ ;; Attribute for the instruction set.
+@@ -315,6 +316,7 @@ (define_delay (eq_attr "type" "return")
+ (include "sparclet.md")
+ (include "ultra1_2.md")
+ (include "ultra3.md")
++(include "niagara.md")
+ 
+ 
+ ;; Operand and operator predicates.
+--- gcc/config/sparc/sparc.c	(revision 111647)
++++ gcc/config/sparc/sparc.c	(revision 111648)
+@@ -197,6 +197,30 @@ struct processor_costs ultrasparc3_costs
+   0, /* shift penalty */
+ };
+ 
++static const
++struct processor_costs niagara_costs = { /* Installed as sparc_costs for PROCESSOR_NIAGARA in sparc_override_options.  */
++  COSTS_N_INSNS (3), /* int load */
++  COSTS_N_INSNS (3), /* int signed load */
++  COSTS_N_INSNS (3), /* int zeroed load */
++  COSTS_N_INSNS (9), /* float load */
++  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
++  COSTS_N_INSNS (8), /* fadd, fsub */
++  COSTS_N_INSNS (26), /* fcmp */
++  COSTS_N_INSNS (8), /* fmov, fmovr */
++  COSTS_N_INSNS (29), /* fmul */
++  COSTS_N_INSNS (54), /* fdivs */
++  COSTS_N_INSNS (83), /* fdivd */
++  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
++  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
++  COSTS_N_INSNS (11), /* imul */
++  COSTS_N_INSNS (11), /* imulX */
++  0, /* imul bit factor */
++  COSTS_N_INSNS (72), /* idiv */
++  COSTS_N_INSNS (72), /* idivX */
++  COSTS_N_INSNS (1), /* movcc/movr */
++  0, /* shift penalty */
++};
++
+ const struct processor_costs *sparc_costs = &cypress_costs;
+ 
+ #ifdef HAVE_AS_RELAX_OPTION
+@@ -597,6 +621,7 @@ sparc_override_options (void)
+     { TARGET_CPU_v9, "v9" },
+     { TARGET_CPU_ultrasparc, "ultrasparc" },
+     { TARGET_CPU_ultrasparc3, "ultrasparc3" },
++    { TARGET_CPU_niagara, "niagara" },
+     { 0, 0 }
+   };
+   const struct cpu_default *def;
+@@ -632,6 +657,8 @@ sparc_override_options (void)
+     /* TI ultrasparc III */
+     /* ??? Check if %y issue still holds true in ultra3.  */
+     { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
++    /* UltraSPARC T1 */
++    { "niagara", PROCESSOR_NIAGARA, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
+     { 0, 0, 0, 0 }
+   };
+   const struct cpu_table *cpu;
+@@ -741,7 +768,8 @@ sparc_override_options (void)
+   /* Supply a default value for align_functions.  */
+   if (align_functions == 0
+       && (sparc_cpu == PROCESSOR_ULTRASPARC
+-	  || sparc_cpu == PROCESSOR_ULTRASPARC3))
++	  || sparc_cpu == PROCESSOR_ULTRASPARC3
++	  || sparc_cpu == PROCESSOR_NIAGARA))
+     align_functions = 32;
+ 
+   /* Validate PCC_STRUCT_RETURN.  */
+@@ -790,6 +818,9 @@ sparc_override_options (void)
+     case PROCESSOR_ULTRASPARC3:
+       sparc_costs = &ultrasparc3_costs;
+       break;
++    case PROCESSOR_NIAGARA:
++      sparc_costs = &niagara_costs;
++      break;
+     };
+ 
+ #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
+@@ -7099,7 +7130,8 @@ sparc_initialize_trampoline (rtx tramp, 
+      aligned on a 16 byte boundary so one flush clears it all.  */
+   emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
+   if (sparc_cpu != PROCESSOR_ULTRASPARC
+-      && sparc_cpu != PROCESSOR_ULTRASPARC3)
++      && sparc_cpu != PROCESSOR_ULTRASPARC3
++      && sparc_cpu != PROCESSOR_NIAGARA)
+     emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
+ 						     plus_constant (tramp, 8)))));
+ 
+@@ -7141,7 +7173,8 @@ sparc64_initialize_trampoline (rtx tramp
+   emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
+ 
+   if (sparc_cpu != PROCESSOR_ULTRASPARC
+-      && sparc_cpu != PROCESSOR_ULTRASPARC3)
++      && sparc_cpu != PROCESSOR_ULTRASPARC3
++      && sparc_cpu != PROCESSOR_NIAGARA)
+     emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
+ 
+   /* Call __enable_execute_stack after writing onto the stack to make sure
+@@ -7321,6 +7354,8 @@ sparc_sched_init (FILE *dump ATTRIBUTE_U
+ static int
+ sparc_use_sched_lookahead (void)
+ {
++  if (sparc_cpu == PROCESSOR_NIAGARA)
++    return 0;
+   if (sparc_cpu == PROCESSOR_ULTRASPARC
+       || sparc_cpu == PROCESSOR_ULTRASPARC3)
+     return 4;
+@@ -7336,6 +7371,7 @@ sparc_issue_rate (void)
+ {
+   switch (sparc_cpu)
+     {
++    case PROCESSOR_NIAGARA:
+     default:
+       return 1;
+     case PROCESSOR_V9:
+--- gcc/config/sparc/sol2-bi.h	(revision 111647)
++++ gcc/config/sparc/sol2-bi.h	(revision 111648)
+@@ -39,6 +39,15 @@
+ #define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
+ #endif
+ 
++#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
++#undef CPP_CPU64_DEFAULT_SPEC
++#define CPP_CPU64_DEFAULT_SPEC ""
++#undef ASM_CPU32_DEFAULT_SPEC
++#define ASM_CPU32_DEFAULT_SPEC "-xarch=v8plusb"
++#undef ASM_CPU64_DEFAULT_SPEC
++#define ASM_CPU64_DEFAULT_SPEC AS_SPARC64_FLAG "b"
++#endif
++
+ #if DEFAULT_ARCH32_P
+ #define DEF_ARCH32_SPEC(__str) "%{!m64:" __str "}"
+ #define DEF_ARCH64_SPEC(__str) "%{m64:" __str "}"
+@@ -57,7 +66,7 @@
+ %{mcpu=sparclite|mcpu-f930|mcpu=f934:-D__sparclite__} \
+ %{mcpu=v8:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+ %{mcpu=supersparc:-D__supersparc__ " DEF_ARCH32_SPEC("-D__sparcv8") "} \
+-%{mcpu=v9|mcpu=ultrasparc|mcpu=ultrasparc3:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
++%{mcpu=v9|mcpu=ultrasparc|mcpu=ultrasparc3|mcpu=niagara:" DEF_ARCH32_SPEC("-D__sparcv8") "} \
+ %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
+ "
+ 
+@@ -66,7 +75,8 @@
+ %{mcpu=v9:" DEF_ARCH32_SPEC("-xarch=v8plus") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "} \
+ %{mcpu=ultrasparc:" DEF_ARCH32_SPEC("-xarch=v8plusa") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "a") "} \
+ %{mcpu=ultrasparc3:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
+-%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "}}}} \
++%{mcpu=niagara:" DEF_ARCH32_SPEC("-xarch=v8plusb") DEF_ARCH64_SPEC(AS_SPARC64_FLAG "b") "} \
++%{!mcpu=niagara:%{!mcpu=ultrasparc3:%{!mcpu=ultrasparc:%{!mcpu=v9:%{mcpu*:" DEF_ARCH32_SPEC("-xarch=v8") DEF_ARCH64_SPEC(AS_SPARC64_FLAG) "}}}}} \
+ %{!mcpu*:%(asm_cpu_default)} \
+ "
+ 
+--- gcc/config/sparc/sparc.h	(revision 111647)
++++ gcc/config/sparc/sparc.h	(revision 111648)
+@@ -206,7 +206,7 @@ extern enum cmodel sparc_cmodel;
+    which requires the following macro to be true if enabled.  Prior to V9,
+    there are no instructions to even talk about memory synchronization.
+    Note that the UltraSPARC III processors don't implement RMO, unlike the
+-   UltraSPARC II processors.
++   UltraSPARC II processors.  Niagara does not implement RMO either.
+ 
+    Default to false; for example, Solaris never enables RMO, only ever uses
+    total memory ordering (TMO).  */
+@@ -238,10 +238,12 @@ extern enum cmodel sparc_cmodel;
+ #define TARGET_CPU_sparc64	7	/* alias */
+ #define TARGET_CPU_ultrasparc	8
+ #define TARGET_CPU_ultrasparc3	9
++#define TARGET_CPU_niagara	10
+ 
+ #if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+  || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
+- || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
++ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 \
++ || TARGET_CPU_DEFAULT == TARGET_CPU_niagara
+ 
+ #define CPP_CPU32_DEFAULT_SPEC ""
+ #define ASM_CPU32_DEFAULT_SPEC ""
+@@ -262,6 +264,10 @@ extern enum cmodel sparc_cmodel;
+ #define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+ #define ASM_CPU64_DEFAULT_SPEC "-Av9b"
+ #endif
++#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
++#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
++#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
++#endif
+ 
+ #else
+ 
+@@ -352,6 +358,7 @@ extern enum cmodel sparc_cmodel;
+ %{mcpu=v9:-D__sparc_v9__} \
+ %{mcpu=ultrasparc:-D__sparc_v9__} \
+ %{mcpu=ultrasparc3:-D__sparc_v9__} \
++%{mcpu=niagara:-D__sparc_v9__} \
+ %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
+ "
+ #define CPP_ARCH32_SPEC ""
+@@ -401,6 +408,7 @@ extern enum cmodel sparc_cmodel;
+ %{mcpu=v9:-Av9} \
+ %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
+ %{mcpu=ultrasparc3:%{!mv8plus:-Av9b}} \
++%{mcpu=niagara:%{!mv8plus:-Av9b}} \
+ %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
+ "
+ 
+@@ -524,7 +532,8 @@ enum processor_type {
+   PROCESSOR_TSC701,
+   PROCESSOR_V9,
+   PROCESSOR_ULTRASPARC,
+-  PROCESSOR_ULTRASPARC3
++  PROCESSOR_ULTRASPARC3,
++  PROCESSOR_NIAGARA
+ };
+ 
+ /* This is set from -m{cpu,tune}=xxx.  */
+@@ -2137,7 +2146,8 @@ do {                                    
+     || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
+     || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS)		\
+    ? ((sparc_cpu == PROCESSOR_ULTRASPARC \
+-       || sparc_cpu == PROCESSOR_ULTRASPARC3) ? 12 : 6) : 2)
++       || sparc_cpu == PROCESSOR_ULTRASPARC3 \
++       || sparc_cpu == PROCESSOR_NIAGARA) ? 12 : 6) : 2)
+ 
+ /* Provide the cost of a branch.  For pre-v9 processors we use
+    a value of 3 to take into account the potential annulling of
+@@ -2147,22 +2157,30 @@ do {                                    
+ 
+    On v9 and later, which have branch prediction facilities, we set
+    it to the depth of the pipeline as that is the cost of a
+-   mispredicted branch.  */
++   mispredicted branch.
++
++   On Niagara, normal branches insert 3 bubbles into the pipe
++   and annulled branches insert 4 bubbles.  */
+ 
+ #define BRANCH_COST \
+ 	((sparc_cpu == PROCESSOR_V9 \
+ 	  || sparc_cpu == PROCESSOR_ULTRASPARC) \
+ 	 ? 7 \
+          : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
+-            ? 9 : 3))
++            ? 9 \
++	 : (sparc_cpu == PROCESSOR_NIAGARA \
++	    ? 4 \
++	 : 3)))
+ 
+ #define PREFETCH_BLOCK \
+ 	((sparc_cpu == PROCESSOR_ULTRASPARC \
+-          || sparc_cpu == PROCESSOR_ULTRASPARC3) \
++          || sparc_cpu == PROCESSOR_ULTRASPARC3 \
++	  || sparc_cpu == PROCESSOR_NIAGARA) \
+          ? 64 : 32)
+ 
+ #define SIMULTANEOUS_PREFETCHES \
+-	((sparc_cpu == PROCESSOR_ULTRASPARC) \
++	((sparc_cpu == PROCESSOR_ULTRASPARC \
++	  || sparc_cpu == PROCESSOR_NIAGARA) \
+          ? 2 \
+          : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
+             ? 8 : 3))
+--- gcc/config/sparc/linux64.h	(revision 111647)
++++ gcc/config/sparc/linux64.h	(revision 111648)
+@@ -43,7 +43,8 @@ Boston, MA 02110-1301, USA.  */
+ 
+ #if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
+     || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
+-    || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
++    || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3 \
++    || TARGET_CPU_DEFAULT == TARGET_CPU_niagara
+ /* A 64 bit v9 compiler with stack-bias,
+    in a Medium/Low code model environment.  */
+ 
+--- gcc/config/sparc/sol2.h	(revision 111647)
++++ gcc/config/sparc/sol2.h	(revision 111648)
+@@ -41,11 +41,17 @@ Boston, MA 02110-1301, USA.  */
+ #define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb"
+ #endif
+ 
++#if TARGET_CPU_DEFAULT == TARGET_CPU_niagara
++#undef ASM_CPU_DEFAULT_SPEC
++#define ASM_CPU_DEFAULT_SPEC "-xarch=v8plusb"
++#endif
++
+ #undef ASM_CPU_SPEC
+ #define ASM_CPU_SPEC "\
+ %{mcpu=v9:-xarch=v8plus} \
+ %{mcpu=ultrasparc:-xarch=v8plusa} \
+ %{mcpu=ultrasparc3:-xarch=v8plusb} \
++%{mcpu=niagara:-xarch=v8plusb} \
+ %{!mcpu*:%(asm_cpu_default)} \
+ "
+ 
diff --git a/gcc41-strncat-chk.patch b/gcc41-strncat-chk.patch
deleted file mode 100644
index c1c01b8..0000000
--- a/gcc41-strncat-chk.patch
+++ /dev/null
@@ -1,93 +0,0 @@
-2006-09-18  Jakub Jelinek  <jakub@redhat.com>
-
-	* builtins.c (expand_builtin, maybe_emit_chk_warning): Handle
-	BUILT_IN_STRNCAT_CHK.
-
-	* gcc.dg/builtin-strncat-chk-1.c: New test.
-
---- gcc/builtins.c.jj	2006-09-02 08:54:22.000000000 +0200
-+++ gcc/builtins.c	2006-09-18 16:54:57.000000000 +0200
-@@ -6437,6 +6437,7 @@ expand_builtin (tree exp, rtx target, rt
-     case BUILT_IN_STPCPY_CHK:
-     case BUILT_IN_STRNCPY_CHK:
-     case BUILT_IN_STRCAT_CHK:
-+    case BUILT_IN_STRNCAT_CHK:
-     case BUILT_IN_SNPRINTF_CHK:
-     case BUILT_IN_VSNPRINTF_CHK:
-       maybe_emit_chk_warning (exp, fcode);
-@@ -10128,6 +10129,11 @@ maybe_emit_chk_warning (tree exp, enum b
-       arg_mask = 6;
-       is_strlen = 1;
-       break;
-+    case BUILT_IN_STRNCAT_CHK:
-+    /* For __strncat_chk the warning will be emitted only if overflowing
-+       by at least strlen (dest) + 1 bytes.  */
-+      arg_mask = 12;
-+      break;
-     case BUILT_IN_STRNCPY_CHK:
-       arg_mask = 12;
-       break;
-@@ -10165,6 +10171,22 @@ maybe_emit_chk_warning (tree exp, enum b
-       if (! len || ! host_integerp (len, 1) || tree_int_cst_lt (len, size))
- 	return;
-     }
-+  else if (fcode == BUILT_IN_STRNCAT_CHK)
-+    {
-+      tree src = TREE_VALUE (TREE_CHAIN (arglist));
-+      if (! src || ! host_integerp (len, 1) || tree_int_cst_lt (len, size))
-+	return;
-+      src = c_strlen (src, 1);
-+      if (! src || ! host_integerp (src, 1))
-+	{
-+	  locus = EXPR_LOCATION (exp);
-+	  warning (0, "%Hcall to %D might overflow destination buffer",
-+		   &locus, get_callee_fndecl (exp));
-+	  return;
-+	}
-+      else if (tree_int_cst_lt (src, size))
-+	return;
-+    }
-   else if (! host_integerp (len, 1) || ! tree_int_cst_lt (size, len))
-     return;
- 
---- gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c.jj	2006-09-18 13:07:54.000000000 +0200
-+++ gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c	2006-09-18 16:55:09.000000000 +0200
-@@ -0,0 +1,38 @@
-+/* Test whether buffer overflow warnings for __strncat_chk builtin
-+   are emitted properly.  */
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -std=gnu99" } */
-+
-+extern void abort (void);
-+
-+#include "../gcc.c-torture/execute/builtins/chk.h"
-+
-+char buf1[20];
-+char *q;
-+
-+void
-+test (int arg, ...)
-+{
-+  char *p = &buf1[10];
-+
-+  *p = 0;
-+  strncat (p, "abcdefg", 9);
-+  *p = 0;
-+  strncat (p, "abcdefghi", 9);
-+  *p = 0;
-+  strncat (p, "abcdefghij", 9);
-+  *p = 0;
-+  strncat (p, "abcdefghi", 10);
-+  *p = 0;
-+  strncat (p, "abcdefghij", 10); /* { dg-warning "will always overflow" } */
-+  *p = 0;
-+  strncat (p, "abcdefgh", 11);
-+  *p = 0;
-+  strncat (p, "abcdefghijkl", 11); /* { dg-warning "will always overflow" } */
-+  *p = 0;
-+  strncat (p, q, 9);
-+  *p = 0;
-+  strncat (p, q, 10); /* { dg-warning "might overflow" } */
-+  *p = 0;
-+  strncat (p, q, 11); /* { dg-warning "might overflow" } */
-+}
diff --git a/gcc41-tests.patch b/gcc41-tests.patch
deleted file mode 100644
index 63ef171..0000000
--- a/gcc41-tests.patch
+++ /dev/null
@@ -1,392 +0,0 @@
-2006-12-08  Jakub Jelinek  <jakub@redhat.com>
-
-	* g++.dg/debug/vartrack1.C: New test.
-
-	* g++.dg/opt/ifcvt1.C: New test.
-
-2006-12-08  Alexandre Oliva  <aoliva@redhat.com>
-
-	* g++.dg/template/array17.C: New test.
-
-2006-05-05  Jakub Jelinek  <jakub@redhat.com>
-
-	* gcc.dg/tls/opt-13.c: New test.
-
-2006-04-25  Jakub Jelinek  <jakub@redhat.com>
-
-	* gcc.dg/20060425-2.c: New test.
-
-	* g++.dg/opt/pr15054-2.C: New test.
-
-2006-04-20  Jakub Jelinek  <jakub@redhat.com>
-
-	* gcc.c-torture/execute/20060420-1.c: New test.
-
-2006-04-12  Jakub Jelinek  <jakub@redhat.com>
-
-	* gcc.c-torture/execute/20060412-1.c: New test.
-
-2006-02-25  Alexandre Oliva  <aoliva@redhat.com>
-
-	* gcc.target/powerpc/altivec-23.c: New test.
-
---- gcc/testsuite/gcc.target/powerpc/altivec-23.c	1970-01-01 00:00:00.000000000 +0000
-+++ gcc/testsuite/gcc.target/powerpc/altivec-23.c	2006-02-25 04:10:36.000000000 -0300
-@@ -0,0 +1,25 @@
-+/* Verify that it is possible to define variables of composite types
-+   containing vector types.  We used to crash handling the
-+   initializer of automatic ones.  */
-+
-+/* { dg-do compile } */
-+/* { dg-xfail-if "" { "powerpc-ibm-aix*" } { "-maltivec" } { "" } } */
-+/* { dg-options "-maltivec -mabi=altivec" } */
-+
-+#include <altivec.h>
-+
-+typedef int bt;
-+typedef vector bt vt;
-+typedef struct { vt x; bt y[sizeof(vt) / sizeof (bt)]; } st;
-+#define INIT { 1, 2, 3, 4 }
-+
-+void f ()
-+{
-+  vt x = INIT;
-+  vt y[1] = { INIT };
-+  st s = { INIT, INIT };
-+}
-+
-+vt x = INIT;
-+vt y[1] = { INIT };
-+st s = { INIT, INIT };
---- gcc/testsuite/gcc.c-torture/execute/20060420-1.c.jj	2006-04-20 18:47:19.000000000 +0200
-+++ gcc/testsuite/gcc.c-torture/execute/20060420-1.c	2006-04-20 19:07:20.000000000 +0200
-@@ -0,0 +1,71 @@
-+extern void abort (void);
-+
-+typedef float v4flt __attribute__ ((vector_size (16)));
-+
-+void __attribute__ ((noinline)) foo (float *dst, float **src, int a, int n)
-+{
-+  int i, j;
-+  int z = sizeof (v4flt) / sizeof (float);
-+  unsigned m = sizeof (v4flt) - 1;
-+
-+  for (j = 0; j < n && (((unsigned long) dst + j) & m); ++j)
-+    {
-+      float t = src[0][j];
-+      for (i = 1; i < a; ++i)
-+	t += src[i][j];
-+      dst[j] = t;
-+    }
-+
-+  for (; j < (n - (4 * z - 1)); j += 4 * z)
-+    {
-+      v4flt t0 = *(v4flt *) (src[0] + j + 0 * z);
-+      v4flt t1 = *(v4flt *) (src[0] + j + 1 * z);
-+      v4flt t2 = *(v4flt *) (src[0] + j + 2 * z);
-+      v4flt t3 = *(v4flt *) (src[0] + j + 3 * z);
-+      for (i = 1; i < a; ++i)
-+	{
-+	  t0 += *(v4flt *) (src[i] + j + 0 * z);
-+	  t1 += *(v4flt *) (src[i] + j + 1 * z);
-+	  t2 += *(v4flt *) (src[i] + j + 2 * z);
-+	  t3 += *(v4flt *) (src[i] + j + 3 * z);
-+	}
-+      *(v4flt *) (dst + j + 0 * z) = t0;
-+      *(v4flt *) (dst + j + 1 * z) = t1;
-+      *(v4flt *) (dst + j + 2 * z) = t2;
-+      *(v4flt *) (dst + j + 3 * z) = t3;
-+    }
-+  for (; j < n; ++j)
-+    {
-+      float t = src[0][j];
-+      for (i = 1; i < a; ++i)
-+	t += src[i][j];
-+      dst[j] = t;
-+    }
-+}
-+
-+float buffer[64];
-+
-+int
-+main (void)
-+{
-+  int i;
-+  float *dst, *src[2];
-+
-+  dst = buffer;
-+  dst += (-(long int) buffer & (16 * sizeof (float) - 1)) / sizeof (float);
-+  src[0] = dst + 16;
-+  src[1] = dst + 32;
-+  for (i = 0; i < 16; ++i)
-+    {
-+      src[0][i] = (float) i + 11 * (float) i;
-+      src[1][i] = (float) i + 12 * (float) i;
-+    }
-+  foo (dst, src, 2, 16);
-+  for (i = 0; i < 16; ++i)
-+    {
-+      float e = (float) i + 11 * (float) i + (float) i + 12 * (float) i;
-+      if (dst[i] != e)
-+	abort ();
-+    }
-+  return 0;
-+}
---- gcc/testsuite/gcc.c-torture/execute/20060412-1.c.jj	2006-04-03 11:04:53.758553500 +0200
-+++ gcc/testsuite/gcc.c-torture/execute/20060412-1.c	2006-04-12 10:54:20.000000000 +0200
-@@ -0,0 +1,33 @@
-+extern void abort (void);
-+
-+struct S
-+{
-+  long o;
-+};
-+
-+struct T
-+{
-+  long o;
-+  struct S m[82];
-+};
-+
-+struct T t;
-+
-+int
-+main ()
-+{
-+  struct S *p, *q;
-+
-+  p = (struct S *) &t;
-+  p = &((struct T *) p)->m[0];
-+  q = p + 82;
-+  while (--q > p)
-+    q->o = -1;
-+  q->o = 0;
-+
-+  if (q > p)
-+    abort ();
-+  if (q - p > 0)
-+    abort ();
-+  return 0;
-+}
---- gcc/testsuite/g++.dg/opt/pr15054-2.C	2006-04-19 19:21:31.748476000 +0200
-+++ gcc/testsuite/g++.dg/opt/pr15054-2.C	2006-04-25 15:55:07.000000000 +0200
-@@ -0,0 +1,39 @@
-+// PR middle-end/15054
-+
-+// { dg-do run }
-+// { dg-options "-O2" }
-+
-+extern "C" void abort (void);
-+
-+void
-+__attribute__((noinline))
-+check (long x, long y)
-+{
-+  if (x != y)
-+    abort ();
-+}
-+
-+struct A
-+{
-+  A() : a(2) { check (a, 2); }
-+  ~A() { check (a, 2); }
-+private:
-+  long a;
-+};
-+
-+class B {
-+  long b;
-+  B& operator =(const B& );
-+public:
-+  B (long p) : b(p) { check (b, 6); }
-+  B (const B& p) : b(p.b) { check (b, 6); }
-+  ~B () { check (b, 6); A obj; check (b, 6); }
-+  B foo() { return B(*this); }
-+};
-+
-+int main ()
-+{
-+  B o(6);
-+  o.foo().foo();
-+  return 0;
-+}
---- gcc/testsuite/gcc.dg/20060425-2.c	2004-06-24 14:04:38.000000000 -0400
-+++ gcc/testsuite/gcc.dg/20060425-2.c	2006-04-25 09:15:04.000000000 -0400
-@@ -0,0 +1,10 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+double
-+crashme (double v, double *p)
-+{
-+  if (v < 0. && *p == 1.)
-+    v = 0.;
-+  return v;
-+}
---- gcc/testsuite/gcc.dg/tls/opt-13.c	2006-04-19 19:21:31.748476000 +0200
-+++ gcc/testsuite/gcc.dg/tls/opt-13.c	2006-05-05 11:01:33.000000000 +0200
-@@ -0,0 +1,16 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+/* { dg-require-effective-target tls } */
-+
-+__thread struct
-+{
-+  int a;
-+  char b[32];
-+} thr;
-+
-+int
-+main ()
-+{
-+  __builtin_strcpy (thr.b, "abcd");
-+  return 0;
-+}
---- gcc/testsuite/g++.dg/debug/vartrack1.C	2004-06-24 14:04:38.000000000 -0400
-+++ gcc/testsuite/g++.dg/debug/vartrack1.C	2006-12-08 05:29:41.000000000 -0500
-@@ -0,0 +1,99 @@
-+// This testcase used to hang the compiler in vt_find_locations.
-+// { dg-do compile }
-+// { dg-options "-O2 -g" }
-+
-+struct S
-+{
-+  int a;
-+  S *b, *c, *d;
-+};
-+
-+struct T
-+{
-+  void f1 (S *x);
-+  void f2 (S *x);
-+  void f3 (S *x, S *y);
-+  S *e;
-+};
-+
-+void
-+T::f3 (S *x, S *y)
-+{
-+  while (x != this->e && (!x || x->a == 1))
-+    {
-+      if (x == y->c)
-+	{
-+	  S *w = y->d;
-+	  if (w && w->a == 0)
-+	    {
-+	      w->a = 1;
-+	      y->a = 0;
-+	      f2 (y);
-+	      w = y->d;
-+	    }
-+	  if (w && (!w->c || w->c->a == 1) && (!w->d || w->d->a == 1))
-+	    {
-+	      w->a = 0;
-+	      x = y;
-+	      y = x->b;
-+	    }
-+	  else
-+	    {
-+	      if (w && (!w->d || w->d->a == 1))
-+		{
-+		  if (w->c)
-+		    w->c->a = 1;
-+		  w->a = 0;
-+		  f1 (w);
-+		  w = y->d;
-+		}
-+	      if (w)
-+		{
-+		  w->a = y->a;
-+		  if (w->d)
-+		    w->d->a = 1;
-+		}
-+	      y->a = 1;
-+	      f2 (y);
-+	      x = e;
-+	    }
-+	}
-+      else
-+	{
-+	  S *w = y->c;
-+	  if (w && w->a == 0)
-+	    {
-+	      w->a = 1;
-+	      y->a = 0;
-+	      f1 (y);
-+	      w = y->c;
-+	    }
-+	  if (w && (!w->c || w->c->a == 1) && (!w->d || w->d->a == 1))
-+	    {
-+	      w->a = 0;
-+	      x = y;
-+	      y = x->b;
-+	    }
-+	  else
-+	    {
-+	      if (w && (!w->c || w->c->a == 1))
-+		{
-+		  w->a = 0;
-+		  if (w->d)
-+		    w->d->a = 1;
-+		  f2 (w);
-+		  w = y->c;
-+		}
-+	      if (w)
-+		{
-+		  w->a = y->a;
-+		  if (w->c)
-+		    w->c->a = 1;
-+		}
-+	      y->a = 1;
-+	      f1 (y);
-+	      x = e;
-+	    }
-+	}
-+    }
-+}
---- gcc/testsuite/g++.dg/opt/ifcvt1.C	2006-10-04 16:28:56.502613000 +0200
-+++ gcc/testsuite/g++.dg/opt/ifcvt1.C	2006-12-08 12:23:23.000000000 +0100
-@@ -0,0 +1,17 @@
-+// { dg-do compile }
-+// { dg-options "-O2 -fnon-call-exceptions" }
-+
-+struct S { ~S () throw () {} };
-+double bar ();
-+
-+int
-+foo ()
-+{
-+  S a;
-+  int i = 0;
-+  double c = bar ();
-+  c = c < 0 ? -c : c;
-+  if (c <= 1.e-8)
-+    i += 24;
-+  return i;
-+}
---- gcc/testsuite/g++.dg/template/array17.C	2006-10-04 16:28:56.502613000 +0200
-+++ gcc/testsuite/g++.dg/template/array17.C	2006-12-08 12:38:27.000000000 +0100
-@@ -0,0 +1,23 @@
-+// { dg-do compile }
-+
-+template <typename T>
-+struct V {
-+  T& operator[](int);
-+};
-+
-+struct S {
-+  S operator +(int);
-+  template <typename T> T value();
-+};
-+
-+template <typename T>
-+void R (T v)
-+{
-+  v[(S() + 0).template value<int>()][0] = 0;
-+}
-+
-+int
-+main ()
-+{
-+  R(V<V<int> >());
-+}
diff --git a/gcc41-tls-data-alignment.patch b/gcc41-tls-data-alignment.patch
deleted file mode 100644
index 1ad26c7..0000000
--- a/gcc41-tls-data-alignment.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-2007-05-03  Jakub Jelinek  <jakub@redhat.com>
-
-	* varasm.c (align_variable): Don't increase alignment for
-	DECL_THREAD_LOCAL_P variables above BITS_PER_WORD through
-	DATA_ALIGNMENT or CONSTANT_ALIGNMENT.
-
---- gcc/varasm.c.jj	2007-04-14 14:55:25.000000000 +0200
-+++ gcc/varasm.c	2007-05-03 11:23:56.000000000 +0200
-@@ -1097,11 +1097,22 @@ align_variable (tree decl, bool dont_out
-   if (! DECL_USER_ALIGN (decl))
-     {
- #ifdef DATA_ALIGNMENT
--      align = DATA_ALIGNMENT (TREE_TYPE (decl), align);
-+      unsigned int data_align = DATA_ALIGNMENT (TREE_TYPE (decl), align);
-+      /* Don't increase alignment too much for TLS variables - TLS space
-+	 is too precious.  */
-+      if (! DECL_THREAD_LOCAL_P (decl) || data_align <= BITS_PER_WORD)
-+	align = data_align;
- #endif
- #ifdef CONSTANT_ALIGNMENT
-       if (DECL_INITIAL (decl) != 0 && DECL_INITIAL (decl) != error_mark_node)
--	align = CONSTANT_ALIGNMENT (DECL_INITIAL (decl), align);
-+	{
-+	  unsigned int const_align = CONSTANT_ALIGNMENT (DECL_INITIAL (decl),
-+							 align);
-+	  /* Don't increase alignment too much for TLS variables - TLS space
-+	     is too precious.  */
-+	  if (! DECL_THREAD_LOCAL_P (decl) || const_align <= BITS_PER_WORD)
-+	    align = const_align;
-+	}
- #endif
-     }
- 
diff --git a/gcc41.spec b/gcc41.spec
index 1e156f6..dedb119 100644
--- a/gcc41.spec
+++ b/gcc41.spec
@@ -1,6 +1,6 @@
-%define DATE 20070503
+%define DATE 20070821
 %define gcc_version 4.1.2
-%define gcc_release 12
+%define gcc_release 18
 %define _unpackaged_files_terminate_build 0
 %define multilib_64_archs sparc64 ppc64 s390x x86_64
 %define include_gappletviewer 1
@@ -25,8 +25,11 @@
 Summary: Various compilers (C, C++, Objective-C, Java, ...)
 Name: gcc
 Version: %{gcc_version}
-Release: %{gcc_release}
-License: GPL
+Release: %{gcc_release}%{?dist}
+# libgcc, libgfortran, libmudflap and crtstuff have an exception which allows
+# linking them into any kind of program or shared library without
+# restrictions.
+License: GPLv2+ and GPLv2+ with exceptions
 Group: Development/Languages
 Source0: gcc-%{version}-%{DATE}.tar.bz2
 Source1: libgcc_post_upgrade.c
@@ -118,35 +121,33 @@ Patch6: gcc41-ada-pr18302.patch
 Patch7: gcc41-ada-tweaks.patch
 Patch8: gcc41-java-slow_pthread_self.patch
 Patch9: gcc41-ppc32-retaddr.patch
-Patch10: gcc41-amdfam10.patch
-Patch11: gcc41-dsohandle.patch
-Patch12: gcc41-rh184446.patch
-Patch13: gcc41-pr20297-test.patch
-Patch14: gcc41-objc-rh185398.patch
-Patch15: gcc41-tests.patch
-Patch16: gcc41-hash-style-gnu.patch
-Patch17: gcc41-java-libdotdotlib.patch
-Patch18: gcc41-pr28709.patch
-Patch19: gcc41-pr28755.patch
-Patch20: gcc41-pr27898.patch
-Patch21: gcc41-pr27567.patch
-Patch22: gcc41-pr29272.patch
-Patch23: gcc41-pr29059.patch
-Patch24: gcc41-strncat-chk.patch
-Patch25: gcc41-pr29299.patch
-Patch26: gcc41-java-bogus-debugline.patch
-Patch27: gcc41-libjava-visibility.patch
-Patch28: gcc41-pr31187.patch
-Patch29: gcc41-dtor-relro.patch
-Patch30: gcc41-rh234515.patch
-Patch31: gcc41-libgomp-ncpus.patch
-Patch32: gcc41-rh236895.patch
-Patch33: gcc41-pr28482.patch
-Patch34: gcc41-rh235008.patch
-Patch35: gcc41-pr31748.patch
-Patch36: gcc41-tls-data-alignment.patch
+Patch10: gcc41-dsohandle.patch
+Patch11: gcc41-rh184446.patch
+Patch12: gcc41-pr20297-test.patch
+Patch13: gcc41-hash-style-gnu.patch
+Patch14: gcc41-java-libdotdotlib.patch
+Patch15: gcc41-pr28755.patch
+Patch16: gcc41-pr27898.patch
+Patch17: gcc41-java-bogus-debugline.patch
+Patch18: gcc41-libjava-visibility.patch
+Patch19: gcc41-pr32139.patch
+Patch20: gcc41-rh236895.patch
+Patch21: gcc41-rh235008.patch
+Patch22: gcc41-build-id.patch
+Patch23: gcc41-pr28690.patch
+Patch24: gcc41-rh247256.patch
+Patch25: gcc41-pr22244.patch
+Patch26: gcc41-pr32678.patch
+Patch27: gcc41-pr32912.patch
+Patch28: gcc41-sparc-niagara.patch
+Patch29: gcc41-ppc-tramp.patch
+Patch30: gcc41-rh253102.patch
 
+# On ARM EABI systems, we do want -gnueabi to be part of the
+# target triple.
+%ifnarch %{arm}
 %define _gnu %{nil}
+%endif
 %ifarch sparc
 %define gcc_target_platform sparc64-%{_vendor}-%{_target_os}
 %endif
@@ -433,33 +434,27 @@ which are required to run programs compiled with the GNAT.
 %patch7 -p0 -b .ada-tweaks~
 %patch8 -p0 -b .java-slow_pthread_self~
 %patch9 -p0 -b .ppc32-retaddr~
-%patch10 -p0 -b .amdfam10~
-%patch11 -p0 -b .dsohandle~
-%patch12 -p0 -b .rh184446~
-%patch13 -p0 -E -b .pr20297-test~
-%patch14 -p0 -b .objc-rh185398~
-%patch15 -p0 -b .tests~
-%patch16 -p0 -b .hash-style-gnu~
-%patch17 -p0 -b .java-libdotdotlib~
-%patch18 -p0 -b .pr28709~
-%patch19 -p0 -b .pr28755~
-%patch20 -p0 -b .pr27898~
-%patch21 -p0 -b .pr27567~
-%patch22 -p0 -b .pr29272~
-%patch23 -p0 -b .pr29059~
-%patch24 -p0 -b .strncat-chk~
-%patch25 -p0 -b .pr29299~
-%patch26 -p0 -b .java-bogus-debugline~
-%patch27 -p0 -b .libjava-visibility~
-%patch28 -p0 -b .pr31187~
-%patch29 -p0 -b .dtor-relro~
-%patch30 -p0 -b .rh234515~
-%patch31 -p0 -b .libgomp-ncpus~
-%patch32 -p0 -b .rh236895~
-%patch33 -p0 -b .pr28482~
-%patch34 -p0 -b .rh235008~
-%patch35 -p0 -b .pr31748~
-%patch36 -p0 -b .tls-data-alignment~
+%patch10 -p0 -b .dsohandle~
+%patch11 -p0 -b .rh184446~
+%patch12 -p0 -E -b .pr20297-test~
+%patch13 -p0 -b .hash-style-gnu~
+%patch14 -p0 -b .java-libdotdotlib~
+%patch15 -p0 -b .pr28755~
+%patch16 -p0 -b .pr27898~
+%patch17 -p0 -b .java-bogus-debugline~
+%patch18 -p0 -b .libjava-visibility~
+%patch19 -p0 -b .pr32139~
+%patch20 -p0 -b .rh236895~
+%patch21 -p0 -b .rh235008~
+#%patch22 -p0 -b .build-id~
+%patch23 -p0 -b .pr28690~
+%patch24 -p0 -b .rh247256~
+%patch25 -p0 -b .pr22244~
+%patch26 -p0 -b .pr32678~
+%patch27 -p0 -b .pr32912~
+%patch28 -p0 -b .sparc-niagara~
+%patch29 -p0 -b .ppc-tramp~
+%patch30 -p0 -b .rh253102~
 
 sed -i -e 's/4\.1\.3/4.1.2/' gcc/BASE-VER gcc/version.c
 sed -i -e 's/" (Red Hat[^)]*)"/" (Red Hat %{version}-%{gcc_release})"/' gcc/version.c
@@ -752,6 +747,17 @@ EOF
   fi
 done
 
+# Nuke bits/stdc++.h.gch dirs
+# 1) there is no bits/stdc++.h header installed, so when the gch file can't be
+#    used, compilation fails
+# 2) sometimes it is hard to match the exact options used for building
+#    libstdc++-v3 or they aren't desirable
+# 3) there are multilib issues, conflicts etc. with this
+# 4) it is huge
+# People can always precompile on their own whatever they want, but
+# shipping this for everybody is unnecessary.
+rm -rf $RPM_BUILD_ROOT%{_prefix}/include/c++/%{gcc_version}/%{gcc_target_platform}/bits/stdc++.h.gch
+
 %ifarch sparc sparc64
 ln -f $RPM_BUILD_ROOT%{_prefix}/bin/%{gcc_target_platform}-gcc \
   $RPM_BUILD_ROOT%{_prefix}/bin/sparc-%{_vendor}-%{_target_os}-gcc
@@ -1583,6 +1589,57 @@ fi
 %doc rpm.doc/changelogs/libmudflap/ChangeLog*
 
 %changelog
+* Mon Aug 27 2007 Jakub Jelinek <jakub@redhat.com> 4.1.2-18.fc7
+- update from gcc-4_1-branch (-r124365:127672)
+  - PRs c++/32112, c++/17763, rtl-optimization/32450, target/31331,
+	target/32641, target/32660, tree-optimization/32681,
+	boehm-gc/21940, boehm-gc/21942, target/28307, target/32506,
+	tree-optimization/31966, tree-optimization/32533,
+	inline-asm/32109, rtl-optimization/28011, target/32389,
+	libfortran/31409, libfortran/31880, libfortran/31964,
+	rtl-optimization/31691, target/31022, target/31480, target/31701,
+	target/31876, target/32163, tree-optimization/26998
+- fix ppc32 libgcc.a(tramp.o), so that binaries using trampolines
+  aren't forced to use bss PLT
+- fix a fortran charlen sharing bug (#253102)
+- fix ICE with X|~X or X^~X with vectors (PR middle-end/32912)
+- nuke bits/stdc++.h.gch directories from libstdc++-devel (#253304)
+- fix fortran Tx format handling (Jerry DeLisle, #252152,
+  PR libgfortran/32678)
+- add support for Sun UltraSPARC T1 chips - -mcpu=niagara (David S. Miller)
+- don't NRV optimize fields inside anonymous unions (PR c++/32992)
+- fortran debuginfo improvements for constant bound arrays (#248541,
+  PR fortran/22244)
+- update License tag
+- backport ARM fixes from trunk (#246800)
+  - PRs middle-end/24998, target/28516, target/30486
+- fix simplify_plus_minus with ppc{,64} power6 tuning (regression from
+  4.1.1-52.el5.2, #247256)
+- fix OpenMP handling of Fortran POINTER non-array vars (PR fortran/32550)
+- gomp update from gcc-4_2-branch (-r125917:125918)
+  - PR middle-end/32362
+- on ppc{,64} when tuning for power6{,x}, try to put the base
+  register as first operand in instructions to improve
+  performance (Peter Bergner, #225425, PR middle-end/28690)
+- on ppc64 emit nop after a call and disallow sibling calls
+  if the target function is not defined in the same object file
+  (David Edelsohn, #245424)
+- gomp parallel sections fix and fix for checking whether combined
+  parallel can be used (PR libgomp/32468)
+- gomp updates from the trunk (-r125541:125542, -r125543:125544) and
+  from gcc-4_2-branch (-r125184:125185)
+  - PRs tree-optimization/31769, c++/32177
+- don't set TREE_READONLY on C++ objects that need runtime initialization
+  (PRs c++/31806, c++/31809)
+- fix computation of common pointer type (PR tree-optimization/32139)
+- precompute const and pure fn calls inside another fn call arguments
+  with accumulating outgoing args
+  (PRs middle-end/32285, tree-optimization/30493)
+- fix handling of RESULT_DECLs in points-to analysis
+  (#243438, PR tree-optimization/32353)
+- work around java.lang.reflect.Modifier.INTERPRETED clash with
+  java.lang.reflect.Modifier.SYNTHETIC (Andrew Haley, #240720)
+
 * Thu May  3 2007 Jakub Jelinek <jakub@redhat.com> 4.1.2-12
 - update from gcc-4_1-branch (-r124100:124365)
   - PRs c++/30016, c++/30221, middle-end/30761, target/18989,
diff --git a/libgcc_post_upgrade.c b/libgcc_post_upgrade.c
index b65076f..9fcb138 100644
--- a/libgcc_post_upgrade.c
+++ b/libgcc_post_upgrade.c
@@ -387,6 +387,35 @@ register void *__thread_self __asm ("g7");
 	   : inline_syscall_clobbers, "$20", "$21");		\
 	_sc_ret = _sc_0, _sc_err = _sc_19;			\
 }
+#elif defined __arm__ && defined __ARM_EABI__
+# define INTERNAL_SYSCALL_DECL(err) do { } while (0)
+# define INTERNAL_SYSCALL(name, err, nr, args...)		\
+  ({								\
+	register int _r0 __asm__("r0");				\
+	register int _nr __asm__("r7");				\
+	LOAD_ARGS_##nr(args)					\
+	_nr = __NR_##name;					\
+	asm volatile ("swi\t0\t@ syscall " #name "\n\t"		\
+	: "=r" (_r0)						\
+	: "r" (_nr) ASM_ARGS_##nr				\
+	: "memory");						\
+	_r0; })
+# define INTERNAL_SYSCALL_ERROR_P(val, err) \
+  ((unsigned int) (val) >= 0xfffff001u)
+# define ASM_ARGS_0
+# define ASM_ARGS_1	, "r" (_r0)
+# define ASM_ARGS_2	, "r" (_r0), "r" (_r1)
+# define ASM_ARGS_3	, "r" (_r0), "r" (_r1), "r" (_r2)
+# define LOAD_ARGS_0()
+# define LOAD_ARGS_1(r0)					\
+	_r0 = (int)r0;
+# define LOAD_ARGS_2(r0, r1)					\
+	_r0 = (int)r0;						\
+	register int _r1 __asm__("r1") = (int)r1;
+# define LOAD_ARGS_3(r0, r1, r2)				\
+	_r0 = (int)r0;						\
+	register int _r1 __asm__("r1") = (int)r1;		\
+	register int _r2 __asm__("r2") = (int)r2;
 #endif
 
 int main (int argc, char **argv)
diff --git a/sources b/sources
index 63b52c2..941bcb0 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-f592f2e4d5779b970a7050a864131e69  gcc-4.1.2-20070503.tar.bz2
+65778706d6b9c029a06fca968a45ab7f  gcc-4.1.2-20070821.tar.bz2