gcc/gcc44-power7-2.patch

2009-03-13  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/39457
	* config/rs6000/rs6000.opt (-mdisallow-float-in-lr-ctr): Add
	temporary debug switch.
	* config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Revert
	behavior of disallowing floating point in the LR and CTR
	registers by default; keep disallowing it when
	-mdisallow-float-in-lr-ctr is given or when VSX (power7) is
	enabled.

2009-03-13  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/vector.md (vec_extract_evenv2df): Delete, insn
	causes problems in building spec 2006.
	(vec_extract_oddv2df): Ditto.
	(vec_pack_trunc_v2df): New expanders for VSX vectorized
	conversions.
	(vec_pack_sfix_trunc_v2df): Ditto.
	(vec_pack_ufix_trunc_v2df): Ditto.
	(vec_unpacks_hi_v4sf): Ditto.
	(vec_unpacks_lo_v4sf): Ditto.
	(vec_unpacks_float_hi_v4si): Ditto.
	(vec_unpacks_float_lo_v4si): Ditto.
	(vec_unpacku_float_hi_v4si): Ditto.
	(vec_unpacku_float_lo_v4si): Ditto.
	* config/rs6000/rs6000-protos.h (rs6000_vector_secondary_reload):
	Declaration for new target hook.
	* config/rs6000/rs6000.c (TARGET_SECONDARY_RELOAD): Add new target
	hook for eventually fixing up the memory references for Altivec
	and VSX reloads to be reg+reg instead of reg+offset. Right now,
	this is a stub function that prints debug information if
	-mdebug=addr and then calls default_secondary_reload.
	(rs6000_secondary_reload): Ditto.
	(rs6000_vector_secondary_reload): Ditto.
	(rs6000_builtin_conversion): Add support for V2DI/V2DF
	conversions.
	(rs6000_legitimate_offset_address_p): Test for the vector unit
	doing the memory references.
	(rs6000_legitimize_reload_address): Ditto.
	(rs6000_legitimize_address): Print extra \n if -mdebug=addr.
	(rs6000_legitimize_reload_address): Ditto.
	(rs6000_legitimate_address): Ditto.
	(rs6000_mode_dependent_address): Ditto.
	(bdesc_2arg): Add VSX builtins.
	(bdesc_abs): Ditto.
	(bdesc_1arg): Ditto.
	(altivec_init_builtins): Ditto.
	(rs6000_secondary_memory_needed_rtx): Add debug support if
	-mdebug=addr.
	(rs6000_preferred_reload_class): Ditto.
	(rs6000_secondary_memory_needed): Ditto.
	(rs6000_secondary_reload_class): Ditto.
	(rs6000_cannot_change_mode_class): Ditto.
	* config/rs6000/vsx.md (UNSPEC_VSX_*): Add unspecs for VSX
	conversions.
	(vsx_nabs<mode>): Add generator function.
	(vsx_float<VSi><mode>2): Ditto.
	(vsx_floatuns<VSi><mode>2): Ditto.
	(vsx_xxmrghw): Ditto.
	(vsx_xxmrglw): Ditto.
	(vsx_xvcvdpsp): New VSX vector conversion insn.
	(vsx_xvcvdpsxws): Ditto.
	(vsx_xvcvdpuxws): Ditto.
	(vsx_xvcvspdp): Ditto.
	(vsx_xvcvsxwdp): Ditto.
	(vsx_xvcvuxwdp): Ditto.
	(vsx_reload_*): New insns for reload support.
	* config/rs6000/rs6000.h: Fix a comment.
	* config/rs6000/altivec.md (altivec_reload_*): New insns for
	reload support.
	* config/rs6000/rs6000.md (ptrsize): New mode attribute for the
	pointer size.

2009-03-10  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/vsx.md (vsx_concat_v2df): Add explicit 'f'
	register class for scalar data, correct uses of the xxpermdi
	instruction.
	(vsx_set_v2df): Ditto.
	(vsx_extract_v2df): Ditto.
	(vsx_xxpermdi): Ditto.
	(vsx_splatv2df): Ditto.
	(vsx_xxmrghw): Use wf instead of v constraints.
	(vsx_xxmrglw): Ditto.

testsuite/
2009-03-13  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/39457
	* gcc.target/powerpc/pr39457.c: New test for PR39457.

2009-03-13  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/vsx-builtin-1.c: New test for builtins.
	* gcc.target/powerpc/vsx-builtin-2.c: Ditto.
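
As background for reviewers (illustrative only, not part of the patch): the new
vec_unpacks_*_v4sf / vec_pack_trunc_v2df expanders and the V2DI/V2DF cases in
rs6000_builtin_conversion are aimed at letting the vectorizer handle conversion
loops like the sketch below.  The array names, N, and options are made up for
this example; something like -O3 -mcpu=power7 (or -mvsx) would be needed to
exercise the VSX paths.

/* Hypothetical conversion loops the new expanders target.  */
#define N 1024

double d[N];
float f[N];
long long ll[N];

void
widen_float_to_double (void)
{
  int i;
  for (i = 0; i < N; i++)
    d[i] = (double) f[i];	/* vec_unpacks_{hi,lo}_v4sf -> xvcvspdp */
}

void
narrow_double_to_float (void)
{
  int i;
  for (i = 0; i < N; i++)
    f[i] = (float) d[i];	/* vec_pack_trunc_v2df -> xvcvdpsp */
}

void
truncate_double_to_int64 (void)
{
  int i;
  for (i = 0; i < N; i++)
    ll[i] = (long long) d[i];	/* V2DI FIX_TRUNC_EXPR case in rs6000_builtin_conversion */
}
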
--- gcc/config/rs6000/vector.md (revision 144758)
+++ gcc/config/rs6000/vector.md (revision 144843)
@@ -496,23 +496,122 @@ (define_expand "vec_interleave_lowv2df"
"VECTOR_UNIT_VSX_P (V2DFmode)"
"")
-;; For 2 element vectors, even/odd is the same as high/low
-(define_expand "vec_extract_evenv2df"
- [(set (match_operand:V2DF 0 "vfloat_operand" "")
- (vec_concat:V2DF
- (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "")
- (parallel [(const_int 0)]))
- (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "")
- (parallel [(const_int 0)]))))]
- "VECTOR_UNIT_VSX_P (V2DFmode)"
- "")
+
+;; Convert double word types to single word types
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "vsx_register_operand" "")
+ (match_operand:V2DF 1 "vsx_register_operand" "")
+ (match_operand:V2DF 2 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SFmode);
+ rtx r2 = gen_reg_rtx (V4SFmode);
-(define_expand "vec_extract_oddv2df"
- [(set (match_operand:V2DF 0 "vfloat_operand" "")
- (vec_concat:V2DF
- (vec_select:DF (match_operand:V2DF 1 "vfloat_operand" "")
- (parallel [(const_int 1)]))
- (vec_select:DF (match_operand:V2DF 2 "vfloat_operand" "")
- (parallel [(const_int 1)]))))]
- "VECTOR_UNIT_VSX_P (V2DFmode)"
- "")
+ emit_insn (gen_vsx_xvcvdpsp (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpsp (r2, operands[2]));
+ emit_insn (gen_vec_extract_evenv4sf (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "vsx_register_operand" "")
+ (match_operand:V2DF 1 "vsx_register_operand" "")
+ (match_operand:V2DF 2 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SImode);
+ rtx r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vsx_xvcvdpsxws (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpsxws (r2, operands[2]));
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_ufix_trunc_v2df"
+ [(match_operand:V4SI 0 "vsx_register_operand" "")
+ (match_operand:V2DF 1 "vsx_register_operand" "")
+ (match_operand:V2DF 2 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && TARGET_ALTIVEC"
+{
+ rtx r1 = gen_reg_rtx (V4SImode);
+ rtx r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vsx_xvcvdpuxws (r1, operands[1]));
+ emit_insn (gen_vsx_xvcvdpuxws (r2, operands[2]));
+ emit_insn (gen_vec_extract_evenv4si (operands[0], r1, r2));
+ DONE;
+})
+
+;; Convert single word types to double word
+(define_expand "vec_unpacks_hi_v4sf"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V4SF 1 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+{
+ rtx reg = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_vec_interleave_highv4sf (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V4SF 1 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+{
+ rtx reg = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_vec_interleave_lowv4sf (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvspdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvsxwdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v4si"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v4si"
+ [(match_operand:V2DF 0 "vsx_register_operand" "")
+ (match_operand:V4SI 1 "vsx_register_operand" "")]
+ "VECTOR_UNIT_VSX_P (V2DFmode) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SImode)"
+{
+ rtx reg = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (reg, operands[1], operands[1]));
+ emit_insn (gen_vsx_xvcvuxwdp (operands[0], reg));
+ DONE;
+})
--- gcc/config/rs6000/rs6000-protos.h (revision 144758)
+++ gcc/config/rs6000/rs6000-protos.h (revision 144843)
@@ -72,6 +72,7 @@ extern bool rs6000_secondary_memory_need
extern bool rs6000_cannot_change_mode_class (enum machine_mode,
enum machine_mode,
enum reg_class);
+extern void rs6000_vector_secondary_reload (rtx, rtx, rtx, bool);
extern int paired_emit_vector_cond_expr (rtx, rtx, rtx,
rtx, rtx, rtx);
extern void paired_expand_vector_move (rtx operands[]);
--- gcc/config/rs6000/rs6000.opt (revision 144845)
+++ gcc/config/rs6000/rs6000.opt (revision 144857)
@@ -139,6 +139,9 @@ mvsx-scalar-memory
Target Report Var(TARGET_VSX_SCALAR_MEMORY)
If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
+mdisallow-float-in-lr-ctr
+Target Undocumented Var(TARGET_DISALLOW_FLOAT_IN_LR_CTR) Init(-1)
+
mupdate
Target Report Var(TARGET_UPDATE) Init(1)
Generate load/store with update instructions
--- gcc/config/rs6000/rs6000.c (revision 144758)
+++ gcc/config/rs6000/rs6000.c (revision 144843)
@@ -1004,6 +1004,10 @@ static rtx rs6000_emit_vector_compare (e
enum machine_mode);
static tree rs6000_stack_protect_fail (void);
+static enum reg_class rs6000_secondary_reload (bool, rtx, enum reg_class,
+ enum machine_mode,
+ struct secondary_reload_info *);
+
const int INSN_NOT_AVAILABLE = -1;
static enum machine_mode rs6000_eh_return_filter_mode (void);
@@ -1333,6 +1337,9 @@ static const char alt_reg_names[][8] =
#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Return number of consecutive hard regs needed starting at reg REGNO
@@ -1448,10 +1448,16 @@ rs6000_hard_regno_mode_ok (int regno, en
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
return 1;
- /* Don't allow anything but word sized integers (aka pointers) in CTR/LR. You
- really don't want to spill your floating point values to those
- registers. Also do it for the old MQ register in the power. */
- if (regno == CTR_REGNO || regno == LR_REGNO || regno == MQ_REGNO)
+ /* Don't allow anything but word sized integers (aka pointers) in CTR/LR.
+ You really don't want to spill your floating point values to those
+ registers. Also do it for the old MQ register in the power.
+
+ While this is desirable in theory, disabling float to go in LR/CTR does
+ cause some regressions, so until they are taken care of, revert to the old
+ behavior by default for most power systems, but enable it for power7. */
+ if ((TARGET_DISALLOW_FLOAT_IN_LR_CTR > 0
+ || (TARGET_DISALLOW_FLOAT_IN_LR_CTR < 0 && TARGET_VSX))
+ && (regno == CTR_REGNO || regno == LR_REGNO || regno == MQ_REGNO))
return (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
@@ -2447,6 +2454,14 @@ rs6000_builtin_conversion (enum tree_cod
case FIX_TRUNC_EXPR:
switch (TYPE_MODE (type))
{
+ case V2DImode:
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVDPUXDS]
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVDPSXDS];
+
case V4SImode:
if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
return NULL_TREE;
@@ -2462,6 +2477,14 @@ rs6000_builtin_conversion (enum tree_cod
case FLOAT_EXPR:
switch (TYPE_MODE (type))
{
+ case V2DImode:
+ if (!VECTOR_UNIT_VSX_P (V2DFmode))
+ return NULL_TREE;
+
+ return TYPE_UNSIGNED (type)
+ ? rs6000_builtin_decls[VSX_BUILTIN_XVCVUXDSP]
+ : rs6000_builtin_decls[VSX_BUILTIN_XVCVSXDSP];
+
case V4SImode:
if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode))
return NULL_TREE;
@@ -2469,6 +2492,7 @@ rs6000_builtin_conversion (enum tree_cod
return TYPE_UNSIGNED (type)
? rs6000_builtin_decls[VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF]
: rs6000_builtin_decls[VECTOR_BUILTIN_FLOAT_V4SI_V4SF];
+
default:
return NULL_TREE;
}
@@ -4101,7 +4125,7 @@ rs6000_legitimate_offset_address_p (enum
case V2DImode:
/* AltiVec/VSX vector modes. Only reg+reg addressing is valid and
constant offset zero should not occur due to canonicalization. */
- if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode))
+ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
return false;
break;
@@ -4441,6 +4465,7 @@ rs6000_legitimize_address (rtx x, rtx ol
}
else
fprintf (stderr, "NULL returned\n");
+ fprintf (stderr, "\n");
}
return ret;
@@ -4776,8 +4801,7 @@ rs6000_legitimize_reload_address (rtx x,
&& REG_MODE_OK_FOR_BASE_P (XEXP (x, 0), mode)
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& (INTVAL (XEXP (x, 1)) & 3) != 0
- && !ALTIVEC_VECTOR_MODE (mode)
- && !VSX_VECTOR_MODE (mode)
+ && VECTOR_MEM_NONE_P (mode)
&& GET_MODE_SIZE (mode) >= UNITS_PER_WORD
&& TARGET_POWERPC64)
{
@@ -4798,8 +4822,7 @@ rs6000_legitimize_reload_address (rtx x,
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DDmode || mode == TDmode
|| mode == DImode))
- && !ALTIVEC_VECTOR_MODE (mode)
- && !VSX_VECTOR_MODE (mode))
+ && VECTOR_MEM_NONE_P (mode))
{
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
@@ -4843,6 +4866,7 @@ rs6000_legitimize_reload_address (rtx x,
/* Don't do this for TFmode or TDmode, since the result isn't
offsettable. The same goes for DImode without 64-bit gprs and
DFmode and DDmode without fprs. */
+ && VECTOR_MEM_NONE_P (mode)
&& mode != TFmode
&& mode != TDmode
&& (mode != DImode || TARGET_POWERPC64)
@@ -4918,6 +4942,8 @@ rs6000_legitimize_reload_address (rtx x,
fprintf (stderr, "New address:\n");
debug_rtx (ret);
}
+
+ fprintf (stderr, "\n");
}
return ret;
@@ -5035,6 +5061,7 @@ rs6000_legitimate_address (enum machine_
GET_MODE_NAME (mode),
reg_ok_strict);
debug_rtx (orig_x);
+ fprintf (stderr, "\n");
}
return ret;
@@ -5082,9 +5109,10 @@ rs6000_mode_dependent_address (rtx addr)
if (TARGET_DEBUG_ADDR)
{
fprintf (stderr,
- "\nrs6000_mode_dependent_address: ret = %d\n",
- (int)ret);
+ "\nrs6000_mode_dependent_address: ret = %s\n",
+ ret ? "true" : "false");
debug_rtx (addr);
+ fprintf (stderr, "\n");
}
return ret;
@@ -7917,6 +7945,20 @@ static struct builtin_description bdesc_
{ MASK_ALTIVEC, CODE_FOR_altivec_vsumsws, "__builtin_altivec_vsumsws", ALTIVEC_BUILTIN_VSUMSWS },
{ MASK_ALTIVEC, CODE_FOR_xorv4si3, "__builtin_altivec_vxor", ALTIVEC_BUILTIN_VXOR },
+ { MASK_VSX, CODE_FOR_addv2df3, "__builtin_vsx_xvadddp", VSX_BUILTIN_XVADDDP },
+ { MASK_VSX, CODE_FOR_subv2df3, "__builtin_vsx_xvsubdp", VSX_BUILTIN_XVSUBDP },
+ { MASK_VSX, CODE_FOR_mulv2df3, "__builtin_vsx_xvmuldp", VSX_BUILTIN_XVMULDP },
+ { MASK_VSX, CODE_FOR_divv2df3, "__builtin_vsx_xvdivdp", VSX_BUILTIN_XVDIVDP },
+ { MASK_VSX, CODE_FOR_sminv2df3, "__builtin_vsx_xvmindp", VSX_BUILTIN_XVMINDP },
+ { MASK_VSX, CODE_FOR_smaxv2df3, "__builtin_vsx_xvmaxdp", VSX_BUILTIN_XVMAXDP },
+
+ { MASK_VSX, CODE_FOR_addv4sf3, "__builtin_vsx_xvaddsp", VSX_BUILTIN_XVADDSP },
+ { MASK_VSX, CODE_FOR_subv4sf3, "__builtin_vsx_xvsubsp", VSX_BUILTIN_XVSUBSP },
+ { MASK_VSX, CODE_FOR_mulv4sf3, "__builtin_vsx_xvmulsp", VSX_BUILTIN_XVMULSP },
+ { MASK_VSX, CODE_FOR_divv4sf3, "__builtin_vsx_xvdivsp", VSX_BUILTIN_XVDIVSP },
+ { MASK_VSX, CODE_FOR_sminv4sf3, "__builtin_vsx_xvminsp", VSX_BUILTIN_XVMINSP },
+ { MASK_VSX, CODE_FOR_smaxv4sf3, "__builtin_vsx_xvmaxsp", VSX_BUILTIN_XVMAXSP },
+
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_add", ALTIVEC_BUILTIN_VEC_ADD },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddfp", ALTIVEC_BUILTIN_VEC_VADDFP },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduwm", ALTIVEC_BUILTIN_VEC_VADDUWM },
@@ -8288,7 +8330,11 @@ static const struct builtin_description
{ MASK_ALTIVEC, CODE_FOR_absv16qi2, "__builtin_altivec_abs_v16qi", ALTIVEC_BUILTIN_ABS_V16QI },
{ MASK_ALTIVEC, CODE_FOR_altivec_abss_v4si, "__builtin_altivec_abss_v4si", ALTIVEC_BUILTIN_ABSS_V4SI },
{ MASK_ALTIVEC, CODE_FOR_altivec_abss_v8hi, "__builtin_altivec_abss_v8hi", ALTIVEC_BUILTIN_ABSS_V8HI },
- { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI }
+ { MASK_ALTIVEC, CODE_FOR_altivec_abss_v16qi, "__builtin_altivec_abss_v16qi", ALTIVEC_BUILTIN_ABSS_V16QI },
+ { MASK_VSX, CODE_FOR_absv2df2, "__builtin_vsx_xvabsdp", VSX_BUILTIN_XVABSDP },
+ { MASK_VSX, CODE_FOR_vsx_nabsv2df2, "__builtin_vsx_xvnabsdp", VSX_BUILTIN_XVNABSDP },
+ { MASK_VSX, CODE_FOR_absv4sf2, "__builtin_vsx_xvabssp", VSX_BUILTIN_XVABSSP },
+ { MASK_VSX, CODE_FOR_vsx_nabsv4sf2, "__builtin_vsx_xvnabssp", VSX_BUILTIN_XVNABSSP },
};
/* Simple unary operations: VECb = foo (unsigned literal) or VECb =
@@ -8314,6 +8360,11 @@ static struct builtin_description bdesc_
{ MASK_ALTIVEC, CODE_FOR_altivec_vupklpx, "__builtin_altivec_vupklpx", ALTIVEC_BUILTIN_VUPKLPX },
{ MASK_ALTIVEC, CODE_FOR_altivec_vupklsh, "__builtin_altivec_vupklsh", ALTIVEC_BUILTIN_VUPKLSH },
+ { MASK_VSX, CODE_FOR_negv2df2, "__builtin_vsx_xvnegdp", VSX_BUILTIN_XVNEGDP },
+ { MASK_VSX, CODE_FOR_sqrtv2df2, "__builtin_vsx_xvsqrtdp", VSX_BUILTIN_XVSQRTDP },
+ { MASK_VSX, CODE_FOR_negv4sf2, "__builtin_vsx_xvnegsp", VSX_BUILTIN_XVNEGSP },
+ { MASK_VSX, CODE_FOR_sqrtv4sf2, "__builtin_vsx_xvsqrtsp", VSX_BUILTIN_XVSQRTSP },
+
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abs", ALTIVEC_BUILTIN_VEC_ABS },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_abss", ALTIVEC_BUILTIN_VEC_ABSS },
{ MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_ceil", ALTIVEC_BUILTIN_VEC_CEIL },
@@ -8339,6 +8390,15 @@ static struct builtin_description bdesc_
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vec_fix_sfsi", VECTOR_BUILTIN_FIX_V4SF_V4SI },
{ MASK_ALTIVEC|MASK_VSX, CODE_FOR_fixuns_truncv4sfv4si2, "__builtin_vec_fixuns_sfsi", VECTOR_BUILTIN_FIXUNS_V4SF_V4SI },
+ { MASK_VSX, CODE_FOR_floatv2div2df2, "__builtin_vsx_xvcvsxddp", VSX_BUILTIN_XVCVSXDDP },
+ { MASK_VSX, CODE_FOR_unsigned_floatv2div2df2, "__builtin_vsx_xvcvuxddp", VSX_BUILTIN_XVCVUXDDP },
+ { MASK_VSX, CODE_FOR_fix_truncv2dfv2di2, "__builtin_vsx_xvdpsxds", VSX_BUILTIN_XVCVDPSXDS },
+ { MASK_VSX, CODE_FOR_fixuns_truncv2dfv2di2, "__builtin_vsx_xvdpuxds", VSX_BUILTIN_XVCVDPUXDS },
+ { MASK_VSX, CODE_FOR_floatv4siv4sf2, "__builtin_vsx_xvcvsxwsp", VSX_BUILTIN_XVCVSXDSP },
+ { MASK_VSX, CODE_FOR_unsigned_floatv4siv4sf2, "__builtin_vsx_xvcvuxwsp", VSX_BUILTIN_XVCVUXWSP },
+ { MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vsx_xvspsxws", VSX_BUILTIN_XVCVSPSXWS },
+ { MASK_VSX, CODE_FOR_fixuns_truncv4sfv4si2, "__builtin_vsx_xvspuxws", VSX_BUILTIN_XVCVSPUXWS },
+
/* The SPE unary builtins must start with SPE_BUILTIN_EVABS and
end with SPE_BUILTIN_EVSUBFUSIAAW. */
{ 0, CODE_FOR_spe_evabs, "__builtin_spe_evabs", SPE_BUILTIN_EVABS },
@@ -10484,6 +10544,8 @@ altivec_init_builtins (void)
= build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
tree v4sf_ftype_v4sf
= build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
tree void_ftype_pcvoid_int_int
= build_function_type_list (void_type_node,
pcvoid_type_node, integer_type_node,
@@ -10641,6 +10703,9 @@ altivec_init_builtins (void)
case V4SFmode:
type = v4sf_ftype_v4sf;
break;
+ case V2DFmode:
+ type = v2df_ftype_v2df;
+ break;
default:
gcc_unreachable ();
}
@@ -10960,6 +11025,18 @@ rs6000_common_init_builtins (void)
tree int_ftype_v8hi_v8hi
= build_function_type_list (integer_type_node,
V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v2di_ftype_v2df
+ = build_function_type_list (V2DI_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2di
+ = build_function_type_list (V2DF_type_node,
+ V2DI_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
tree v2df_ftype_v2df_v2df_v2df
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DF_type_node,
@@ -11136,6 +11213,9 @@ rs6000_common_init_builtins (void)
case VOIDmode:
type = opaque_ftype_opaque_opaque;
break;
+ case V2DFmode:
+ type = v2df_ftype_v2df_v2df;
+ break;
case V4SFmode:
type = v4sf_ftype_v4sf_v4sf;
break;
@@ -11285,6 +11365,8 @@ rs6000_common_init_builtins (void)
type = v16qi_ftype_int;
else if (mode0 == VOIDmode && mode1 == VOIDmode)
type = opaque_ftype_opaque;
+ else if (mode0 == V2DFmode && mode1 == V2DFmode)
+ type = v2df_ftype_v2df;
else if (mode0 == V4SFmode && mode1 == V4SFmode)
type = v4sf_ftype_v4sf;
else if (mode0 == V8HImode && mode1 == V16QImode)
@@ -11310,6 +11392,10 @@ rs6000_common_init_builtins (void)
type = v4si_ftype_v4sf;
else if (mode0 == V4SFmode && mode1 == V4SImode)
type = v4sf_ftype_v4si;
+ else if (mode0 == V2DImode && mode1 == V2DFmode)
+ type = v2di_ftype_v2df;
+ else if (mode0 == V2DFmode && mode1 == V2DImode)
+ type = v2df_ftype_v2di;
else
gcc_unreachable ();
@@ -12092,8 +12178,10 @@ rtx
rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
{
static bool eliminated = false;
+ rtx ret;
+
if (mode != SDmode)
- return assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+ ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
else
{
rtx mem = cfun->machine->sdmode_stack_slot;
@@ -12105,8 +12193,21 @@ rs6000_secondary_memory_needed_rtx (enum
cfun->machine->sdmode_stack_slot = mem;
eliminated = true;
}
- return mem;
+ ret = mem;
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "rs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
+ GET_MODE_NAME (mode));
+ if (!ret)
+ fprintf (stderr, "\tNULL_RTX\n");
+ else
+ debug_rtx (ret);
+ fprintf (stderr, "\n");
}
+
+ return ret;
}
static tree
@@ -12140,6 +12241,54 @@ rs6000_check_sdmode (tree *tp, int *walk
return NULL_TREE;
}
+/* Inform reload about cases where moving X with a mode MODE to a register in
+ RCLASS requires an extra scratch or immediate register. Return the class
+ needed for the immediate register. */
+
+static enum reg_class
+rs6000_secondary_reload (bool in_p,
+ rtx x,
+ enum reg_class rclass,
+ enum machine_mode mode,
+ secondary_reload_info *sri)
+{
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "rs6000_secondary_reload, in_p = %s, rclass = %s, mode = %s\n",
+ in_p ? "true" : "false", reg_class_names[rclass],
+ GET_MODE_NAME (mode));
+ debug_rtx (x);
+ fprintf (stderr, "\n");
+ }
+
+ return default_secondary_reload (in_p, x, rclass, mode, sri);
+}
+
+/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
+ to SP+reg addressing. */
+
+void
+rs6000_vector_secondary_reload (rtx op0, rtx op1, rtx op2, bool to_mem_p)
+{
+ rtx memref = to_mem_p ? op0 : op1;
+ gcc_assert (MEM_P (memref));
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "rs6000_vector_secondary_reload, to_mem_p = %s\n",
+ to_mem_p ? "true" : "false");
+ fprintf (stderr, "op0:\n");
+ debug_rtx (op0);
+ fprintf (stderr, "op1:\n");
+ debug_rtx (op1);
+ fprintf (stderr, "op2:\n");
+ debug_rtx (op2);
+ fprintf (stderr, "\n");
+ }
+
+ gcc_unreachable ();
+}
/* Allocate a 64-bit stack slot to be used for copying SDmode
values through if this function has any SDmode references. */
@@ -12212,32 +12361,44 @@ enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
enum machine_mode mode = GET_MODE (x);
+ enum reg_class ret;
if (TARGET_VSX && VSX_VECTOR_MODE (mode) && x == CONST0_RTX (mode)
&& VSX_REG_CLASS_P (rclass))
- return rclass;
+ ret = rclass;
- if (TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode) && rclass == ALTIVEC_REGS
- && easy_vector_constant (x, mode))
- return rclass;
+ else if (TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode)
+ && rclass == ALTIVEC_REGS && easy_vector_constant (x, mode))
+ ret = rclass;
- if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
- return NO_REGS;
+ else if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+ ret = NO_REGS;
- if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
- return GENERAL_REGS;
+ else if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
+ ret = GENERAL_REGS;
/* For VSX, prefer the traditional registers. */
- if (rclass == VSX_REGS)
+ else if (rclass == VSX_REGS)
{
if (mode == DFmode)
- return FLOAT_REGS;
+ ret = FLOAT_REGS;
if (ALTIVEC_VECTOR_MODE (mode))
- return ALTIVEC_REGS;
+ ret = ALTIVEC_REGS;
+ }
+ else
+ ret = rclass;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "rs6000_preferred_reload_class, return %s, rclass = %s, x:\n",
+ reg_class_names[ret], reg_class_names[rclass]);
+ debug_rtx (x);
+ fprintf (stderr, "\n");
}
- return rclass;
+ return ret;
}
/* If we are copying between FP or AltiVec registers and anything else, we need
@@ -12251,31 +12412,46 @@ rs6000_secondary_memory_needed (enum reg
enum reg_class class2,
enum machine_mode mode)
{
+ bool ret;
+ bool vsx1;
+ bool vsx2;
+
if (class1 == class2)
- return false;
+ ret = false;
- if (TARGET_VSX && VSX_MOVE_MODE (mode) && VSX_REG_CLASS_P (class1)
- && VSX_REG_CLASS_P (class2))
- return false;
+ else if (TARGET_VSX && VECTOR_MEM_VSX_P (mode)
+ && ((vsx1 = VSX_REG_CLASS_P (class1))
+ || (vsx2 = VSX_REG_CLASS_P (class2))))
+ ret = (vsx1 != vsx2);
+
+ else if (class1 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ ret = true;
+
+ else if (class2 == FLOAT_REGS
+ && (!TARGET_MFPGPR || !TARGET_POWERPC64
+ || ((mode != DFmode)
+ && (mode != DDmode)
+ && (mode != DImode))))
+ ret = true;
- if (class1 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ else if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ ret = true;
- if (class2 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ else
+ ret = false;
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
- return true;
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr,
+ "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
+ "class2 = %s, mode = %s\n",
+ ret ? "true" : "false", reg_class_names[class1],
+ reg_class_names[class2], GET_MODE_NAME (mode));
- return false;
+ return ret;
}
/* Return the register class of a scratch register needed to copy IN into
@@ -12287,6 +12463,7 @@ rs6000_secondary_reload_class (enum reg_
enum machine_mode mode,
rtx in)
{
+ enum reg_class ret = NO_REGS;
int regno;
if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
@@ -12307,58 +12484,75 @@ rs6000_secondary_reload_class (enum reg_
|| GET_CODE (in) == HIGH
|| GET_CODE (in) == LABEL_REF
|| GET_CODE (in) == CONST))
- return BASE_REGS;
+ ret = BASE_REGS;
}
- if (GET_CODE (in) == REG)
+ if (ret == NO_REGS)
{
- regno = REGNO (in);
- if (regno >= FIRST_PSEUDO_REGISTER)
+ if (GET_CODE (in) == REG)
+ {
+ regno = REGNO (in);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ regno = true_regnum (in);
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ regno = -1;
+ }
+ }
+ else if (GET_CODE (in) == SUBREG)
{
regno = true_regnum (in);
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
}
- }
- else if (GET_CODE (in) == SUBREG)
- {
- regno = true_regnum (in);
- if (regno >= FIRST_PSEUDO_REGISTER)
+ else
regno = -1;
- }
- else
- regno = -1;
- /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
- into anything. */
- if (rclass == GENERAL_REGS || rclass == BASE_REGS
- || (regno >= 0 && INT_REGNO_P (regno)))
- return NO_REGS;
-
- /* Constants, memory, and FP registers can go into FP registers. */
- if ((regno == -1 || FP_REGNO_P (regno))
- && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
- return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
-
- /* Memory, and FP/altivec registers can go into fp/altivec registers under
- VSX. */
- if (TARGET_VSX
- && (regno == -1 || VSX_REGNO_P (regno))
- && VSX_REG_CLASS_P (rclass))
- return NO_REGS;
+ /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
+ into anything. */
+ if (rclass == GENERAL_REGS || rclass == BASE_REGS
+ || (regno >= 0 && INT_REGNO_P (regno)))
+ ret = NO_REGS;
+
+ /* Constants, memory, and FP registers can go into FP registers. */
+ else if ((regno == -1 || FP_REGNO_P (regno))
+ && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
+ ret = (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+
+ /* Memory, and FP/altivec registers can go into fp/altivec registers under
+ VSX. */
+ else if (TARGET_VSX
+ && (regno == -1 || VSX_REGNO_P (regno))
+ && VSX_REG_CLASS_P (rclass))
+ ret = NO_REGS;
+
+ /* Memory, and AltiVec registers can go into AltiVec registers. */
+ else if ((regno == -1 || ALTIVEC_REGNO_P (regno))
+ && rclass == ALTIVEC_REGS)
+ ret = NO_REGS;
+
+ /* We can copy among the CR registers. */
+ else if ((rclass == CR_REGS || rclass == CR0_REGS)
+ && regno >= 0 && CR_REGNO_P (regno))
+ ret = NO_REGS;
+
+ /* Otherwise, we need GENERAL_REGS. */
+ else
+ ret = GENERAL_REGS;
+ }
- /* Memory, and AltiVec registers can go into AltiVec registers. */
- if ((regno == -1 || ALTIVEC_REGNO_P (regno))
- && rclass == ALTIVEC_REGS)
- return NO_REGS;
-
- /* We can copy among the CR registers. */
- if ((rclass == CR_REGS || rclass == CR0_REGS)
- && regno >= 0 && CR_REGNO_P (regno))
- return NO_REGS;
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr,
+ "rs6000_secondary_reload_class, return %s, rclass = %s, "
+ "mode = %s, input rtx:\n",
+ reg_class_names[ret], reg_class_names[rclass],
+ GET_MODE_NAME (mode));
+ debug_rtx (in);
+ fprintf (stderr, "\n");
+ }
- /* Otherwise, we need GENERAL_REGS. */
- return GENERAL_REGS;
+ return ret;
}
/* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
@@ -12368,19 +12562,29 @@ rs6000_cannot_change_mode_class (enum ma
enum machine_mode to,
enum reg_class rclass)
{
- return (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)
- ? ((GET_MODE_SIZE (from) < 8 || GET_MODE_SIZE (to) < 8
- || TARGET_IEEEQUAD)
- && reg_classes_intersect_p (FLOAT_REGS, rclass))
- : (((TARGET_E500_DOUBLE
- && ((((to) == DFmode) + ((from) == DFmode)) == 1
- || (((to) == TFmode) + ((from) == TFmode)) == 1
- || (((to) == DDmode) + ((from) == DDmode)) == 1
- || (((to) == TDmode) + ((from) == TDmode)) == 1
- || (((to) == DImode) + ((from) == DImode)) == 1))
- || (TARGET_SPE
- && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1))
- && reg_classes_intersect_p (GENERAL_REGS, rclass)));
+ bool ret = (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)
+ ? ((GET_MODE_SIZE (from) < 8 || GET_MODE_SIZE (to) < 8
+ || TARGET_IEEEQUAD)
+ && reg_classes_intersect_p (FLOAT_REGS, rclass))
+ : (((TARGET_E500_DOUBLE
+ && ((((to) == DFmode) + ((from) == DFmode)) == 1
+ || (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == DDmode) + ((from) == DDmode)) == 1
+ || (((to) == TDmode) + ((from) == TDmode)) == 1
+ || (((to) == DImode) + ((from) == DImode)) == 1))
+ || (TARGET_SPE
+ && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1))
+ && reg_classes_intersect_p (GENERAL_REGS, rclass)));
+
+ if (TARGET_DEBUG_ADDR)
+ fprintf (stderr,
+ "rs6000_cannot_change_mode_class, return %s, from = %s, "
+ "to = %s, rclass = %s\n",
+ ret ? "true" : "false",
+ GET_MODE_NAME (from), GET_MODE_NAME (to),
+ reg_class_names[rclass]);
+
+ return ret;
}
/* Given a comparison operation, return the bit number in CCR to test. We
--- gcc/config/rs6000/vsx.md (revision 144758)
+++ gcc/config/rs6000/vsx.md (revision 144843)
@@ -68,7 +68,13 @@ (define_mode_attr VSbit [(SI "32")
(DI "64")])
(define_constants
- [(UNSPEC_VSX_CONCAT_V2DF 500)])
+ [(UNSPEC_VSX_CONCAT_V2DF 500)
+ (UNSPEC_VSX_XVCVDPSP 501)
+ (UNSPEC_VSX_XVCVDPSXWS 502)
+ (UNSPEC_VSX_XVCVDPUXWS 503)
+ (UNSPEC_VSX_XVCVSPDP 504)
+ (UNSPEC_VSX_XVCVSXWDP 505)
+ (UNSPEC_VSX_XVCVUXWDP 506)])
;; VSX moves
(define_insn "*vsx_mov<mode>"
@@ -245,7 +251,7 @@ (define_insn "*vsx_abs<mode>2"
"xvabs<VSs> %x0,%x1"
[(set_attr "type" "vecfloat")])
-(define_insn "*vsx_nabs<mode>2"
+(define_insn "vsx_nabs<mode>2"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>")
(neg:VSX_F
(abs:VSX_F
@@ -417,14 +423,14 @@ (define_insn "*vsx_ftrunc<mode>2"
"xvr<VSs>piz %x0,%x1"
[(set_attr "type" "vecperm")])
-(define_insn "*vsx_float<VSi><mode>2"
+(define_insn "vsx_float<VSi><mode>2"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>")
(float:VSX_F (match_operand:<VSI> 1 "vsx_register_operand" "<VSr>")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"xvcvsx<VSc><VSs> %x0,%x1"
[(set_attr "type" "vecfloat")])
-(define_insn "*vsx_floatuns<VSi><mode>2"
+(define_insn "vsx_floatuns<VSi><mode>2"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>")
(unsigned_float:VSX_F (match_operand:<VSI> 1 "vsx_register_operand" "<VSr>")))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
@@ -446,6 +452,62 @@ (define_insn "*vsx_fixuns_trunc<mode><VS
[(set_attr "type" "vecfloat")])
+;; VSX convert to/from double vector
+
+;; Convert from 64-bit to 32-bit types
+;; Note, favor the Altivec registers since the usual use of these instructions
+;; is in vector converts and we need to use the Altivec vperm instruction.
+
+(define_insn "vsx_xvcvdpsp"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_XVCVDPSP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpsp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvdpsxws"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_XVCVDPSXWS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpsxws %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvdpuxws"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
+ (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_XVCVDPUXWS))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvdpuxws %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Convert from 32-bit to 64-bit types
+(define_insn "vsx_xvcvspdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SF 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_XVCVSPDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvspdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvsxwdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_XVCVSXWDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvsxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvuxwdp"
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+ (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
+ UNSPEC_VSX_XVCVUXWDP))]
+ "VECTOR_UNIT_VSX_P (V2DFmode)"
+ "xvcvuxwdp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+
;; VSX scalar double precision floating point operations
(define_insn"*vsx_adddf3"
[(set (match_operand:DF 0 "vsx_register_operand" "=ws")
@@ -753,8 +815,8 @@ (define_insn "*vsx_andc<mode>3"
(define_insn "vsx_concat_v2df"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF
- [(match_operand:DF 1 "vsx_register_operand" "f,wa")
- (match_operand:DF 2 "vsx_register_operand" "f,wa")]
+ [(match_operand:DF 1 "vsx_register_operand" "ws,wa")
+ (match_operand:DF 2 "vsx_register_operand" "ws,wa")]
UNSPEC_VSX_CONCAT_V2DF))]
"VECTOR_UNIT_VSX_P (V2DFmode)"
"xxpermdi %x0,%x1,%x2,0"
@@ -762,32 +824,37 @@ (define_insn "vsx_concat_v2df"
;; Set a double into one element
(define_insn "vsx_set_v2df"
- [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd")
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(vec_merge:V2DF
- (match_operand:V2DF 1 "vsx_register_operand" "wd")
- (vec_duplicate:V2DF (match_operand:DF 2 "vsx_register_operand" "ws"))
- (match_operand:QI 3 "u5bit_cint_operand" "i")))]
+ (match_operand:V2DF 1 "vsx_register_operand" "wd,wa")
+ (vec_duplicate:V2DF (match_operand:DF 2 "vsx_register_operand" "ws,f"))
+ (match_operand:QI 3 "u5bit_cint_operand" "i,i")))]
"VECTOR_UNIT_VSX_P (V2DFmode)"
{
- operands[3] = GEN_INT (INTVAL (operands[3]) & 1);
- return \"xxpermdi %x0,%x1,%x2,%3\";
+ if (INTVAL (operands[3]) == 0)
+ return \"xxpermdi %x0,%x1,%x2,1\";
+ else if (INTVAL (operands[3]) == 1)
+ return \"xxpermdi %x0,%x2,%x1,0\";
+ else
+ gcc_unreachable ();
}
[(set_attr "type" "vecperm")])
;; Extract a DF element from V2DF
(define_insn "vsx_extract_v2df"
- [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
- (vec_select:DF (match_operand:V2DF 1 "vsx_register_operand" "wd")
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,f,?wa")
+ (vec_select:DF (match_operand:V2DF 1 "vsx_register_operand" "wd,wd,wa")
(parallel
- [(match_operand:QI 2 "u5bit_cint_operand" "i")])))]
+ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
"VECTOR_UNIT_VSX_P (V2DFmode)"
{
- operands[3] = GEN_INT (INTVAL (operands[2]) & 1);
+ gcc_assert (UINTVAL (operands[2]) <= 1);
+ operands[3] = GEN_INT (INTVAL (operands[2]) << 1);
return \"xxpermdi %x0,%x1,%x1,%3\";
}
[(set_attr "type" "vecperm")])
-;; General V2DF permute
+;; General V2DF permute, extract_{high,low,even,odd}
(define_insn "vsx_xxpermdi"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd")
(vec_concat:V2DF
@@ -799,6 +866,7 @@ (define_insn "vsx_xxpermdi"
[(match_operand:QI 4 "u5bit_cint_operand" "i")]))))]
"VECTOR_UNIT_VSX_P (V2DFmode)"
{
+ gcc_assert ((UINTVAL (operands[2]) <= 1) && (UINTVAL (operands[4]) <= 1));
operands[5] = GEN_INT (((INTVAL (operands[2]) & 1) << 1)
| (INTVAL (operands[4]) & 1));
return \"xxpermdi %x0,%x1,%x3,%5\";
@@ -807,14 +875,15 @@ (define_insn "vsx_xxpermdi"
;; V2DF splat
(define_insn "vsx_splatv2df"
- [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd")
+ [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,wd")
(vec_duplicate:V2DF
- (match_operand:DF 1 "input_operand" "ws,Z")))]
+ (match_operand:DF 1 "input_operand" "ws,f,Z")))]
"VECTOR_UNIT_VSX_P (V2DFmode)"
"@
xxpermdi %x0,%x1,%x1,0
+ xxpermdi %x0,%x1,%x1,0
lxvdsx %x0,%y1"
- [(set_attr "type" "vecperm,vecload")])
+ [(set_attr "type" "vecperm,vecperm,vecload")])
;; V4SF splat
(define_insn "*vsx_xxspltw"
@@ -828,14 +897,14 @@ (define_insn "*vsx_xxspltw"
[(set_attr "type" "vecperm")])
;; V4SF interleave
-(define_insn "*vsx_xxmrghw"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
- (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
+(define_insn "vsx_xxmrghw"
+ [(set (match_operand:V4SF 0 "register_operand" "=wf")
+ (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "wf")
(parallel [(const_int 0)
(const_int 2)
(const_int 1)
(const_int 3)]))
- (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "wf")
(parallel [(const_int 2)
(const_int 0)
(const_int 3)
@@ -845,15 +914,15 @@ (define_insn "*vsx_xxmrghw"
"xxmrghw %x0,%x1,%x2"
[(set_attr "type" "vecperm")])
-(define_insn "*vsx_xxmrglw"
- [(set (match_operand:V4SF 0 "register_operand" "=v")
+(define_insn "vsx_xxmrglw"
+ [(set (match_operand:V4SF 0 "register_operand" "=wf")
(vec_merge:V4SF
- (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
+ (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "wf")
(parallel [(const_int 2)
(const_int 0)
(const_int 3)
(const_int 1)]))
- (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "wf")
(parallel [(const_int 0)
(const_int 2)
(const_int 1)
@@ -862,3 +931,26 @@ (define_insn "*vsx_xxmrglw"
"VECTOR_UNIT_VSX_P (V4SFmode)"
"xxmrglw %x0,%x1,%x2"
[(set_attr "type" "vecperm")])
+
+
+;; Reload patterns for VSX loads/stores. We need a scratch register to convert
+;; the stack temporary address from reg+offset to reg+reg addressing.
+(define_expand "vsx_reload_<VSX_L:mode>_<P:ptrsize>_to_mem"
+ [(parallel [(match_operand:VSX_L 0 "memory_operand" "")
+ (match_operand:VSX_L 1 "register_operand" "=wa")
+ (match_operand:P 2 "register_operand" "=&b")])]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], true);
+ DONE;
+})
+
+(define_expand "vsx_reload_<VSX_L:mode>_<P:ptrsize>_to_reg"
+ [(parallel [(match_operand:VSX_L 0 "register_operand" "=wa")
+ (match_operand:VSX_L 1 "memory_operand" "")
+ (match_operand:P 2 "register_operand" "=&b")])]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], false);
+ DONE;
+})
--- gcc/config/rs6000/rs6000.h (revision 144758)
+++ gcc/config/rs6000/rs6000.h (revision 144843)
@@ -3388,7 +3388,7 @@ enum rs6000_builtins
VSX_BUILTIN_XXSPLTW,
VSX_BUILTIN_XXSWAPD,
- /* Combine VSX/Altivec builtins. */
+ /* Combined VSX/Altivec builtins. */
VECTOR_BUILTIN_FLOAT_V4SI_V4SF,
VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF,
VECTOR_BUILTIN_FIX_V4SF_V4SI,
--- gcc/config/rs6000/altivec.md (revision 144758)
+++ gcc/config/rs6000/altivec.md (revision 144843)
@@ -2685,3 +2685,27 @@ (define_expand "vec_unpacku_float_lo_v8h
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
DONE;
}")
+
+
+;; Reload patterns for Altivec loads/stores. We need a scratch register to
+;; convert the stack temporary address from reg+offset to reg+reg addressing.
+
+(define_expand "altivec_reload_<V:mode>_<P:ptrsize>_to_mem"
+ [(parallel [(match_operand:V 0 "memory_operand" "")
+ (match_operand:V 1 "register_operand" "=v")
+ (match_operand:P 2 "register_operand" "=&b")])]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+{
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], true);
+ DONE;
+})
+
+(define_expand "altivec_reload_<V:mode>_<P:ptrsize>_to_reg"
+ [(parallel [(match_operand:V 0 "register_operand" "=v")
+ (match_operand:V 1 "memory_operand" "")
+ (match_operand:P 2 "register_operand" "=&b")])]
+ "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+{
+ rs6000_vector_secondary_reload (operands[0], operands[1], operands[2], false);
+ DONE;
+})
--- gcc/config/rs6000/rs6000.md (revision 144758)
+++ gcc/config/rs6000/rs6000.md (revision 144843)
@@ -222,6 +222,10 @@ (define_mode_attr dbits [(QI "56") (HI "
;; ISEL/ISEL64 target selection
(define_mode_attr sel [(SI "") (DI "64")])
+;; Suffix for reload patterns
+(define_mode_attr ptrsize [(SI "32bit")
+ (DI "64bit")])
+
;; Start with fixed-point load and store insns. Here we put only the more
;; complex forms. Basic data transfer is done later.
--- gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-2.c (revision 144843)
@@ -0,0 +1,29 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-final { scan-assembler "xvaddsp" } } */
+/* { dg-final { scan-assembler "xvsubsp" } } */
+/* { dg-final { scan-assembler "xvmulsp" } } */
+/* { dg-final { scan-assembler "xvdivsp" } } */
+/* { dg-final { scan-assembler "xvmaxsp" } } */
+/* { dg-final { scan-assembler "xvminsp" } } */
+/* { dg-final { scan-assembler "xvsqrtsp" } } */
+/* { dg-final { scan-assembler "xvabssp" } } */
+/* { dg-final { scan-assembler "xvnabssp" } } */
+
+void use_builtins (__vector float *p, __vector float *q, __vector float *r)
+{
+ __vector float tmp1 = *q;
+ __vector float tmp2 = *r;
+
+ *p++ = __builtin_vsx_xvaddsp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvsubsp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvmulsp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvdivsp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvmaxsp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvminsp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvabssp (tmp1);
+ *p++ = __builtin_vsx_xvnabssp (tmp1);
+ *p = __builtin_vsx_xvsqrtsp (tmp1);
+}
--- gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-1.c (revision 144843)
@@ -0,0 +1,29 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-final { scan-assembler "xvadddp" } } */
+/* { dg-final { scan-assembler "xvsubdp" } } */
+/* { dg-final { scan-assembler "xvmuldp" } } */
+/* { dg-final { scan-assembler "xvdivdp" } } */
+/* { dg-final { scan-assembler "xvmaxdp" } } */
+/* { dg-final { scan-assembler "xvmindp" } } */
+/* { dg-final { scan-assembler "xvsqrtdp" } } */
+/* { dg-final { scan-assembler "xvabsdp" } } */
+/* { dg-final { scan-assembler "xvnabsdp" } } */
+
+void use_builtins (__vector double *p, __vector double *q, __vector double *r)
+{
+ __vector double tmp1 = *q;
+ __vector double tmp2 = *r;
+
+ *p++ = __builtin_vsx_xvadddp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvsubdp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvmuldp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvdivdp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvmaxdp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvmindp (tmp1, tmp2);
+ *p++ = __builtin_vsx_xvabsdp (tmp1);
+ *p++ = __builtin_vsx_xvnabsdp (tmp1);
+ *p = __builtin_vsx_xvsqrtdp (tmp1);
+}
--- gcc/testsuite/gcc.target/powerpc/pr39457.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr39457.c (revision 144857)
@@ -0,0 +1,56 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-options "-m64 -O2 -mminimal-toc" } */
+
+/* PR 39457 -- fix breakage because the compiler ran out of registers and
+ wanted to stash a floating point value to the LR/CTR register. */
+
+/* -O2 -m64 -mminimal-toc */
+typedef struct { void *s; } S;
+typedef void (*T1) (void);
+typedef void (*T2) (void *, void *, int, void *);
+char *fn1 (const char *, ...);
+void *fn2 (void);
+int fn3 (char *, int);
+int fn4 (const void *);
+int fn5 (const void *);
+long fn6 (void) __attribute__ ((__const__));
+int fn7 (void *, void *, void *);
+void *fn8 (void *, long);
+void *fn9 (void *, long, const char *, ...);
+void *fn10 (void *);
+long fn11 (void) __attribute__ ((__const__));
+long fn12 (void *, const char *, T1, T2, void *);
+void *fn13 (void *);
+long fn14 (void) __attribute__ ((__const__));
+extern void *v1;
+extern char *v2;
+extern int v3;
+
+void
+foo (void *x, char *z)
+{
+ void *i1, *i2;
+ int y;
+ if (v1)
+ return;
+ v1 = fn9 (fn10 (fn2 ()), fn6 (), "x", 0., "y", 0., 0);
+ y = 520 - (520 - fn4 (x)) / 2;
+ fn9 (fn8 (v1, fn6 ()), fn6 (), "wig", fn8 (v1, fn14 ()), "x", 18.0,
+ "y", 16.0, "wid", 80.0, "hi", 500.0, 0);
+ fn9 (fn10 (v1), fn6 (), "x1", 0., "y1", 0., "x2", 80.0, "y2",
+ 500.0, "f", fn3 ("fff", 0x0D0DFA00), 0);
+ fn13 (((S *) fn8 (v1, fn6 ()))->s);
+ fn12 (fn8 (v1, fn11 ()), "ev", (T1) fn7, 0, fn8 (v1, fn6 ()));
+ fn9 (fn8 (v1, fn6 ()), fn6 (), "wig",
+ fn8 (v1, fn14 ()), "x", 111.0, "y", 14.0, "wid", 774.0, "hi",
+ 500.0, 0);
+ v1 = fn9 (fn10 (v1), fn6 (), "x1", 0., "y1", 0., "x2", 774.0, "y2",
+ 500.0, "f", fn3 ("gc", 0x0D0DFA00), 0);
+ fn1 (z, 0);
+ i1 = fn9 (fn8 (v1, fn6 ()), fn6 (), "pixbuf", x, "x",
+ 800 - fn5 (x) / 2, "y", y - fn4 (x), 0);
+ fn12 (fn8 (i1, fn11 ()), "ev", (T1) fn7, 0, "/ok/");
+ fn12 (fn8 (i1, fn11 ()), "ev", (T1) fn7, 0, 0);
+ i2 = fn9 (fn8 (v1, fn6 ()), fn6 (), "txt", "OK", "fnt", v2, "x",
+ 800, "y", y - fn4 (x) + 15, "ar", 0, "f", v3, 0);
+}