Backport some fixes needed by mesa and rust

Tom Stellard 2018-08-06 21:53:23 +00:00
parent 8c9aac9934
commit 93d2074b7b
4 changed files with 1334 additions and 1 deletion

0001-Don-t-run-BV-DAG-Combine-before-legalization-if-it-a.patch

@@ -0,0 +1,47 @@
From 2eb830fed5b813c5624e770c244eec61dacb04d7 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 9 Jul 2018 10:35:30 -0700
Subject: [PATCH] Don't run BV DAG Combine before legalization if it assumes
legal types
---
lib/Target/PowerPC/PPCISelLowering.cpp | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 26e9f13..f622b05 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11790,10 +11790,15 @@ static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
if (!Op)
return false;
- if (Op.getOpcode() != ISD::SIGN_EXTEND)
+ if (Op.getOpcode() != ISD::SIGN_EXTEND &&
+ Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
return false;
+ // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
+ // of the right width.
SDValue Extract = Op.getOperand(0);
+ if (Extract.getOpcode() == ISD::ANY_EXTEND)
+ Extract = Extract.getOperand(0);
if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
@@ -11881,8 +11886,10 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
return Reduced;
// If we're building a vector out of extended elements from another vector
- // we have P9 vector integer extend instructions.
- if (Subtarget.hasP9Altivec()) {
+ // we have P9 vector integer extend instructions. The code assumes legal
+ // input types (i.e. it can't handle things like v4i16) so do not run before
+ // legalization.
+ if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
Reduced = combineBVOfVecSExt(N, DAG);
if (Reduced)
return Reduced;
--
1.8.3.1
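
As an illustration of what the patch above guards against, here is a minimal C
sketch (an assumed shape, not the actual mesa or rust reproducer) that forms a
BUILD_VECTOR of sign-extended extracts whose source type, v4i16, is illegal on
PowerPC until type legalization widens it:

/* Hypothetical reduced example; compile with
   clang -target powerpc64le-linux-gnu -mcpu=pwr9 -O2 */
typedef short v4i16 __attribute__((vector_size(8)));
typedef int   v4i32 __attribute__((vector_size(16)));

/* Each element is extracted from the (pre-legalization illegal) v4i16,
   sign-extended to i32, and rebuilt into a v4i32 -- the
   sext(extract_vector_elt) BUILD_VECTOR shape that combineBVOfVecSExt
   matches but cannot handle before the input type is legalized. */
v4i32 widen(v4i16 v) {
  return (v4i32){ v[0], v[1], v[2], v[3] };
}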

0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch

@@ -0,0 +1,919 @@
From 88ad713b81c2f51dd8405b251f9825b0bca6e57d Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Thu, 2 Aug 2018 00:03:22 +0000
Subject: [PATCH] [PowerPC] Do not round values prior to converting to integer
Adding the FP_ROUND nodes when combining FP_TO_[SU]INT of elements
feeding a BUILD_VECTOR into an FP_TO_[SU]INT of the built vector
loses precision. This patch removes the code that adds these nodes
to true f64 operands. It also adds patterns required to ensure
the code is still vectorized rather than converting individual
elements and inserting into a vector.
Fixes https://bugs.llvm.org/show_bug.cgi?id=38342
Differential Revision: https://reviews.llvm.org/D50121
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338658 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/PowerPC/PPCISelLowering.cpp | 22 +-
lib/Target/PowerPC/PPCInstrVSX.td | 86 +++++++
test/CodeGen/PowerPC/build-vector-tests.ll | 357 +++++++++++++----------------
3 files changed, 258 insertions(+), 207 deletions(-)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f622b05..527ec5a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11560,6 +11560,14 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+// Is this an extending load from an f32 to an f64?
+static bool isFPExtLoad(SDValue Op) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
+ return LD->getExtensionType() == ISD::EXTLOAD &&
+ Op.getValueType() == MVT::f64;
+ return false;
+}
+
/// \brief Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
@@ -11594,11 +11602,18 @@ combineElementTruncationToVectorTruncation(SDNode *N,
SmallVector<SDValue, 4> Ops;
EVT TargetVT = N->getValueType(0);
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
- if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+ SDValue NextOp = N->getOperand(i);
+ if (NextOp.getOpcode() != PPCISD::MFVSR)
return SDValue();
- unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+ unsigned NextConversion = NextOp.getOperand(0).getOpcode();
if (NextConversion != FirstConversion)
return SDValue();
+ // If we are converting to 32-bit integers, we need to add an FP_ROUND.
+ // This is not valid if the input was originally double precision. It is
+ // also not profitable to do unless this is an extending load in which
+ // case doing this combine will allow us to combine consecutive loads.
+ if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
+ return SDValue();
if (N->getOperand(i) != FirstInput)
IsSplat = false;
}
@@ -11612,8 +11627,9 @@ combineElementTruncationToVectorTruncation(SDNode *N,
// Now that we know we have the right type of node, get its operands
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
SDValue In = N->getOperand(i).getOperand(0);
- // For 32-bit values, we need to add an FP_ROUND node.
if (Is32Bit) {
+ // For 32-bit values, we need to add an FP_ROUND node (if we made it
+ // here, we know that all inputs are extending loads so this is safe).
if (In.isUndef())
Ops.push_back(DAG.getUNDEF(SrcVT));
else {
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 6f71978..1f48473 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3100,6 +3100,17 @@ def DblToFlt {
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
}
+def ExtDbl {
+ dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0))))));
+ dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1))))));
+ dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0))))));
+ dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1))))));
+ dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0))))));
+ dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1))))));
+ dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0))))));
+ dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1))))));
+}
+
def ByteToWord {
dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
@@ -3177,9 +3188,15 @@ def FltToULong {
}
def DblToInt {
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
+ dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B))));
+ dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C))));
+ dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D))));
}
def DblToUInt {
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
+ dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B))));
+ dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C))));
+ dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D))));
}
def DblToLong {
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
@@ -3218,6 +3235,47 @@ def MrgFP {
dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
}
+// Word-element merge dags - conversions from f64 to i32 merged into vectors.
+def MrgWords {
+ // For big endian, we merge low and hi doublewords (A, B).
+ dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
+ dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
+ dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
+ dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
+ dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
+ dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));
+
+ // For little endian, we merge low and hi doublewords (B, A).
+ dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
+ dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
+ dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
+ dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
+ dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
+ dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));
+
+ // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
+ // then merge.
+ dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
+ (COPY_TO_REGCLASS f64:$C, VSRC), 0));
+ dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
+ (COPY_TO_REGCLASS f64:$D, VSRC), 0));
+ dag CVACS = (v4i32 (XVCVDPSXWS AC));
+ dag CVBDS = (v4i32 (XVCVDPSXWS BD));
+ dag CVACU = (v4i32 (XVCVDPUXWS AC));
+ dag CVBDU = (v4i32 (XVCVDPUXWS BD));
+
+ // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
+ // then merge.
+ dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
+ (COPY_TO_REGCLASS f64:$B, VSRC), 0));
+ dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
+ (COPY_TO_REGCLASS f64:$A, VSRC), 0));
+ dag CVDBS = (v4i32 (XVCVDPSXWS DB));
+ dag CVCAS = (v4i32 (XVCVDPSXWS CA));
+ dag CVDBU = (v4i32 (XVCVDPUXWS DB));
+ dag CVCAU = (v4i32 (XVCVDPUXWS CA));
+}
+
// Patterns for BUILD_VECTOR nodes.
def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
let AddedComplexity = 400 in {
@@ -3286,6 +3344,20 @@ let AddedComplexity = 400 in {
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
DblToFlt.B0, DblToFlt.B1)),
(v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+
+ // Convert 4 doubles to a vector of ints.
+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
+ def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
+ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
}
let Predicates = [IsLittleEndian, HasVSX] in {
@@ -3300,6 +3372,20 @@ let AddedComplexity = 400 in {
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
DblToFlt.B0, DblToFlt.B1)),
(v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+
+ // Convert 4 doubles to a vector of ints.
+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
+ def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
+ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
}
let Predicates = [HasDirectMove] in {
diff --git a/test/CodeGen/PowerPC/build-vector-tests.ll b/test/CodeGen/PowerPC/build-vector-tests.ll
index 16b562b..3785b2a 100644
--- a/test/CodeGen/PowerPC/build-vector-tests.ll
+++ b/test/CodeGen/PowerPC/build-vector-tests.ll
@@ -119,8 +119,8 @@
;vector int spltCnstConvftoi() { //
; return (vector int) 4.74f; //
;} //
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvdpsxws //
+;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;vector int fromRegsConvftoi(float a, float b, float c, float d) { //
; return (vector int) { a, b, c, d }; //
;} //
@@ -139,15 +139,15 @@
;vector int fromDiffMemConsDConvftoi(float *ptr) { //
; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
;} //
-;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
+;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
;// sldi 2, load, xvcvspuxws //
;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { //
; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
;} //
-;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
+;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
;// sldi 2, 2 x load, vperm, xvcvspuxws //
;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { //
@@ -168,8 +168,8 @@
;vector int spltCnstConvdtoi() { //
; return (vector int) 4.74; //
;} //
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
+;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;vector int fromRegsConvdtoi(double a, double b, double c, double d) { //
; return (vector int) { a, b, c, d }; //
;} //
@@ -178,25 +178,23 @@
;vector int fromDiffConstsConvdtoi() { //
; return (vector int) { 24.46, 234., 988.19, 422.39 }; //
;} //
-;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, //
-;// xvcvspsxws //
-;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, //
-;// xvcvspsxws //
+;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew //
;vector int fromDiffMemConsAConvdtoi(double *ptr) { //
; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
;} //
-;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
+;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;vector int fromDiffMemConsDConvdtoi(double *ptr) { //
; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
;} //
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { //
; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
;} //
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { //
; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
;} //
@@ -296,8 +294,8 @@
;vector unsigned int spltCnstConvftoui() { //
; return (vector unsigned int) 4.74f; //
;} //
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;vector unsigned int fromRegsConvftoui(float a, float b, float c, float d) { //
; return (vector unsigned int) { a, b, c, d }; //
;} //
@@ -316,16 +314,16 @@
;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { //
; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
;} //
-;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
;// sldi 2, load, xvcvspuxws //
;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { //
; return (vector unsigned int) { arr[elem], arr[elem+1], //
; arr[elem+2], arr[elem+3] }; //
;} //
-;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
;// sldi 2, 2 x load, vperm, xvcvspuxws //
;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { //
@@ -347,8 +345,8 @@
;vector unsigned int spltCnstConvdtoui() { //
; return (vector unsigned int) 4.74; //
;} //
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;vector unsigned int fromRegsConvdtoui(double a, double b, //
; double c, double d) { //
; return (vector unsigned int) { a, b, c, d }; //
@@ -358,25 +356,24 @@
;vector unsigned int fromDiffConstsConvdtoui() { //
; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; //
;} //
-;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, //
-;// xvcvspuxws //
-;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew //
;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { //
; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
;} //
-;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { //
; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
;} //
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { //
; return (vector unsigned int) { arr[elem], arr[elem+1], //
; arr[elem+2], arr[elem+3] }; //
;} //
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { //
; return (vector unsigned int) { arr[elem], arr[elem-1], //
; arr[elem-2], arr[elem-3] }; //
@@ -1253,28 +1250,24 @@ entry:
; P8LE-LABEL: fromRegsConvftoi
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9BE: xvcvspsxws v2, v2
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P9LE: xvcvspsxws v2, v2
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8BE: xvcvspsxws v2, v2
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8LE: xvcvspsxws v2, v2
}
; Function Attrs: norecurse nounwind readnone
@@ -1529,28 +1522,24 @@ entry:
; P8LE-LABEL: fromRegsConvdtoi
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9BE: xvcvspsxws v2, v2
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P9LE: xvcvspsxws v2, v2
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8BE: xvcvspsxws v2, v2
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8LE: xvcvspsxws v2, v2
}
; Function Attrs: norecurse nounwind readnone
@@ -1592,36 +1581,32 @@ entry:
; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
-; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
+; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
; P9BE: vmrgew v2, [[REG6]], [[REG5]]
-; P9BE: xvcvspsxws v2, v2
; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
-; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
-; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
+; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
; P9LE: vmrgew v2, [[REG6]], [[REG5]]
-; P9LE: xvcvspsxws v2, v2
; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
-; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
+; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
; P8BE: vmrgew v2, [[REG6]], [[REG5]]
-; P8BE: xvcvspsxws v2, v2
; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
-; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
-; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
+; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]]
+; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]]
; P8LE: vmrgew v2, [[REG8]], [[REG7]]
-; P8LE: xvcvspsxws v2, v2
}
; Function Attrs: norecurse nounwind readonly
@@ -1653,40 +1638,36 @@ entry:
; P9BE: lfd
; P9BE: xxmrghd
; P9BE: xxmrghd
-; P9BE: xvcvdpsp
-; P9BE: xvcvdpsp
-; P9BE: vmrgew
-; P9BE: xvcvspsxws v2
+; P9BE: xvcvdpsxws
+; P9BE: xvcvdpsxws
+; P9BE: vmrgew v2
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
; P9LE: xxmrghd
-; P9LE: xvcvdpsp
-; P9LE: xvcvdpsp
-; P9LE: vmrgew
-; P9LE: xvcvspsxws v2
+; P9LE: xvcvdpsxws
+; P9LE: xvcvdpsxws
+; P9LE: vmrgew v2
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: xxmrghd
; P8BE: xxmrghd
-; P8BE: xvcvdpsp
-; P8BE: xvcvdpsp
-; P8BE: vmrgew
-; P8BE: xvcvspsxws v2
+; P8BE: xvcvdpsxws
+; P8BE: xvcvdpsxws
+; P8BE: vmrgew v2
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: xxmrghd
; P8LE: xxmrghd
-; P8LE: xvcvdpsp
-; P8LE: xvcvdpsp
-; P8LE: vmrgew
-; P8LE: xvcvspsxws v2
+; P8LE: xvcvdpsxws
+; P8LE: xvcvdpsxws
+; P8LE: vmrgew v2
}
; Function Attrs: norecurse nounwind readonly
@@ -1726,40 +1707,36 @@ entry:
; P9BE: lfd
; P9BE: xxmrghd
; P9BE: xxmrghd
-; P9BE: xvcvdpsp
-; P9BE: xvcvdpsp
-; P9BE: vmrgew
-; P9BE: xvcvspsxws v2
+; P9BE: xvcvdpsxws
+; P9BE: xvcvdpsxws
+; P9BE: vmrgew v2
; P9LE: lfdux
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
; P9LE: xxmrghd
-; P9LE: xvcvdpsp
-; P9LE: xvcvdpsp
-; P9LE: vmrgew
-; P9LE: xvcvspsxws v2
+; P9LE: xvcvdpsxws
+; P9LE: xvcvdpsxws
+; P9LE: vmrgew v2
; P8BE: lfdux
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: xxmrghd
; P8BE: xxmrghd
-; P8BE: xvcvdpsp
-; P8BE: xvcvdpsp
-; P8BE: vmrgew
-; P8BE: xvcvspsxws v2
+; P8BE: xvcvdpsxws
+; P8BE: xvcvdpsxws
+; P8BE: vmrgew v2
; P8LE: lfdux
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: xxmrghd
; P8LE: xxmrghd
-; P8LE: xvcvdpsp
-; P8LE: xvcvdpsp
-; P8LE: vmrgew
-; P8LE: xvcvspsxws v2
+; P8LE: xvcvdpsxws
+; P8LE: xvcvdpsxws
+; P8LE: vmrgew v2
}
; Function Attrs: norecurse nounwind readonly
@@ -1799,40 +1776,36 @@ entry:
; P9BE: lfd
; P9BE: xxmrghd
; P9BE: xxmrghd
-; P9BE: xvcvdpsp
-; P9BE: xvcvdpsp
-; P9BE: vmrgew
-; P9BE: xvcvspsxws v2
+; P9BE: xvcvdpsxws
+; P9BE: xvcvdpsxws
+; P9BE: vmrgew v2
; P9LE: lfdux
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
; P9LE: xxmrghd
-; P9LE: xvcvdpsp
-; P9LE: xvcvdpsp
-; P9LE: vmrgew
-; P9LE: xvcvspsxws v2
+; P9LE: xvcvdpsxws
+; P9LE: xvcvdpsxws
+; P9LE: vmrgew v2
; P8BE: lfdux
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: xxmrghd
; P8BE: xxmrghd
-; P8BE: xvcvdpsp
-; P8BE: xvcvdpsp
-; P8BE: vmrgew
-; P8BE: xvcvspsxws v2
+; P8BE: xvcvdpsxws
+; P8BE: xvcvdpsxws
+; P8BE: vmrgew v2
; P8LE: lfdux
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: xxmrghd
; P8LE: xxmrghd
-; P8LE: xvcvdpsp
-; P8LE: xvcvdpsp
-; P8LE: vmrgew
-; P8LE: xvcvspsxws v2
+; P8LE: xvcvdpsxws
+; P8LE: xvcvdpsxws
+; P8LE: vmrgew v2
}
; Function Attrs: norecurse nounwind readnone
@@ -2413,28 +2386,24 @@ entry:
; P8LE-LABEL: fromRegsConvftoui
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9BE: xvcvspuxws v2, v2
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P9LE: xvcvspuxws v2, v2
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8BE: xvcvspuxws v2, v2
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8LE: xvcvspuxws v2, v2
}
; Function Attrs: norecurse nounwind readnone
@@ -2689,28 +2658,24 @@ entry:
; P8LE-LABEL: fromRegsConvdtoui
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
-; P9BE: xvcvspuxws v2, v2
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
-; P9LE: xvcvspuxws v2, v2
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
-; P8BE: xvcvspuxws v2, v2
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
+; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
+; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
-; P8LE: xvcvspuxws v2, v2
}
; Function Attrs: norecurse nounwind readnone
@@ -2752,36 +2717,32 @@ entry:
; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
-; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
+; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
; P9BE: vmrgew v2, [[REG6]], [[REG5]]
-; P9BE: xvcvspuxws v2, v2
; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
-; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
-; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
-; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
+; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
+; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
; P9LE: vmrgew v2, [[REG6]], [[REG5]]
-; P9LE: xvcvspuxws v2, v2
; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
-; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
-; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
+; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
+; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
; P8BE: vmrgew v2, [[REG6]], [[REG5]]
-; P8BE: xvcvspuxws v2, v2
; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
-; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
-; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
+; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]]
+; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]]
; P8LE: vmrgew v2, [[REG8]], [[REG7]]
-; P8LE: xvcvspuxws v2, v2
}
; Function Attrs: norecurse nounwind readonly
@@ -2813,40 +2774,36 @@ entry:
; P9BE: lfd
; P9BE: xxmrghd
; P9BE: xxmrghd
-; P9BE: xvcvdpsp
-; P9BE: xvcvdpsp
-; P9BE: vmrgew
-; P9BE: xvcvspuxws v2
+; P9BE: xvcvdpuxws
+; P9BE: xvcvdpuxws
+; P9BE: vmrgew v2
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
; P9LE: xxmrghd
-; P9LE: xvcvdpsp
-; P9LE: xvcvdpsp
-; P9LE: vmrgew
-; P9LE: xvcvspuxws v2
+; P9LE: xvcvdpuxws
+; P9LE: xvcvdpuxws
+; P9LE: vmrgew v2
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: xxmrghd
; P8BE: xxmrghd
-; P8BE: xvcvdpsp
-; P8BE: xvcvdpsp
-; P8BE: vmrgew
-; P8BE: xvcvspuxws v2
+; P8BE: xvcvdpuxws
+; P8BE: xvcvdpuxws
+; P8BE: vmrgew v2
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: xxmrghd
; P8LE: xxmrghd
-; P8LE: xvcvdpsp
-; P8LE: xvcvdpsp
-; P8LE: vmrgew
-; P8LE: xvcvspuxws v2
+; P8LE: xvcvdpuxws
+; P8LE: xvcvdpuxws
+; P8LE: vmrgew v2
}
; Function Attrs: norecurse nounwind readonly
@@ -2886,40 +2843,36 @@ entry:
; P9BE: lfd
; P9BE: xxmrghd
; P9BE: xxmrghd
-; P9BE: xvcvdpsp
-; P9BE: xvcvdpsp
-; P9BE: vmrgew
-; P9BE: xvcvspuxws v2
+; P9BE: xvcvdpuxws
+; P9BE: xvcvdpuxws
+; P9BE: vmrgew v2
; P9LE: lfdux
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
; P9LE: xxmrghd
-; P9LE: xvcvdpsp
-; P9LE: xvcvdpsp
-; P9LE: vmrgew
-; P9LE: xvcvspuxws v2
+; P9LE: xvcvdpuxws
+; P9LE: xvcvdpuxws
+; P9LE: vmrgew v2
; P8BE: lfdux
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: xxmrghd
; P8BE: xxmrghd
-; P8BE: xvcvdpsp
-; P8BE: xvcvdpsp
-; P8BE: vmrgew
-; P8BE: xvcvspuxws v2
+; P8BE: xvcvdpuxws
+; P8BE: xvcvdpuxws
+; P8BE: vmrgew v2
; P8LE: lfdux
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: xxmrghd
; P8LE: xxmrghd
-; P8LE: xvcvdpsp
-; P8LE: xvcvdpsp
-; P8LE: vmrgew
-; P8LE: xvcvspuxws v2
+; P8LE: xvcvdpuxws
+; P8LE: xvcvdpuxws
+; P8LE: vmrgew v2
}
; Function Attrs: norecurse nounwind readonly
@@ -2959,40 +2912,36 @@ entry:
; P9BE: lfd
; P9BE: xxmrghd
; P9BE: xxmrghd
-; P9BE: xvcvdpsp
-; P9BE: xvcvdpsp
-; P9BE: vmrgew
-; P9BE: xvcvspuxws v2
+; P9BE: xvcvdpuxws
+; P9BE: xvcvdpuxws
+; P9BE: vmrgew v2
; P9LE: lfdux
; P9LE: lfd
; P9LE: lfd
; P9LE: lfd
; P9LE: xxmrghd
; P9LE: xxmrghd
-; P9LE: xvcvdpsp
-; P9LE: xvcvdpsp
-; P9LE: vmrgew
-; P9LE: xvcvspuxws v2
+; P9LE: xvcvdpuxws
+; P9LE: xvcvdpuxws
+; P9LE: vmrgew v2
; P8BE: lfdux
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: lxsdx
; P8BE: xxmrghd
; P8BE: xxmrghd
-; P8BE: xvcvdpsp
-; P8BE: xvcvdpsp
-; P8BE: vmrgew
-; P8BE: xvcvspuxws v2
+; P8BE: xvcvdpuxws
+; P8BE: xvcvdpuxws
+; P8BE: vmrgew v2
; P8LE: lfdux
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: lxsdx
; P8LE: xxmrghd
; P8LE: xxmrghd
-; P8LE: xvcvdpsp
-; P8LE: xvcvdpsp
-; P8LE: vmrgew
-; P8LE: xvcvspuxws v2
+; P8LE: xvcvdpuxws
+; P8LE: xvcvdpuxws
+; P8LE: vmrgew v2
}
; Function Attrs: norecurse nounwind readnone
--
1.8.3.1
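
The invalid transformation the patch above removes is easy to reproduce in
plain C: rounding a true f64 value to f32 before the fp-to-int conversion can
change the integer result, which is why the FP_ROUND is now only added when
every input is an extending f32 load. A small self-contained illustration (not
the PR38342 test case itself):

#include <stdio.h>

int main(void) {
  double d = 1.9999999999;      /* truncates to 1 as an integer */
  int direct  = (int)d;         /* 1: convert the double directly */
  int rounded = (int)(float)d;  /* 2: the nearest float to d is 2.0f, so
                                   rounding to f32 first loses precision */
  printf("direct=%d rounded=%d\n", direct, rounded);
  return 0;
}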

0001-SystemZ-TableGen-Fix-shift-count-handling.patch

@@ -0,0 +1,360 @@
From 2ac90db51fc323d183aabe744e57f4feca6d3008 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Wed, 1 Aug 2018 11:57:58 +0000
Subject: [PATCH] [SystemZ, TableGen] Fix shift count handling
*Backport of this patch from trunk, with the TableGen fix modified to work
with LLVM 6.0's TableGen.*
The DAG combiner logic to simplify AND masks in shift counts is invalid.
While it is true that the SystemZ shift instructions ignore all but the
low 6 bits of the shift count, it is still invalid to simplify the AND
masks while the DAG still uses the standard shift operators (which are
*not* defined to match the SystemZ instruction behavior).
Instead, this patch performs equivalent operations during instruction
selection. For completely removing the AND, this now happens via
additional DAG match patterns implemented by a multi-alternative
PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG
patterns were already mostly OK, they just needed an output XForm to
actually truncate the immediate value.
Unfortunately, the latter change also exposed a bug in TableGen: it
seems XForms are currently only handled correctly for direct operands of
the outermost operation node. This patch also fixes that bug by simply
recursing through the whole pattern. This should be NFC for all other
targets.
Differential Revision: https://reviews.llvm.org/D50096
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338521 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/SystemZ/SystemZISelLowering.cpp | 78 ------------------------------
lib/Target/SystemZ/SystemZISelLowering.h | 1 -
lib/Target/SystemZ/SystemZInstrInfo.td | 49 +++++++++++++------
lib/Target/SystemZ/SystemZOperands.td | 1 +
lib/Target/SystemZ/SystemZOperators.td | 6 +++
test/CodeGen/SystemZ/shift-12.ll | 12 +++++
utils/TableGen/CodeGenDAGPatterns.cpp | 39 ++++++++-------
7 files changed, 71 insertions(+), 115 deletions(-)
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index adf3683..505b143 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -522,10 +522,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::ROTL);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -5405,76 +5401,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
return SDValue();
}
-SDValue SystemZTargetLowering::combineSHIFTROT(
- SDNode *N, DAGCombinerInfo &DCI) const {
-
- SelectionDAG &DAG = DCI.DAG;
-
- // Shift/rotate instructions only use the last 6 bits of the second operand
- // register. If the second operand is the result of an AND with an immediate
- // value that has its last 6 bits set, we can safely remove the AND operation.
- //
- // If the AND operation doesn't have the last 6 bits set, we can't remove it
- // entirely, but we can still truncate it to a 16-bit value. This prevents
- // us from ending up with a NILL with a signed operand, which will cause the
- // instruction printer to abort.
- SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndMaskOp = N1->getOperand(1);
- auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
-
- // The AND mask is constant
- if (AndMask) {
- auto AmtVal = AndMask->getZExtValue();
-
- // Bottom 6 bits are set
- if ((AmtVal & 0x3f) == 0x3f) {
- SDValue AndOp = N1->getOperand(0);
-
- // This is the only use, so remove the node
- if (N1.hasOneUse()) {
- // Combine the AND away
- DCI.CombineTo(N1.getNode(), AndOp);
-
- // Return N so it isn't rechecked
- return SDValue(N, 0);
-
- // The node will be reused, so create a new node for this one use
- } else {
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- AndOp);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
-
- // We can't remove the AND, but we can use NILL here (normally we would
- // use NILF). Only keep the last 16 bits of the mask. The actual
- // transformation will be handled by .td definitions.
- } else if (AmtVal >> 16 != 0) {
- SDValue AndOp = N1->getOperand(0);
-
- auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
- SDLoc(AndMaskOp),
- AndMaskOp.getValueType());
-
- auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
- AndOp, NewMask);
-
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0), N->getOperand(0),
- NewAnd);
- DCI.AddToWorklist(Replace.getNode());
-
- return Replace;
- }
- }
- }
-
- return SDValue();
-}
-
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
@@ -5487,10 +5413,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL: return combineSHIFTROT(N, DCI);
}
return SDValue();
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 2cdc88d..1918d45 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -570,7 +570,6 @@ private:
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index abb8045..fb40cb4 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1318,9 +1318,20 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
// Shifts
//===----------------------------------------------------------------------===//
+// Complexity is 8 so we match it before the NILL patterns below.
+let AddedComplexity = 8 in {
+
+class ShiftAndPat <SDNode node, Instruction inst, ValueType vt> : Pat <
+ (node vt:$val, (and i32:$count, imm32bottom6set)),
+ (inst vt:$val, i32:$count, 0)
+>;
+}
+
// Logical shift left.
defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+def : ShiftAndPat <shl, SLL, i32>;
def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
+def : ShiftAndPat <shl, SLLG, i64>;
def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
// Arithmetic shift left.
@@ -1332,7 +1343,9 @@ let Defs = [CC] in {
// Logical shift right.
defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+def : ShiftAndPat <srl, SRL, i32>;
def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
+def : ShiftAndPat <srl, SRLG, i64>;
def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
// Arithmetic shift right.
@@ -1341,10 +1354,14 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
}
+def : ShiftAndPat <sra, SRA, i32>;
+def : ShiftAndPat <sra, SRAG, i64>;
// Rotate left.
def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
+def : ShiftAndPat <rotl, RLL, i32>;
def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
+def : ShiftAndPat <rotl, RLLG, i64>;
// Rotate second operand left and insert selected bits into first operand.
// These can act like 32-bit operands provided that the constant start and
@@ -2154,29 +2171,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
// Complexity is added so that we match this before we match NILF on the AND
// operation alone.
let AddedComplexity = 4 in {
- def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
- (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
- def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
- (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
+ (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
}
// Peepholes for turning scalar operations into block operations.
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 7136121..61a1124 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -341,6 +341,7 @@ def imm32zx16 : Immediate<i32, [{
}], UIMM16, "U16Imm">;
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. We need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index d067f33..269c3d0 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -611,6 +611,12 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$addr),
(store (operator), node:$addr)>;
+// Create a shift operator that optionally ignores an AND of the
+// shift count with an immediate if the bottom 6 bits are all set.
+def imm32bottom6set : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() & 0x3f) == 0x3f;
+}]>;
+
// Vector representation of all-zeros and all-ones.
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll
index 4ebc42b..53d3d53 100644
--- a/test/CodeGen/SystemZ/shift-12.ll
+++ b/test/CodeGen/SystemZ/shift-12.ll
@@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) {
%reuse = add i32 %and, %shift
ret i32 %reuse
}
+
+; Test that AND is not removed for i128 (which calls __ashlti3)
+define i128 @f11(i128 %a, i32 %sh) {
+; CHECK-LABEL: f11:
+; CHECK: risbg %r4, %r4, 57, 191, 0
+; CHECK: brasl %r14, __ashlti3@PLT
+ %and = and i32 %sh, 127
+ %ext = zext i32 %and to i128
+ %shift = shl i128 %a, %ext
+ ret i128 %shift
+}
+
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 493066e..74af62b 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -3919,6 +3919,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
return false;
}
+// Promote xform function to be an explicit node wherever set.
+static TreePatternNode* PromoteXForms(TreePatternNode* N) {
+ if (Record *Xform = N->getTransformFn()) {
+ N->setTransformFn(nullptr);
+ std::vector<TreePatternNode*> Children;
+ Children.push_back(PromoteXForms(N));
+ return new TreePatternNode(Xform, std::move(Children),
+ N->getNumTypes());
+ }
+
+ if (!N->isLeaf())
+ for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
+ TreePatternNode* Child = N->getChild(i);
+ N->setChild(i, std::move(PromoteXForms(Child)));
+ }
+ return N;
+}
+
void CodeGenDAGPatterns::ParsePatterns() {
std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");
@@ -4009,26 +4027,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
InstImpResults);
// Promote the xform function to be an explicit node if set.
- TreePatternNode *DstPattern = Result.getOnlyTree();
- std::vector<TreePatternNode*> ResultNodeOperands;
- for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
- TreePatternNode *OpNode = DstPattern->getChild(ii);
- if (Record *Xform = OpNode->getTransformFn()) {
- OpNode->setTransformFn(nullptr);
- std::vector<TreePatternNode*> Children;
- Children.push_back(OpNode);
- OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
- }
- ResultNodeOperands.push_back(OpNode);
- }
- DstPattern = Result.getOnlyTree();
- if (!DstPattern->isLeaf())
- DstPattern = new TreePatternNode(DstPattern->getOperator(),
- ResultNodeOperands,
- DstPattern->getNumTypes());
-
- for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
- DstPattern->setType(i, Result.getOnlyTree()->getExtType(i));
+ TreePatternNode* DstPattern = PromoteXForms(Result.getOnlyTree());
TreePattern Temp(Result.getRecord(), DstPattern, false, *this);
Temp.InferAllTypes();
--
1.8.3.1
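
In C terms, the distinction the patch above enforces looks like the sketch
below (a hand-written illustration mirroring the new shift-12.ll test, not
code from the patch). For 64-bit shifts the AND of the count is redundant
because the SystemZ shift instructions read only the low 6 bits, so it can be
folded away safely during instruction selection; for an i128 shift, which is
lowered to a library call, the AND is semantically required, which is why
removing it in the generic DAG combiner was unsound:

unsigned long shl64(unsigned long x, unsigned int n) {
  /* SLLG ignores all but the low 6 bits of the count; with this patch
     the AND is matched away during instruction selection (ShiftAndPat)
     instead of by the removed, invalid generic combineSHIFTROT. */
  return x << (n & 63);
}

unsigned __int128 shl128(unsigned __int128 x, unsigned int n) {
  /* An i128 shift is lowered to a call to __ashlti3, which uses all
     7 bits of the count: here the AND is semantically required and
     must survive (see the f11 test above). */
  return x << (n & 127);
}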

llvm.spec

@@ -30,7 +30,7 @@
Name: %{pkg_name}
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
-Release: 5%{?dist}
+Release: 6%{?dist}
Summary: The Low Level Virtual Machine
License: NCSA
@@ -43,6 +43,10 @@
Patch3: 0001-CMake-Split-static-library-exports-into-their-own-ex.patch
Patch7: 0001-Filter-out-cxxflags-not-supported-by-clang.patch
Patch9: 0001-Export-LLVM_DYLIB_COMPONENTS-in-LLVMConfig.cmake.patch
+Patch10: 0001-Don-t-run-BV-DAG-Combine-before-legalization-if-it-a.patch
+Patch11: 0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch
+Patch12: 0001-SystemZ-TableGen-Fix-shift-count-handling.patch
+
BuildRequires: gcc
BuildRequires: gcc-c++
BuildRequires: cmake
@@ -310,6 +314,9 @@
fi
%endif
%changelog
+* Mon Aug 06 2018 Tom Stellard <tstellar@redhat.com> - 6.0.1-6
+- Backport some fixes needed by mesa and rust
+
* Thu Jul 26 2018 Tom Stellard <tstellar@redhat.com> - 6.0.1-5
- Move libLLVM-6.0.so to llvm6.0-libs.