author     Dimitry Andric <dim@FreeBSD.org>  2017-05-29 16:25:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-05-29 16:25:25 +0000
commit     ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch)
tree       568d786a59d49bef961dcb9bd09d422701b9da5b /lib/Target/PowerPC
parent     b5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff)
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--  lib/Target/PowerPC/PPCExpandISEL.cpp   |  2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 92
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h   |  6
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td    |  4
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp    |  2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td     |  2
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td      | 76
7 files changed, 155 insertions, 29 deletions
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
index ebd414baf1d21..41e3190c3eec7 100644
--- a/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -339,7 +339,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
// Note: Cannot use stepBackward instead since we are using the Reg
// liveness state at the end of MBB (liveOut of MBB) as the liveIn for
// NewSuccessor. Otherwise, will cause cyclic dependence.
- LivePhysRegs LPR(MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
+ LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
for (MachineInstr &MI : *MBB)
LPR.stepForward(MI, Clobbers);
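A minimal sketch (not part of the patch; the helper below is hypothetical) of the live-in computation the comment above refers to: a LivePhysRegs set is stepped forward over every instruction of MBB so that, after the loop, it models the registers live out of MBB, and that set can then seed NewSuccessor's live-ins. LivePhysRegs is constructed from a TargetRegisterInfo reference, which is what the added dereference provides.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/LivePhysRegs.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Hypothetical helper along the lines of reorganizeBlockLayout():
    // compute MBB's live-outs with a forward walk and copy them into
    // NewSuccessor's live-in list.
    static void seedLiveInsFromLiveOuts(MachineBasicBlock &MBB,
                                        MachineBasicBlock &NewSuccessor,
                                        const TargetRegisterInfo &TRI) {
      LivePhysRegs LPR(TRI);            // constructor takes a reference
      LPR.addLiveIns(MBB);              // start from MBB's own live-ins
      SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
      for (MachineInstr &MI : MBB)
        LPR.stepForward(MI, Clobbers);  // advance past each instruction
      for (unsigned Reg : LPR)          // LPR now holds MBB's live-outs
        NewSuccessor.addLiveIn(Reg);
    }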
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e65b1f1aa0a55..b90a5ee28342f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1596,9 +1596,8 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
- unsigned &InsertAtByte, bool &Swap, bool IsLE) {
// Check that the mask is shuffling words
+static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
unsigned B1 = N->getMaskElt(i*4+1);
@@ -1610,6 +1609,14 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
+ return true;
+}
+
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+ if (!isWordShuffleMask(N))
+ return false;
+
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
@@ -1680,6 +1687,69 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
+bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE) {
+ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+ // Ensure each byte index of the word is consecutive.
+ if (!isWordShuffleMask(N))
+ return false;
+
+ // Now we look at mask elements 0,4,8,12, which are the beginning of words.
+ unsigned M0 = N->getMaskElt(0) / 4;
+ unsigned M1 = N->getMaskElt(4) / 4;
+ unsigned M2 = N->getMaskElt(8) / 4;
+ unsigned M3 = N->getMaskElt(12) / 4;
+
+ // If both vector operands for the shuffle are the same vector, the mask will
+ // contain only elements from the first one and the second one will be undef.
+ if (N->getOperand(1).isUndef()) {
+ assert(M0 < 4 && "Indexing into an undef vector?");
+ if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
+ return false;
+
+ ShiftElts = IsLE ? (4 - M0) % 4 : M0;
+ Swap = false;
+ return true;
+ }
+
+ // Ensure each word index of the ShuffleVector Mask is consecutive.
+ if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
+ return false;
+
+ if (IsLE) {
+ if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 3 left elements of the second vector
+ // (or if there is no shift to be done at all).
+ Swap = false;
+ ShiftElts = (8 - M0) % 8;
+ } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 3 left elements of the first vector
+ // (or if we're shifting by 4 - thereby simply swapping the vectors).
+ Swap = true;
+ ShiftElts = (4 - M0) % 4;
+ }
+
+ return true;
+ } else { // BE
+ if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
+ // Input vectors don't need to be swapped if the leading element
+ // of the result is one of the 4 elements of the first vector.
+ Swap = false;
+ ShiftElts = M0;
+ } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
+ // Input vectors need to be swapped if the leading element
+ // of the result is one of the 4 elements of the right vector.
+ Swap = true;
+ ShiftElts = M0 - 4;
+ }
+
+ return true;
+ }
+}
+
+
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -7679,6 +7749,20 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
+
+ if (Subtarget.hasVSX() &&
+ PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+ if (Swap)
+ std::swap(V1, V2);
+ SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+ SDValue Conv2 =
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
+
+ SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
+ DAG.getConstant(ShiftElts, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
+ }
+
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
@@ -8212,10 +8296,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDLoc DL(Op);
switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
case Intrinsic::ppc_cfence: {
+ assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
- Op.getOperand(ArgStart + 1))),
+ Op.getOperand(ArgStart + 1)),
+ Op.getOperand(0)),
0);
}
default:
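To make the new shuffle path concrete, here is a small standalone example (illustrative only, not part of the patch; the typedef and function name are invented). A word-granular shuffle whose indices are consecutive and word-aligned, such as <1,2,3,4> on a 4 x i32 vector, is exactly the shape isXXSLDWIShuffleMask accepts, so the lowering added above should be able to select a single xxsldwi (or an xxswapd when both operands are the same register) instead of a vperm fed from a constant-pool mask.

    // Sketch; compiled with something like:
    //   clang -O2 -mcpu=power8 --target=powerpc64le-linux-gnu
    typedef int v4si __attribute__((vector_size(16)));

    // Word indices 1,2,3,4 take three words from 'a' and one from 'b',
    // consecutively -- the pattern the new mask check looks for.
    v4si shift_pair_left_one_word(v4si a, v4si b) {
      return __builtin_shufflevector(a, b, 1, 2, 3, 4);
    }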
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index acb77943b118e..2f9eb95f6de68 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -450,7 +450,11 @@ namespace llvm {
/// a VMRGEW or VMRGOW instruction
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
-
+ /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+ /// for a XXSLDWI instruction.
+ bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+ bool &Swap, bool IsLE);
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index a3f894c81a01b..165970f9678c3 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1001,7 +1001,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
+// correct because the branch select pass is relying on it.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 46f103141bc1d..fd6785e963a64 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1931,6 +1931,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case PPC::DFSTOREf64: {
assert(Subtarget.hasP9Vector() &&
"Invalid D-Form Pseudo-ops on non-P9 target.");
+ assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
+ "D-form op must have register and immediate operands");
unsigned UpperOpcode, LowerOpcode;
switch (MI.getOpcode()) {
case PPC::DFLOADf32:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 0766cfe4a9871..26b99eced23cb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,7 +46,7 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
]>;
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
- SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+ SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index b98140fedfc04..1589ab03e5075 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1066,6 +1066,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+// PPCvecshl XT, XA, XA, 2 can be selected as either XXSLDWI XT,XA,XA,2 or
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
+
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
(SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
@@ -2379,8 +2383,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Load Vector Indexed
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load xoaddr:$src))]>;
-
+ [(set v2f64:$XT, (load xaddr:$src))]>;
// Load Vector (Left-justified) with Length
def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
"lxvl $XT, $src, $rB", IIC_LdStLoad,
@@ -2430,7 +2433,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Store Vector Indexed
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ [(store v2f64:$XT, xaddr:$dst)]>;
// Store Vector (Left-justified) with Length
def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB),
@@ -2498,21 +2501,38 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
} // IsLittleEndian, HasP9Vector
- def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
-
+ // D-Form Load/Store
+ def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>;
+
+ def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+
+ def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst),
+ (STXVX $rS, xaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst),
+ (STXVX $rS, xaddr:$dst)>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@@ -2704,9 +2724,15 @@ def FltToUIntLoad {
def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
+def FltToLongLoadP9 {
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A)))));
+}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
+def FltToULongLoadP9 {
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A)))));
+}
def FltToLong {
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
}
@@ -2728,9 +2754,15 @@ def DblToULong {
def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
+def DblToIntLoadP9 {
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A)))));
+}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
+def DblToUIntLoadP9 {
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A)))));
+}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
}
@@ -2898,17 +2930,17 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+ def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+ def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+ def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddr:$A),
VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+ def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddr:$A),
VSFRC)), 0))>;
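As a closing illustration (not part of the patch; function names are invented), the D-form patterns added above let a Power9 vector access whose address is a base register plus a constant multiple of 16 be selected with the DQ-form lxv/stxv, while register-plus-register addresses keep using the indexed lxvx/stxvx patterns that were retargeted from xoaddr to xaddr earlier in this file.

    // Sketch; compiled with something like:
    //   clang -O2 -mcpu=power9 --target=powerpc64le-linux-gnu
    typedef int v4si __attribute__((vector_size(16)));

    // Base + constant multiple of 16: a candidate for the D-form load,
    // roughly lxv vsN, 32(r3).
    v4si load_disp(const v4si *p) { return p[2]; }

    // Base + variable index: still a candidate for the X-form load,
    // roughly lxvx vsN, r3, rX.
    v4si load_indexed(const v4si *p, long i) { return p[i]; }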