author     Dimitry Andric <dim@FreeBSD.org>    2017-05-29 16:25:25 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-05-29 16:25:25 +0000
commit     ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (patch)
tree       568d786a59d49bef961dcb9bd09d422701b9da5b /lib/Target/PowerPC
parent     b5630dbadf9a2a06754194387d6b0fd9962a67f1 (diff)
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--  lib/Target/PowerPC/PPCExpandISEL.cpp    |  2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp  | 92
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h    |  6
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td     |  4
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp     |  2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td      |  2
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td       | 76
7 files changed, 155 insertions, 29 deletions
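
The functional core of the diff below is a new lowering of word-aligned v16i8 shuffles to the VSX xxsldwi instruction (through a PPCISD::VECSHL node), plus D-form (lxv/stxv) load and store selection on Power9. For reference, here is a minimal stand-alone C++ model of the word-granular shift that xxsldwi performs; the helper name and the values in main are illustrative and are not part of the patch.

```cpp
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>

// Model of xxsldwi XT,XA,XB,SHW at word granularity: the result is words
// SHW..SHW+3 of the eight-word concatenation XA||XB, using the ISA's
// big-endian word numbering. SHW is a 2-bit immediate (0..3).
static std::array<uint32_t, 4> xxsldwi(const std::array<uint32_t, 4> &XA,
                                       const std::array<uint32_t, 4> &XB,
                                       unsigned SHW) {
  std::array<uint32_t, 8> Concat;
  std::copy(XA.begin(), XA.end(), Concat.begin());
  std::copy(XB.begin(), XB.end(), Concat.begin() + 4);
  std::array<uint32_t, 4> XT;
  for (unsigned i = 0; i < 4; ++i)
    XT[i] = Concat[i + SHW];
  return XT;
}

int main() {
  std::array<uint32_t, 4> A = {0, 1, 2, 3}, B = {4, 5, 6, 7};
  std::array<uint32_t, 4> R = xxsldwi(A, B, 2); // expect 2 3 4 5
  std::printf("%u %u %u %u\n", R[0], R[1], R[2], R[3]);
  return 0;
}
```

With XA == XB and a shift of two words the result is the doubleword swap of the input, which is why the new PPCInstrVSX.td pattern maps (PPCvecshl $src, $src, 2) to XXPERMDI $src, $src, 2 rather than to XXSLDWI.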
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
index ebd414baf1d21..41e3190c3eec7 100644
--- a/lib/Target/PowerPC/PPCExpandISEL.cpp
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -339,7 +339,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
     // Note: Cannot use stepBackward instead since we are using the Reg
     // liveness state at the end of MBB (liveOut of MBB) as the liveIn for
     // NewSuccessor. Otherwise, will cause cyclic dependence.
-    LivePhysRegs LPR(MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
+    LivePhysRegs LPR(*MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
     SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
     for (MachineInstr &MI : *MBB)
       LPR.stepForward(MI, Clobbers);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e65b1f1aa0a55..b90a5ee28342f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1596,9 +1596,8 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
   return true;
 }
 
-bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
-                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
 // Check that the mask is shuffling words
+static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
   for (unsigned i = 0; i < 4; ++i) {
     unsigned B0 = N->getMaskElt(i*4);
     unsigned B1 = N->getMaskElt(i*4+1);
@@ -1610,6 +1609,14 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
       return false;
   }
 
+  return true;
+}
+
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
+  if (!isWordShuffleMask(N))
+    return false;
+
   // Now we look at mask elements 0,4,8,12
   unsigned M0 = N->getMaskElt(0) / 4;
   unsigned M1 = N->getMaskElt(4) / 4;
@@ -1680,6 +1687,69 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
   return false;
 }
 
+bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                               bool &Swap, bool IsLE) {
+  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
+  // Ensure each byte index of the word is consecutive.
+  if (!isWordShuffleMask(N))
+    return false;
+
+  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
+  unsigned M0 = N->getMaskElt(0) / 4;
+  unsigned M1 = N->getMaskElt(4) / 4;
+  unsigned M2 = N->getMaskElt(8) / 4;
+  unsigned M3 = N->getMaskElt(12) / 4;
+
+  // If both vector operands for the shuffle are the same vector, the mask will
+  // contain only elements from the first one and the second one will be undef.
+  if (N->getOperand(1).isUndef()) {
+    assert(M0 < 4 && "Indexing into an undef vector?");
+    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
+      return false;
+
+    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
+    Swap = false;
+    return true;
+  }
+
+  // Ensure each word index of the ShuffleVector Mask is consecutive.
+  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
+    return false;
+
+  if (IsLE) {
+    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
+      // Input vectors don't need to be swapped if the leading element
+      // of the result is one of the 3 left elements of the second vector
+      // (or if there is no shift to be done at all).
+      Swap = false;
+      ShiftElts = (8 - M0) % 8;
+    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
+      // Input vectors need to be swapped if the leading element
+      // of the result is one of the 3 left elements of the first vector
+      // (or if we're shifting by 4 - thereby simply swapping the vectors).
+      Swap = true;
+      ShiftElts = (4 - M0) % 4;
+    }
+
+    return true;
+  } else { // BE
+    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
+      // Input vectors don't need to be swapped if the leading element
+      // of the result is one of the 4 elements of the first vector.
+      Swap = false;
+      ShiftElts = M0;
+    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
+      // Input vectors need to be swapped if the leading element
+      // of the result is one of the 4 elements of the right vector.
+      Swap = true;
+      ShiftElts = M0 - 4;
+    }
+
+    return true;
+  }
+}
+
+
 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -7679,6 +7749,20 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
   }
 
+  if (Subtarget.hasVSX() &&
+      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
+    if (Swap)
+      std::swap(V1, V2);
+    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+    SDValue Conv2 =
+        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
+
+    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
+                              DAG.getConstant(ShiftElts, dl, MVT::i32));
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
+  }
+
   if (Subtarget.hasVSX()) {
     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
       int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
@@ -8212,10 +8296,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   SDLoc DL(Op);
   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
   case Intrinsic::ppc_cfence: {
+    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
-                                                  Op.getOperand(ArgStart + 1))),
+                                                  Op.getOperand(ArgStart + 1)),
+                                      Op.getOperand(0)),
                    0);
   }
   default:
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index acb77943b118e..2f9eb95f6de68 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -450,7 +450,11 @@ namespace llvm {
     /// a VMRGEW or VMRGOW instruction
     bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG);
-
+    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
+    /// for a XXSLDWI instruction.
+    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
+                              bool &Swap, bool IsLE);
+
     /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
     /// shift amount, otherwise return -1.
     int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index a3f894c81a01b..165970f9678c3 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1001,7 +1001,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
                  isPPC64;
 // LR8 is a true define, while the rest of the Defs are clobbers.  X3 is
 // explicitly defined when this op is created, so not mentioned here.
-let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be
+// correct because the branch select pass is relying on it.
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8,
     Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
 def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
                         "#GETtlsADDR",
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 46f103141bc1d..fd6785e963a64 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1931,6 +1931,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   case PPC::DFSTOREf64: {
     assert(Subtarget.hasP9Vector() &&
            "Invalid D-Form Pseudo-ops on non-P9 target.");
+    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
+           "D-form op must have register and immediate operands");
     unsigned UpperOpcode, LowerOpcode;
     switch (MI.getOpcode()) {
     case PPC::DFLOADf32:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 0766cfe4a9871..26b99eced23cb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,7 +46,7 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
 ]>;
 
 def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
-  SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
+  SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
 ]>;
 
 def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index b98140fedfc04..1589ab03e5075 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1066,6 +1066,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
 def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
 
+// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
+
 // Selects.
 def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
           (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
@@ -2379,8 +2383,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Load Vector Indexed
   def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
-                [(set v2f64:$XT, (load xoaddr:$src))]>;
-
+                [(set v2f64:$XT, (load xaddr:$src))]>;
   // Load Vector (Left-justified) with Length
   def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
                      "lxvl $XT, $src, $rB", IIC_LdStLoad,
@@ -2430,7 +2433,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
 
   // Store Vector Indexed
   def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
-                [(store v2f64:$XT, xoaddr:$dst)]>;
+                [(store v2f64:$XT, xaddr:$dst)]>;
 
   // Store Vector (Left-justified) with Length
   def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB),
@@ -2498,21 +2501,38 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
               (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
   } // IsLittleEndian, HasP9Vector
 
-  def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
-  def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
-  def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
-  def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
-  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
-  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
-  def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
-  def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
-  def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
-  def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
-            (STXVX $rS, xoaddr:$dst)>;
-  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
-            (STXVX $rS, xoaddr:$dst)>;
-
+  // D-Form Load/Store
+  def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>;
+
+  def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst),
+            (STXV $rS, memrix16:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst),
+            (STXV $rS, memrix16:$dst)>;
+
+
+  def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>;
+  def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>;
+  def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>;
+  def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>;
+  def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>;
+  def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>;
+  def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+  def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+  def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+  def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst),
+            (STXVX $rS, xaddr:$dst)>;
+  def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst),
+            (STXVX $rS, xaddr:$dst)>;
   def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
             (v4i32 (LXVWSX xoaddr:$src))>;
   def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@@ -2704,9 +2724,15 @@ def FltToUIntLoad {
 def FltToLongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
 }
+def FltToLongLoadP9 {
+  dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A)))));
+}
 def FltToULongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
 }
+def FltToULongLoadP9 {
+  dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A)))));
+}
 def FltToLong {
   dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
 }
@@ -2728,9 +2754,15 @@ def DblToULong {
 def DblToIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
 }
+def DblToIntLoadP9 {
+  dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A)))));
+}
 def DblToUIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
 }
+def DblToUIntLoadP9 {
+  dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A)))));
+}
 def DblToLongLoad {
   dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
 }
@@ -2898,17 +2930,17 @@ let AddedComplexity = 400 in {
             (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
   def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
             (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
-  def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+  def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                               (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
-  def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+  def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
             (v4i32 (XXSPLTW (COPY_TO_REGCLASS
                               (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
-  def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+  def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
             (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
                                              (DFLOADf32 iaddr:$A),
                                              VSFRC)), 0))>;
-  def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+  def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
             (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
                                              (DFLOADf32 iaddr:$A),
                                              VSFRC)), 0))>;
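
The new PPC::isXXSLDWIShuffleMask classifies a 16-byte shuffle mask into a (Swap, ShiftElts) pair that LowerVECTOR_SHUFFLE then feeds to PPCISD::VECSHL. The stand-alone sketch below re-derives the two-input, little-endian half of that classification on plain arrays instead of ShuffleVectorSDNodes; the function names and the sample mask in main are assumptions chosen for illustration, not LLVM API.

```cpp
#include <array>
#include <cstdio>

// True if the 16 byte-level shuffle indices move whole, word-aligned words,
// mirroring the isWordShuffleMask helper introduced in the patch.
static bool isWordShuffleMask(const std::array<int, 16> &Mask) {
  for (unsigned i = 0; i < 4; ++i) {
    int B0 = Mask[i * 4];
    if (B0 % 4 != 0)
      return false;
    for (int j = 1; j < 4; ++j)
      if (Mask[i * 4 + j] != B0 + j)
        return false;
  }
  return true;
}

// Two-input, little-endian path of the classification: decide whether the
// shuffle inputs must be swapped and which xxsldwi shift reproduces the mask.
static bool classifyXXSLDWI_LE(const std::array<int, 16> &Mask, bool &Swap,
                               unsigned &ShiftElts) {
  if (!isWordShuffleMask(Mask))
    return false;
  unsigned M0 = Mask[0] / 4, M1 = Mask[4] / 4;
  unsigned M2 = Mask[8] / 4, M3 = Mask[12] / 4;
  // The four selected words must be consecutive modulo 8 (two 4-word inputs).
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;
  if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
    Swap = false;              // leading word is in the second input, or no shift (M0 == 0)
    ShiftElts = (8 - M0) % 8;
  } else {
    Swap = true;               // leading word is in the first input, or a pure swap (M0 == 4)
    ShiftElts = (4 - M0) % 4;
  }
  return true;
}

int main() {
  // Sample byte mask selecting words 6, 7, 0, 1 of the concatenation V1||V2.
  std::array<int, 16> Mask = {24, 25, 26, 27, 28, 29, 30, 31,
                              0,  1,  2,  3,  4,  5,  6,  7};
  bool Swap = false;
  unsigned Shift = 0;
  if (classifyXXSLDWI_LE(Mask, Swap, Shift))
    std::printf("swap=%d shift=%u\n", Swap, Shift); // prints swap=0 shift=2
  return 0;
}
```

For this sample mask the sketch reports swap=0 shift=2, matching the ShiftElts = (8 - M0) % 8 rule added in PPCISelLowering.cpp.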